From 24e43e985620b104697c45052638e4470048ba68 Mon Sep 17 00:00:00 2001 From: raver119 Date: Wed, 7 Aug 2019 17:49:13 +0300 Subject: [PATCH] [WIP] build time improvements (#106) * fix pad javadoc and @see links. (#72) Signed-off-by: Robert Altena * [WIP] More fixes (#73) * special tests for ConstantTadHelper/ConstantShapeHelper Signed-off-by: raver119 * release methods for data buffers Signed-off-by: raver119 * delete temporary buffer Java side Signed-off-by: raver119 * delete temporary buffer Java side Signed-off-by: raver119 * delete temporary TadPack C++/Java side (#74) Signed-off-by: raver119 * Zoo model TF import test updates (#75) * argLine fix, update compression_gru comment * updated comment for xception * undid but commented argLine change * updated xlnet comment * copyright headers * - new NDArray methods like()/ulike() (#77) - fix for depthwise_conv2d_bp + special test Signed-off-by: raver119 * upsampling2d fix CUDA Signed-off-by: raver119 * DL4J trace logging (#79) * MLN/CG trace logging for debugging Signed-off-by: AlexDBlack * Tiny tweak Signed-off-by: AlexDBlack * strided_slice_bp shape fn leak fix Signed-off-by: raver119 * SameDiff fixes and naming (#78) * remove SDVariable inplace methods * import methods * npe fix in OpVal * removed SameDiff inplace ops from tests * Naming updates, moved to centralized methods in SameDiff, should use op_#:# for everything * quick fixes * javadoc * SDVariable eval with placeholders * use regex match * better matching * fix javadoc. (#76) * fix javadoc. Signed-off-by: Robert Altena * replace most @see with @link s. 
Signed-off-by: Robert Altena * 4 additional tests Signed-off-by: raver119 * Various DL4J/ND4J fixes (#81) * #7954 Force refresh of UI when switching tabs on overview page Signed-off-by: AlexDBlack * #8017 Concurrent modification exception (synchronize) fix Signed-off-by: AlexDBlack * #8033 Don't initialize updater in middle of writing memory crash dump Signed-off-by: AlexDBlack * #8208 Fix shape checks for ND4J int[] creator methods Signed-off-by: AlexDBlack * #6385 #7992 Keras import naming fixes + cleanup Signed-off-by: AlexDBlack * #8016 Upsampling3D - add NDHWC format support Signed-off-by: AlexDBlack * Refactor NativeOps.h to export C functions * Actually export functions from NativeOps.h * Adapt the Java wrappers in ND4J generated with JavaCPP * Create C wrappers for some of the C++ classes currently used by ND4J * remove duplicate code in createBufferDetached. (#83) Signed-off-by: Robert Altena * Keras model import - updater lr fix (#84) * Keras model import - updater lr fix Signed-off-by: eraly * Keras model import - updater lr fix, cleanup Signed-off-by: eraly * Fix functions of OpaqueVariablesSet * SameDiff Convolution Config validation, better output methods (#82) * Conv Config validation & tests Signed-off-by: Ryan Nett * stackOutputs utility method Signed-off-by: Ryan Nett * use constructor for validation, support negative kernel sizes (inferred from weights) Signed-off-by: Ryan Nett * better output methods Signed-off-by: Ryan Nett * move output to be with fit and evaluate Signed-off-by: Ryan Nett * fixes Signed-off-by: Ryan Nett * more fixes Signed-off-by: Ryan Nett * refactor duplicate code from pad methods. (#86) * refactor duplicate code from pad methods. Signed-off-by: Robert Altena * replace switch with if. 
Signed-off-by: Robert Altena * Various ND4J/DL4J fixes and improvements (#87) * Reshape and reallocate - small fixes Signed-off-by: AlexDBlack * Reshape and reallocate - small fixes Signed-off-by: AlexDBlack * #6488 ElementWiseVertex broadcast support Signed-off-by: AlexDBlack * Constructors and broadcast supported it Transforms.max/min Signed-off-by: AlexDBlack * #8054 ElementWiseVertex now supports broadcast inputs Signed-off-by: AlexDBlack * #8057 Nd4j.create overload dtype fix Signed-off-by: AlexDBlack * #7551 ND4J Shape validation fix Signed-off-by: AlexDBlack * [WIP] Numpy boolean import (#91) * numpy bool type Signed-off-by: raver119 * numpy bool java side Signed-off-by: raver119 * remove create method with unused parameter. (#89) * remove create method with unused parameter. * removed more unused methods. Signed-off-by: Robert Altena * removing more unused code. Signed-off-by: Robert Altena * last removal of unused code. Signed-off-by: Robert Altena * remove createSparse methods. (#92) Signed-off-by: Robert Altena * Various ND4J/DL4J fixes (#90) * Deprecate Old*Op instances Signed-off-by: AlexDBlack * #8063 #8054 Broadcast exceptions + cleanup inplace ops Signed-off-by: AlexDBlack * Small fix Signed-off-by: AlexDBlack * Remove bad test condition Signed-off-by: AlexDBlack * #7993 Fix shape function issue in crop_and_resize op Signed-off-by: AlexDBlack * DL4J SameDiff lambda layer fix Signed-off-by: AlexDBlack * #8029 Fix for pnorm backprop math Signed-off-by: AlexDBlack * #8038 Fix Op profiler NaN/Inf triggering + add tests (#93) Signed-off-by: AlexDBlack * createUninitializedDetached refactoring. (#94) * wip * update interface, add null implementations. * Breaking one test in a weird way. Signed-off-by: Robert Altena * createUninitializedDetached refactored. 
Signed-off-by: Robert Altena * cuda build fix for issues introduced by recent refactoring Signed-off-by: raver119 * [WIP] More of CUDA (#95) * initial commit Signed-off-by: raver119 * Implementation of hashcode cuda helper. Working edition. * Fixed parallel test input arrangements. * Fixed tests for hashcode op. * Fixed shape calculation for image:crop_and_resize op and test. * NativeOps tests. Initial test suite. * Added tests for indexReduce methods. * Added test on execBroadcast with NDArray as dimensions. * Added test on execBroadcastBool with NDArray as dimensions. * Added tests on execPairwiseTransform and execPairwiseTransformBool. * Added tests for execReduce with scalar results. * Added reduce tests for non-empty dims array. * Added tests for reduce3. * Added tests for execScalar. * Added tests for execSummaryStats. * - provide cpu/cuda code for batch_to_space - testing it Signed-off-by: Yurii * - remove old test for batch_to_space (had wrong format and numbers were not checked) Signed-off-by: Yurii * Fixed compilation errors with test. * Added test for execTransformFloat. * Added test for execTransformSame. * Added test for execTransformBool. * Added test for execTransformStrict. * Added tests for execScalar/execScalarBool with TADs. * Added test for flatten. * - provide cpu/cuda code for space_to_Batch operation Signed-off-by: Yurii * Added test for concat. * comment unnecessary stuff in s_t_b Signed-off-by: Yurii * Added test for specialConcat. * Added tests for memcpy/set routines. * Fixed pullRow cuda test. * Added pullRow test. * Added average test. * - correct typo in NDArray::applyPairwiseTransform(nd4j::pairwise::BoolOps op...) Signed-off-by: Yurii * - debugging and fixing cuda tests in JavaInteropTests file Signed-off-by: Yurii * - correct some tests Signed-off-by: Yurii * Added test for shuffle. * Fixed ops declarations. * Restored omp and added shuffle test. * Added convertTypes test. * Added tests for execRandom. 
Eliminated usage of RandomBuffer with NativeOps. * Added sort tests. * Added tests for execCustomOp. * - further debugging and fixing tests terminated with crash Signed-off-by: Yurii * Added tests for calculateOutputShapes. * Added Benchmarks test. * Commented benchmark tests. * change assertion Signed-off-by: raver119 * Added tests for apply_sgd op. Added cpu helper for that op. * Implement cuda helper for apply_sgd op. Fixed tests for NativeOps. * Added test for assign broadcastable. * Added tests for assign_bp op. * Added tests for axpy op. * - assign/execScalar/execTransformAny signature change - minor test fix Signed-off-by: raver119 * Fixed axpy op. * meh Signed-off-by: raver119 * - fix tests for nativeOps::concat Signed-off-by: Yurii * sequential transform/scalar Signed-off-by: raver119 * allow nested parallelism Signed-off-by: raver119 * assign_bp leak fix Signed-off-by: raver119 * block setRNG fix Signed-off-by: raver119 * enable parallelism by default Signed-off-by: raver119 * enable nested parallelism by default Signed-off-by: raver119 * Added cuda implementation for row_count helper. * Added implementation for tsne gains op helper. * - take into account possible situations when input arrays are empty in reduce_ cuda stuff Signed-off-by: Yurii * Implemented tsne/edge_forces op cuda-based helper. Parallelized cpu-based helper for edge_forces. * Added kernel for tsne/symmetrized op helper. * Implementation of tsne/symmetrized op cuda helper. Working edition. * Eliminated waste printfs. * Added test for broadcastgradientargs op. * host-only fallback for empty reduce float Signed-off-by: raver119 * - some tests fixes Signed-off-by: Yurii * - correct the rest of reduce_ stuff Signed-off-by: Yurii * - further correction of reduce_ stuff Signed-off-by: Yurii * Added test for Cbow op. Also added cuda implementation for cbow helpers. 
* - improve code of stack operation for scalar case Signed-off-by: Yurii * - provide cuda kernel for gatherND operation Signed-off-by: Yurii * Implementation of cbow helpers with cuda kernels. * minor tests tweaks Signed-off-by: raver119 * minor tests tweaks Signed-off-by: raver119 * - further correction of cuda stuff Signed-off-by: Yurii * Implementation of cbow op helper with cuda kernels. Working edition. * Skip random testing for cudablas case. * lstmBlockCell context fix Signed-off-by: raver119 * Added tests for ELU and ELU_BP ops. * Added tests for eq_scalar, gt_scalar, gte_scalar and lte_scalar ops. * Added tests for neq_scalar. * Added test for noop. * - further work on clipbynorm_bp Signed-off-by: Yurii * - get rid of concat op call, use instead direct concat helper call Signed-off-by: Yurii * lstmBlockCell context fix Signed-off-by: raver119 * Added tests for lrelu and lrelu_bp. * Added tests for selu and selu_bp. * Fixed lrelu derivative helpers. * - some corrections in lstm Signed-off-by: Yurii * operator * result shape fix Signed-off-by: raver119 * - correct typo in lstmCell Signed-off-by: Yurii * few tests fixed Signed-off-by: raver119 * CUDA inverse broadcast bool fix Signed-off-by: raver119 * disable MMAP test for CUDA Signed-off-by: raver119 * BooleanOp syncToDevice Signed-off-by: raver119 * meh Signed-off-by: raver119 * additional data types for im2col/col2im Signed-off-by: raver119 * Added test for firas_sparse op. * one more RandomBuffer test excluded Signed-off-by: raver119 * Added tests for flatten op. * Added test for Floor op. * bunch of tests fixed Signed-off-by: raver119 * mmulDot tests fixed Signed-off-by: raver119 * more tests fixed Signed-off-by: raver119 * Implemented floordiv_bp op and tests. * Fixed scalar case with cuda implementation for bds. * - work on cuda kernel for clip_by_norm backprop op is completed Signed-off-by: Yurii * Eliminate cbow crash. 
* more tests fixed Signed-off-by: raver119 * more tests fixed Signed-off-by: raver119 * Eliminated abortion with batched nlp test. * more tests fixed Signed-off-by: raver119 * Fixed shared flag initializing. * disabled bunch of cpu workspaces tests Signed-off-by: raver119 * scalar operators fix: missing registerSpecialUse call Signed-off-by: raver119 * Fixed logdet for cuda and tests. * - correct clipBynorm_bp Signed-off-by: Yurii * Fixed crop_and_resize shape datatype. * - correct some mmul tests Signed-off-by: Yurii * build fix Signed-off-by: raver119 * exclude two methods for JNI Signed-off-by: raver119 * exclude two methods for JNI Signed-off-by: raver119 * exclude two methods for JNI (#97) Signed-off-by: raver119 * temporary stack fix Signed-off-by: raver119 * couple of legacy groups reorganized into separate compialtion units Signed-off-by: raver119 * wrong include Signed-off-by: raver119 * wrong include Signed-off-by: raver119 * ReductionLoops_float split Signed-off-by: raver119 * maximum Signed-off-by: raver119 * some more rearrangements Signed-off-by: raver119 * spare ifdef Signed-off-by: raver119 * mirror pad Signed-off-by: raver119 * - reduce_float split - mcmodel Signed-off-by: raver119 * bad include fix Signed-off-by: raver119 * norelax Signed-off-by: raver119 * norelax Signed-off-by: raver119 * norelax Signed-off-by: raver119 * norelax Signed-off-by: raver119 * norelax Signed-off-by: raver119 * norelax gone Signed-off-by: raver119 * get back sm Signed-off-by: raver119 * fix couple of tests for msvc Signed-off-by: raver119 * fix couple of tests for msvc Signed-off-by: raver119 * compress-all Signed-off-by: raver119 * reduced arch list Signed-off-by: raver119 * compress-all Signed-off-by: raver119 * reduced arch list Signed-off-by: raver119 * all compute capabilities option for tests Signed-off-by: raver119 --- libnd4j/blas/CMakeLists.txt | 28 +- ...s_float.cpp => ReductionLoops_float_0.cpp} | 2 +- .../impl/loops/ReductionLoops_float_1.cpp | 46 + 
.../impl/loops/ReductionLoops_float_2.cpp | 46 + .../impl/loops/ReductionLoops_float_3.cpp | 46 + .../{broadcasting.cu => broadcasting.chpp} | 3 +- .../broadcasting/broadcasting_0.cu | 27 + .../broadcasting/broadcasting_1.cu | 27 + .../broadcasting/broadcasting_2.cu | 27 + .../broadcasting/broadcasting_3.cu | 27 + .../broadcasting/broadcasting_4.cu | 27 + .../broadcasting/broadcasting_5.cu | 27 + .../broadcasting/broadcasting_6.cu | 27 + .../broadcasting/broadcasting_7.cu | 27 + .../broadcasting/broadcasting_8.cu | 27 + .../broadcasting/broadcasting_9.cu | 27 + .../compilation_units/pairwise/pairwise_0.cu | 27 + .../compilation_units/pairwise/pairwise_1.cu | 27 + .../compilation_units/pairwise/pairwise_2.cu | 27 + .../compilation_units/pairwise/pairwise_3.cu | 27 + .../compilation_units/pairwise/pairwise_4.cu | 27 + .../compilation_units/pairwise/pairwise_5.cu | 27 + .../compilation_units/pairwise/pairwise_6.cu | 27 + .../compilation_units/pairwise/pairwise_7.cu | 27 + .../compilation_units/pairwise/pairwise_8.cu | 27 + .../compilation_units/pairwise/pairwise_9.cu | 27 + .../compilation_units/reduce3/reduce3_0.cu | 27 + .../compilation_units/reduce3/reduce3_1.cu | 27 + .../compilation_units/reduce3/reduce3_2.cu | 27 + .../compilation_units/reduce3/reduce3_3.cu | 27 + .../reduce_float/reduce_float_0.cu | 27 + .../reduce_float/reduce_float_1.cu | 27 + .../reduce_float/reduce_float_2.cu | 27 + .../reduce_float/reduce_float_3.cu | 27 + .../cuda/compilation_units/scalar/scalar_0.cu | 27 + .../cuda/compilation_units/scalar/scalar_1.cu | 27 + .../cuda/compilation_units/scalar/scalar_2.cu | 27 + .../cuda/compilation_units/scalar/scalar_3.cu | 27 + .../cuda/compilation_units/scalar/scalar_4.cu | 27 + .../cuda/compilation_units/scalar/scalar_5.cu | 27 + .../cuda/compilation_units/scalar/scalar_6.cu | 27 + .../cuda/compilation_units/scalar/scalar_7.cu | 27 + .../cuda/compilation_units/scalar/scalar_8.cu | 27 + .../cuda/compilation_units/scalar/scalar_9.cu | 27 + 
.../loops/cuda/{pairwise.cu => pairwise.chpp} | 3 +- .../{reduce_float.cu => reduce_float.chpp} | 2 +- .../loops/cuda/{reduce3.cu => reduce3.chpp} | 2 +- .../loops/cuda/{scalar.cu => scalar.chpp} | 12 - .../ops/declarable/helpers/cuda/concat.cu | 129 +++ .../ops/declarable/helpers/cuda/gather_nd.cu | 147 ++++ .../declarable/helpers/cuda/legacy/relu.cu | 118 +++ .../declarable/helpers/cuda/legacy/tanh.cu | 93 ++ .../declarable/helpers/cuda/legacy_helper.cu | 151 ---- .../ops/declarable/helpers/cuda/maximum.cu | 114 +++ .../ops/declarable/helpers/cuda/merge.cu | 234 +++++ .../helpers/cuda/{minimax.cu => minimum.cu} | 80 -- .../ops/declarable/helpers/cuda/pad.cu | 283 ++++++ .../declarable/helpers/cuda/scatter_simple.cu | 79 ++ .../declarable/helpers/cuda/scatter_update.cu | 133 +++ .../ops/declarable/helpers/cuda/transforms.cu | 830 +----------------- libnd4j/tests_cpu/layers_tests/CMakeLists.txt | 8 +- .../layers_tests/ConvolutionTests1.cpp | 16 +- .../layers_tests/ConvolutionTests2.cpp | 26 +- .../layers_tests/DeclarableOpsTests14.cpp | 30 +- .../tests_cpu/layers_tests/NativeOpsTests.cpp | 6 +- 65 files changed, 2558 insertions(+), 1135 deletions(-) rename libnd4j/include/helpers/impl/loops/{ReductionLoops_float.cpp => ReductionLoops_float_0.cpp} (97%) create mode 100644 libnd4j/include/helpers/impl/loops/ReductionLoops_float_1.cpp create mode 100644 libnd4j/include/helpers/impl/loops/ReductionLoops_float_2.cpp create mode 100644 libnd4j/include/helpers/impl/loops/ReductionLoops_float_3.cpp rename libnd4j/include/loops/cuda/{broadcasting.cu => broadcasting.chpp} (99%) create mode 100644 libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_0.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_1.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_2.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_3.cu create mode 100644 
libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_4.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_5.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_6.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_7.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_8.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_9.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_0.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_1.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_2.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_3.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_4.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_5.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_6.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_7.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_8.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_9.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_0.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_1.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_2.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_3.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_0.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_1.cu create mode 100644 
libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_2.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_3.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/scalar/scalar_0.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/scalar/scalar_1.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/scalar/scalar_2.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/scalar/scalar_3.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/scalar/scalar_4.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/scalar/scalar_5.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/scalar/scalar_6.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/scalar/scalar_7.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/scalar/scalar_8.cu create mode 100644 libnd4j/include/loops/cuda/compilation_units/scalar/scalar_9.cu rename libnd4j/include/loops/cuda/{pairwise.cu => pairwise.chpp} (99%) rename libnd4j/include/loops/cuda/reduce/{reduce_float.cu => reduce_float.chpp} (99%) rename libnd4j/include/loops/cuda/{reduce3.cu => reduce3.chpp} (99%) rename libnd4j/include/loops/cuda/{scalar.cu => scalar.chpp} (89%) create mode 100644 libnd4j/include/ops/declarable/helpers/cuda/concat.cu create mode 100644 libnd4j/include/ops/declarable/helpers/cuda/gather_nd.cu create mode 100644 libnd4j/include/ops/declarable/helpers/cuda/legacy/relu.cu create mode 100644 libnd4j/include/ops/declarable/helpers/cuda/legacy/tanh.cu create mode 100644 libnd4j/include/ops/declarable/helpers/cuda/maximum.cu create mode 100644 libnd4j/include/ops/declarable/helpers/cuda/merge.cu rename libnd4j/include/ops/declarable/helpers/cuda/{minimax.cu => minimum.cu} (56%) create mode 100644 libnd4j/include/ops/declarable/helpers/cuda/pad.cu create mode 100644 libnd4j/include/ops/declarable/helpers/cuda/scatter_simple.cu create mode 100644 
libnd4j/include/ops/declarable/helpers/cuda/scatter_update.cu diff --git a/libnd4j/blas/CMakeLists.txt b/libnd4j/blas/CMakeLists.txt index d7a0f1dd1..13a85eaf2 100755 --- a/libnd4j/blas/CMakeLists.txt +++ b/libnd4j/blas/CMakeLists.txt @@ -99,7 +99,7 @@ elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel") elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") # using Visual Studio C++ - set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc") + set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc /w") elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") # using GCC SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_TUNE}") @@ -118,16 +118,6 @@ if(!CUDA_BLAS) endif() endif() -# TODO: get rid of this once problem confirmed solved -#if (APPLE) -# if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") -# if ("${CMAKE_C_COMPILER_VERSION}" VERSION_GREATER 6.0 OR "${CMAKE_C_COMPILER_VERSION}" VERSION_EQUAL 6.0) -# SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wa,-mavx512f,-mavx512vl,-mavx512bw,-mavx512dq,-mavx512cd ") -# endif() -# endif() -#endif() - - if(CUDA_BLAS) message("Build cublas") find_package(CUDA) @@ -173,32 +163,32 @@ if(CUDA_BLAS) if(CUDA_VERSION VERSION_GREATER "9.2") # cuda 10 if ("${COMPUTE}" STREQUAL "all") if (APPLE) - list(APPEND CUDA_NVCC_FLAGS -DCUDA_10 ${EXPM} -w --cudart=static -O3 --expt-extended-lambda -gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61) + list(APPEND CUDA_NVCC_FLAGS -DCUDA_10 ${EXPM} -w --cudart=static -O3 --expt-extended-lambda -gencode arch=compute_35,code=sm_35 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_60,code=sm_60) else() - list(APPEND CUDA_NVCC_FLAGS -DCUDA_10 ${EXPM} -w --cudart=static -O3 --expt-extended-lambda -gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52 -gencode 
arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75) + list(APPEND CUDA_NVCC_FLAGS -DCUDA_10 ${EXPM} -w --cudart=static -O3 --expt-extended-lambda -gencode arch=compute_35,code=sm_35 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70) endif() else() - list(APPEND CUDA_NVCC_FLAGS -DCUDA_10 ${EXPM} -w --cudart=static --expt-extended-lambda -O3 -arch=compute_${COMPUTE} -code=sm_${COMPUTE}) + list(APPEND CUDA_NVCC_FLAGS -DCUDA_10 ${EXPM} -w --cudart=static --expt-extended-lambda -O3 --fatbin -arch=compute_${COMPUTE} -code=sm_${COMPUTE}) endif() elseif(CUDA_VERSION VERSION_GREATER "8.0") # cuda 9 if ("${COMPUTE}" STREQUAL "all") if (APPLE) - list(APPEND CUDA_NVCC_FLAGS -DCUDA_9 ${EXPM} -w --cudart=static -O3 --expt-extended-lambda -gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61) + list(APPEND CUDA_NVCC_FLAGS -DCUDA_9 ${EXPM} -w --cudart=static -O3 --expt-extended-lambda -gencode arch=compute_35,code=sm_35 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_60,code=sm_60) else() - list(APPEND CUDA_NVCC_FLAGS -DCUDA_9 ${EXPM} -w --cudart=static -O3 --expt-extended-lambda -gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61) + list(APPEND CUDA_NVCC_FLAGS -DCUDA_9 ${EXPM} -w --cudart=static -O3 --expt-extended-lambda -gencode arch=compute_35,code=sm_35 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_60,code=sm_60) endif() else() list(APPEND CUDA_NVCC_FLAGS -DCUDA_9 ${EXPM} -w --cudart=static --expt-extended-lambda -O3 -arch=compute_${COMPUTE} -code=sm_${COMPUTE}) endif() elseif (CUDA_VERSION 
VERSION_GREATER "7.5") # cuda 8.0 if ("${COMPUTE}" STREQUAL "all") - list(APPEND CUDA_NVCC_FLAGS -DCUDA_8 ${EXPM} -w --cudart=static -O3 --expt-extended-lambda -gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61) + list(APPEND CUDA_NVCC_FLAGS -DCUDA_8 ${EXPM} -w --cudart=static -O3 --expt-extended-lambda -gencode arch=compute_30,code=sm_30 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_60,code=sm_60) else() list(APPEND CUDA_NVCC_FLAGS -DCUDA_8 ${EXPM} -w --cudart=static --expt-extended-lambda -O3 -arch=compute_${COMPUTE} -code=sm_${COMPUTE}) endif() else() if ("${COMPUTE}" STREQUAL "all") - list(APPEND CUDA_NVCC_FLAGS -DCUDA_75 ${EXPM} --cudart=static --expt-extended-lambda -O3 -gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52) + list(APPEND CUDA_NVCC_FLAGS -DCUDA_75 ${EXPM} --cudart=static --expt-extended-lambda -O3 -gencode arch=compute_30,code=sm_30 -gencode arch=compute_52,code=sm_52 ) else() list(APPEND CUDA_NVCC_FLAGS -DCUDA_75 ${EXPM} --cudart=static --expt-extended-lambda -O3 -arch=compute_${COMPUTE} -code=sm_${COMPUTE}) endif() @@ -220,7 +210,7 @@ if(CUDA_BLAS) list(APPEND CUDA_NVCC_FLAGS -DCUDA_10 ${EXPM} -w -G -g --cudart=static --expt-extended-lambda -gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_53,code=sm_53 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_62,code=sm_62 -gencode arch=compute_70,code=sm_70) endif() else() - list(APPEND CUDA_NVCC_FLAGS -DCUDA_10 ${EXPM} -w -G -g 
--cudart=static --expt-extended-lambda -arch=compute_${COMPUTE} -code=sm_${COMPUTE}) + list(APPEND CUDA_NVCC_FLAGS -DCUDA_10 ${EXPM} -w -G -g --cudart=static --expt-extended-lambda -arch=compute_${COMPUTE} -code=compute_${COMPUTE}) endif() elseif(CUDA_VERSION VERSION_GREATER "8.0") # cuda 9 if ("${COMPUTE}" STREQUAL "all") diff --git a/libnd4j/include/helpers/impl/loops/ReductionLoops_float.cpp b/libnd4j/include/helpers/impl/loops/ReductionLoops_float_0.cpp similarity index 97% rename from libnd4j/include/helpers/impl/loops/ReductionLoops_float.cpp rename to libnd4j/include/helpers/impl/loops/ReductionLoops_float_0.cpp index f6173036e..c7b1f6ff8 100644 --- a/libnd4j/include/helpers/impl/loops/ReductionLoops_float.cpp +++ b/libnd4j/include/helpers/impl/loops/ReductionLoops_float_0.cpp @@ -40,7 +40,7 @@ namespace nd4j { DISPATCH_BY_OPNUM_TT(innerloopReduce, PARAMS(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams ), REDUCE_FLOAT_OPS); } - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReductionFloatLoops, , LIBND4J_TYPES, FLOAT_TYPES); + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReductionFloatLoops, , LIBND4J_TYPES, FLOAT_TYPES_0); } diff --git a/libnd4j/include/helpers/impl/loops/ReductionLoops_float_1.cpp b/libnd4j/include/helpers/impl/loops/ReductionLoops_float_1.cpp new file mode 100644 index 000000000..76c1141bf --- /dev/null +++ b/libnd4j/include/helpers/impl/loops/ReductionLoops_float_1.cpp @@ -0,0 +1,46 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. 
+ * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "ReductionLoops.hpp" +#include +#include + +using namespace simdOps; + +namespace nd4j { + + template + template + void ReductionFloatLoops::innerloopReduce(X * x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, Z* extraParams) { + ReductionLoops::template loopReduce(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams); + } + + template + void ReductionFloatLoops::wrapper(const int opNum, X *x, Nd4jLong *xShapeInfo, Y *z, + Nd4jLong *zShapeInfo, Nd4jLong *tadShapeInfo, + Nd4jLong *tadOffsets, Y *extraParams) { + + DISPATCH_BY_OPNUM_TT(innerloopReduce, PARAMS(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams ), REDUCE_FLOAT_OPS); + } + + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReductionFloatLoops, , LIBND4J_TYPES, FLOAT_TYPES_1); +} + + diff --git a/libnd4j/include/helpers/impl/loops/ReductionLoops_float_2.cpp b/libnd4j/include/helpers/impl/loops/ReductionLoops_float_2.cpp new file mode 100644 index 000000000..7288816ad --- /dev/null +++ b/libnd4j/include/helpers/impl/loops/ReductionLoops_float_2.cpp @@ -0,0 +1,46 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. 
+ * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "ReductionLoops.hpp" +#include +#include + +using namespace simdOps; + +namespace nd4j { + + template + template + void ReductionFloatLoops::innerloopReduce(X * x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, Z* extraParams) { + ReductionLoops::template loopReduce(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams); + } + + template + void ReductionFloatLoops::wrapper(const int opNum, X *x, Nd4jLong *xShapeInfo, Y *z, + Nd4jLong *zShapeInfo, Nd4jLong *tadShapeInfo, + Nd4jLong *tadOffsets, Y *extraParams) { + + DISPATCH_BY_OPNUM_TT(innerloopReduce, PARAMS(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams ), REDUCE_FLOAT_OPS); + } + + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReductionFloatLoops, , LIBND4J_TYPES, FLOAT_TYPES_2); +} + + diff --git a/libnd4j/include/helpers/impl/loops/ReductionLoops_float_3.cpp b/libnd4j/include/helpers/impl/loops/ReductionLoops_float_3.cpp new file mode 100644 index 000000000..251624076 --- /dev/null +++ b/libnd4j/include/helpers/impl/loops/ReductionLoops_float_3.cpp @@ -0,0 +1,46 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. 
+ * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "ReductionLoops.hpp" +#include +#include + +using namespace simdOps; + +namespace nd4j { + + template + template + void ReductionFloatLoops::innerloopReduce(X * x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, Z* extraParams) { + ReductionLoops::template loopReduce(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams); + } + + template + void ReductionFloatLoops::wrapper(const int opNum, X *x, Nd4jLong *xShapeInfo, Y *z, + Nd4jLong *zShapeInfo, Nd4jLong *tadShapeInfo, + Nd4jLong *tadOffsets, Y *extraParams) { + + DISPATCH_BY_OPNUM_TT(innerloopReduce, PARAMS(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams ), REDUCE_FLOAT_OPS); + } + + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReductionFloatLoops, , LIBND4J_TYPES, FLOAT_TYPES_3); +} + + diff --git a/libnd4j/include/loops/cuda/broadcasting.cu b/libnd4j/include/loops/cuda/broadcasting.chpp similarity index 99% rename from libnd4j/include/loops/cuda/broadcasting.cu rename to libnd4j/include/loops/cuda/broadcasting.chpp index b6a7b1830..e673f4eae 100644 --- a/libnd4j/include/loops/cuda/broadcasting.cu +++ b/libnd4j/include/loops/cuda/broadcasting.chpp @@ -220,7 +220,7 @@ namespace functions { } } } - +/* BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_0); BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_1); BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , 
PAIRWISE_TYPES_2); @@ -231,5 +231,6 @@ namespace functions { BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_7); BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_8); BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_9); + */ } } \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_0.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_0.cu new file mode 100644 index 000000000..d7902af87 --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_0.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../broadcasting.chpp" + +namespace functions { + namespace broadcast { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_0); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_1.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_1.cu new file mode 100644 index 000000000..b24ebdb6c --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_1.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../broadcasting.chpp" + +namespace functions { + namespace broadcast { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_1); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_2.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_2.cu new file mode 100644 index 000000000..7d7fdc1b6 --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_2.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../broadcasting.chpp" + +namespace functions { + namespace broadcast { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_2); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_3.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_3.cu new file mode 100644 index 000000000..d5c09f114 --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_3.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../broadcasting.chpp" + +namespace functions { + namespace broadcast { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_3); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_4.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_4.cu new file mode 100644 index 000000000..f3c64a91a --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_4.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../broadcasting.chpp" + +namespace functions { + namespace broadcast { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_4); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_5.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_5.cu new file mode 100644 index 000000000..5ca557a30 --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_5.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../broadcasting.chpp" + +namespace functions { + namespace broadcast { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_5); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_6.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_6.cu new file mode 100644 index 000000000..9c53e8b36 --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_6.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../broadcasting.chpp" + +namespace functions { + namespace broadcast { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_6); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_7.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_7.cu new file mode 100644 index 000000000..a64b6f0d3 --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_7.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../broadcasting.chpp" + +namespace functions { + namespace broadcast { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_7); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_8.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_8.cu new file mode 100644 index 000000000..4404fed7c --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_8.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../broadcasting.chpp" + +namespace functions { + namespace broadcast { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_8); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_9.cu b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_9.cu new file mode 100644 index 000000000..dbb560f5c --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/broadcasting/broadcasting_9.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../broadcasting.chpp" + +namespace functions { + namespace broadcast { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT Broadcast, , PAIRWISE_TYPES_9); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_0.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_0.cu new file mode 100644 index 000000000..e57433ae2 --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_0.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../pairwise.chpp" + +namespace functions { + namespace pairwise_transforms { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_0); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_1.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_1.cu new file mode 100644 index 000000000..513a2c056 --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_1.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../pairwise.chpp" + +namespace functions { + namespace pairwise_transforms { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_1); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_2.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_2.cu new file mode 100644 index 000000000..8e0261d14 --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_2.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../pairwise.chpp" + +namespace functions { + namespace pairwise_transforms { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_2); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_3.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_3.cu new file mode 100644 index 000000000..86c23344a --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_3.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../pairwise.chpp" + +namespace functions { + namespace pairwise_transforms { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_3); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_4.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_4.cu new file mode 100644 index 000000000..1ac28891f --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_4.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../pairwise.chpp" + +namespace functions { + namespace pairwise_transforms { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_4); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_5.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_5.cu new file mode 100644 index 000000000..713fe344c --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_5.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../pairwise.chpp" + +namespace functions { + namespace pairwise_transforms { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_5); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_6.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_6.cu new file mode 100644 index 000000000..0983be1e9 --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_6.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../pairwise.chpp" + +namespace functions { + namespace pairwise_transforms { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_6); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_7.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_7.cu new file mode 100644 index 000000000..b12d82eac --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_7.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../pairwise.chpp" + +namespace functions { + namespace pairwise_transforms { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_7); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_8.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_8.cu new file mode 100644 index 000000000..fc1876f3d --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_8.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../pairwise.chpp" + +namespace functions { + namespace pairwise_transforms { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_8); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_9.cu b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_9.cu new file mode 100644 index 000000000..f13c28e85 --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/pairwise/pairwise_9.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../pairwise.chpp" + +namespace functions { + namespace pairwise_transforms { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_9); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_0.cu b/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_0.cu new file mode 100644 index 000000000..d3aeadb5f --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_0.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../reduce3.chpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES, FLOAT_TYPES_0); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_1.cu b/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_1.cu new file mode 100644 index 000000000..cfc7cb5f3 --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_1.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../reduce3.chpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES, FLOAT_TYPES_1); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_2.cu b/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_2.cu new file mode 100644 index 000000000..754ac9f52 --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_2.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../reduce3.chpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES, FLOAT_TYPES_2); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_3.cu b/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_3.cu new file mode 100644 index 000000000..340698b34 --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/reduce3/reduce3_3.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../reduce3.chpp" + +namespace functions { + namespace reduce3 { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES, FLOAT_TYPES_3); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_0.cu b/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_0.cu new file mode 100644 index 000000000..dd893939d --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_0.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../reduce/reduce_float.chpp" + +namespace functions { + namespace reduce { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReduceFloatFunction, , LIBND4J_TYPES, FLOAT_TYPES_0); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_1.cu b/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_1.cu new file mode 100644 index 000000000..4d98cb61c --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_1.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../reduce/reduce_float.chpp" + +namespace functions { + namespace reduce { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReduceFloatFunction, , LIBND4J_TYPES, FLOAT_TYPES_1); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_2.cu b/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_2.cu new file mode 100644 index 000000000..346627563 --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_2.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../reduce/reduce_float.chpp" + +namespace functions { + namespace reduce { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReduceFloatFunction, , LIBND4J_TYPES, FLOAT_TYPES_2); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_3.cu b/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_3.cu new file mode 100644 index 000000000..2852063ad --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/reduce_float/reduce_float_3.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../reduce/reduce_float.chpp" + +namespace functions { + namespace reduce { + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReduceFloatFunction, , LIBND4J_TYPES, FLOAT_TYPES_3); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_0.cu b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_0.cu new file mode 100644 index 000000000..28f754b14 --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_0.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../scalar.chpp" + +namespace functions { + namespace scalar { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_0); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_1.cu b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_1.cu new file mode 100644 index 000000000..fb54e4767 --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_1.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../scalar.chpp" + +namespace functions { + namespace scalar { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_1); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_2.cu b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_2.cu new file mode 100644 index 000000000..af2de5b0e --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_2.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../scalar.chpp" + +namespace functions { + namespace scalar { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_2); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_3.cu b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_3.cu new file mode 100644 index 000000000..a50cee507 --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_3.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../scalar.chpp" + +namespace functions { + namespace scalar { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_3); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_4.cu b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_4.cu new file mode 100644 index 000000000..7f99764d8 --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_4.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../scalar.chpp" + +namespace functions { + namespace scalar { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_4); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_5.cu b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_5.cu new file mode 100644 index 000000000..10e93e14c --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_5.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../scalar.chpp" + +namespace functions { + namespace scalar { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_5); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_6.cu b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_6.cu new file mode 100644 index 000000000..a1a98cf41 --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_6.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../scalar.chpp" + +namespace functions { + namespace scalar { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_6); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_7.cu b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_7.cu new file mode 100644 index 000000000..f29d26c44 --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_7.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../scalar.chpp" + +namespace functions { + namespace scalar { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_7); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_8.cu b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_8.cu new file mode 100644 index 000000000..38d275b6f --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_8.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../scalar.chpp" + +namespace functions { + namespace scalar { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_8); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_9.cu b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_9.cu new file mode 100644 index 000000000..be7c66956 --- /dev/null +++ b/libnd4j/include/loops/cuda/compilation_units/scalar/scalar_9.cu @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include "../../scalar.chpp" + +namespace functions { + namespace scalar { + BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_9); + } +} \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/pairwise.cu b/libnd4j/include/loops/cuda/pairwise.chpp similarity index 99% rename from libnd4j/include/loops/cuda/pairwise.cu rename to libnd4j/include/loops/cuda/pairwise.chpp index efa4ddb82..a7e6f32cc 100644 --- a/libnd4j/include/loops/cuda/pairwise.cu +++ b/libnd4j/include/loops/cuda/pairwise.chpp @@ -106,7 +106,7 @@ void __host__ PairWiseTransform::executeCudaShaped(dim3& launchDims, cuda DISPATCH_BY_OPNUM_TTT(intermediateShaped, PARAMS(launchDims, stream, vx, xShapeInfo, vy, yShapeInfo, vz, zShapeInfo, vextraParams), PAIRWISE_TRANSFORM_OPS); } - +/* BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_0); BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_1); BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_2); @@ -117,6 +117,7 @@ void __host__ PairWiseTransform::executeCudaShaped(dim3& launchDims, cuda BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_7); BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_8); BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT PairWiseTransform, , PAIRWISE_TYPES_9); + */ } } diff --git a/libnd4j/include/loops/cuda/reduce/reduce_float.cu b/libnd4j/include/loops/cuda/reduce/reduce_float.chpp similarity index 99% rename from libnd4j/include/loops/cuda/reduce/reduce_float.cu rename to libnd4j/include/loops/cuda/reduce/reduce_float.chpp index 363488700..ffd075715 100644 --- a/libnd4j/include/loops/cuda/reduce/reduce_float.cu +++ 
b/libnd4j/include/loops/cuda/reduce/reduce_float.chpp @@ -304,7 +304,7 @@ __device__ void initializeShared(X *extraParams, X **sPartials, int sMemSize) { } -BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReduceFloatFunction, , LIBND4J_TYPES, FLOAT_TYPES); +//BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT ReduceFloatFunction, , LIBND4J_TYPES, FLOAT_TYPES); } } diff --git a/libnd4j/include/loops/cuda/reduce3.cu b/libnd4j/include/loops/cuda/reduce3.chpp similarity index 99% rename from libnd4j/include/loops/cuda/reduce3.cu rename to libnd4j/include/loops/cuda/reduce3.chpp index 61fe31a1e..819c215fc 100644 --- a/libnd4j/include/loops/cuda/reduce3.cu +++ b/libnd4j/include/loops/cuda/reduce3.chpp @@ -559,7 +559,7 @@ __host__ void Reduce3::execScalar(dim3 launchDims, cudaStream_t *stream, - BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES, FLOAT_TYPES); + //BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT Reduce3, , LIBND4J_TYPES, FLOAT_TYPES); } } \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/scalar.cu b/libnd4j/include/loops/cuda/scalar.chpp similarity index 89% rename from libnd4j/include/loops/cuda/scalar.cu rename to libnd4j/include/loops/cuda/scalar.chpp index 4f1e6aa95..503e5c97a 100644 --- a/libnd4j/include/loops/cuda/scalar.cu +++ b/libnd4j/include/loops/cuda/scalar.chpp @@ -165,18 +165,6 @@ void ScalarTransform::executeCudaAlongDimension(dim3& launchDims, cudaStr } - -BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_0); -BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_1); -BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_2); -BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_3); -BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_4); -BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_5); 
-BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_6); -BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_7); -BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_8); -BUILD_PAIRWISE_TEMPLATE(template class ND4J_EXPORT ScalarTransform, , PAIRWISE_TYPES_9); - } } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/concat.cu b/libnd4j/include/ops/declarable/helpers/cuda/concat.cu new file mode 100644 index 000000000..1a1730efc --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/cuda/concat.cu @@ -0,0 +1,129 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com), created on 20.04.2018 +// + + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace nd4j { + namespace ops { + namespace helpers { + /////////////////////////////////////////////////////////////////// + template + __global__ static void concatCuda(const int numOfArrs, void* pVx, void* pxShapeInfo, void* pVz, void* pzShapeInfo) { + + __shared__ int arrIdx, blocksPerArr; + __shared__ T *x, *z; + __shared__ Nd4jLong *zShapeInfo, *xShapeInfo, arrLen, arrLenPerBlock, start, end; + + if (threadIdx.x == 0) { + + blocksPerArr = (gridDim.x + numOfArrs - 1) / numOfArrs; // ceil + arrIdx = blockIdx.x / blocksPerArr; + + x = reinterpret_cast(reinterpret_cast(pVx)[arrIdx]); + z = reinterpret_cast(reinterpret_cast(pVz)[arrIdx]); + xShapeInfo = reinterpret_cast(pxShapeInfo)[arrIdx]; + zShapeInfo = reinterpret_cast(pzShapeInfo)[arrIdx]; + arrLen = shape::length(xShapeInfo); + + arrLenPerBlock = (arrLen + blocksPerArr - 1) / blocksPerArr; // ceil + + start = (blockIdx.x % blocksPerArr) * arrLenPerBlock; + end = (start + arrLenPerBlock) > arrLen ? 
arrLen : (start + arrLenPerBlock); + } + + __syncthreads(); + + for (Nd4jLong i = start + threadIdx.x; i < end; i += blockDim.x) + z[shape::getIndexOffset(i, zShapeInfo, arrLen)] = x[shape::getIndexOffset(i, xShapeInfo, arrLen)]; + } + +/////////////////////////////////////////////////////////////////// + template + __host__ static void concatCudaLauncher(const int numOfArrs, const cudaStream_t *stream, void* pVx, void* pxShapeInfo, void* pVz, void* pzShapeInfo) { + + concatCuda<<<512, 256, 1024, *stream>>>(numOfArrs, pVx, pxShapeInfo, pVz, pzShapeInfo); + } + BUILD_SINGLE_TEMPLATE(template void concatCudaLauncher, (const int numOfArrs, const cudaStream_t *stream, void* pVx, void* pxShapeInfo, void* pVz, void* pzShapeInfo), LIBND4J_TYPES); + + ////////////////////////////////////////////////////////////////////////// + void concat(nd4j::LaunchContext * context, const std::vector& inArrs, NDArray& output, const int axis) { + + const int numOfArrs = inArrs.size(); + for(int i = 0; i < numOfArrs; ++i) + if(!inArrs[i]->isActualOnDeviceSide()) inArrs[i]->syncToDevice(); + + const int rank = inArrs[0]->rankOf(); + const int rank2 = 2*rank; + std::vector> indices(numOfArrs, std::vector(rank2,0)); + + // take into account indices for first array + indices[0][2 * axis + 1] = inArrs[0]->sizeAt(axis); + + // loop through the rest of input arrays + for(int i = 1; i < numOfArrs; ++i) { + indices[i][2 * axis] = indices[i-1][2 * axis + 1]; // index start from + indices[i][2 * axis + 1] = indices[i-1][2 * axis + 1] + inArrs[i]->sizeAt(axis); // index end with (excluding) + } + + std::vector outSubArrs(numOfArrs); + for(int i = 0; i < numOfArrs; ++i) + outSubArrs[i] = new NDArray(output(indices[i], true)); + + // prepare arrays of pointers on buffers and shapes + std::vector hOutBuffers(numOfArrs), hInBuffers(numOfArrs); + std::vector hOutShapeInfo(numOfArrs), hInShapeInfo(numOfArrs); + for(int i = 0; i < numOfArrs; ++i) { + hOutBuffers[i] = outSubArrs[i]->getSpecialBuffer(); + 
hInBuffers[i] = inArrs[i]->getSpecialBuffer(); + hOutShapeInfo[i] = outSubArrs[i]->getSpecialShapeInfo(); + hInShapeInfo[i] = inArrs[i]->getSpecialShapeInfo(); + } + + // allocate and copy all buffers and shapes arrays to global memory + PointersManager manager(context, "helpers::concat"); + void* dOutBuffers = manager.replicatePointer(hOutBuffers.data(), hOutBuffers.size() * sizeof(void*)); + void* dInBuffers = manager.replicatePointer(hInBuffers.data(), hInBuffers.size() * sizeof(void*)); + void* dInShapeInfo = manager.replicatePointer(hInShapeInfo.data(), hInShapeInfo.size() * sizeof(Nd4jLong*)); + void* dOutShapeInfo = manager.replicatePointer(hOutShapeInfo.data(), hOutShapeInfo.size() * sizeof(Nd4jLong*)); + + BUILD_SINGLE_SELECTOR(inArrs[0]->dataType(), concatCudaLauncher, (numOfArrs, context->getCudaStream(), dInBuffers, dInShapeInfo, dOutBuffers, dOutShapeInfo), LIBND4J_TYPES); + + manager.synchronize(); + + for(int i = 0; i < numOfArrs; ++i) + delete outSubArrs[i]; + + for(int i = 0; i < numOfArrs; ++i) + inArrs[i]->tickReadHost(); + + output.tickWriteDevice(); + } + } + } +} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cuda/gather_nd.cu b/libnd4j/include/ops/declarable/helpers/cuda/gather_nd.cu new file mode 100644 index 000000000..614ac95c1 --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/cuda/gather_nd.cu @@ -0,0 +1,147 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com), created on 20.04.2018 +// + + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace nd4j { + namespace ops { + namespace helpers { + /////////////////////////////////////////////////////////////////// +// x - input, y - indices, z - output + template + __global__ static void gatherNDCuda(const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo) { + + const auto x = reinterpret_cast(vx); + const auto y = reinterpret_cast(vy); + auto z = reinterpret_cast(vz); + + __shared__ int xRank, yRank, zRank, maxRank, yLastDim; + __shared__ Nd4jLong zLen, totalThreads, *sharedMem; + + if (threadIdx.x == 0) { + + extern __shared__ unsigned char shmem[]; + sharedMem = reinterpret_cast(shmem); + + xRank = shape::rank(xShapeInfo); + yRank = shape::rank(yShapeInfo); + zRank = shape::rank(zShapeInfo); + maxRank = nd4j::math::nd4j_max(yRank, nd4j::math::nd4j_max(xRank, zRank)); + + zLen = shape::length(zShapeInfo); + yLastDim = yShapeInfo[yRank]; + + totalThreads = gridDim.x * blockDim.x; + } + + __syncthreads(); + + auto coord = sharedMem + threadIdx.x * maxRank; + + Nd4jLong *zCoordStart, *xCoordStart; + + if(yLastDim == xRank) { + zCoordStart = coord; + xCoordStart = coord; + } + else if(zRank >= xRank) { // must not override the full-index layout chosen above, otherwise zCoordStart[yRank - 1] can land on coord[maxRank] + zCoordStart = coord; + xCoordStart = coord + zRank - xRank; + } + else { + zCoordStart = coord + xRank - zRank; + xCoordStart = coord; + } + + const auto tid = blockIdx.x * blockDim.x + threadIdx.x; + + for (Nd4jLong i = tid; i < zLen; i += totalThreads) { + + shape::index2coords(zRank, zShapeInfo + 1, i, zLen, zCoordStart); + + const auto zOffset = shape::getOffset(0, zShapeInfo + 1, 
zShapeInfo + zRank + 1, zCoordStart, zRank); + + // last y coordinate + int coordToRestore; + if(yLastDim != xRank) + coordToRestore = static_cast(zCoordStart[yRank - 1]); + + zCoordStart[yRank - 1] = 0; // last y coordinate + const auto yOffset = shape::getOffset(0, yShapeInfo + 1, yShapeInfo + yRank + 1, zCoordStart, yRank); + + //restore z coordinate + if(yLastDim != xRank) + zCoordStart[yRank - 1] = coordToRestore; + + // construct coordinates for x + for(uint j = 0; j < yLastDim; ++j) + xCoordStart[j] = y[yOffset + j * yShapeInfo[2 * yRank]]; // last stride + + const auto xOffset = shape::getOffset(0, xShapeInfo + 1, xShapeInfo + xRank + 1, xCoordStart, xRank); + + z[zOffset] = x[xOffset]; + } + } + +/////////////////////////////////////////////////////////////////// + template + static void gatherNDCudaLauncher(const int blocksPerGrid, const int threadsPerBlock, const int sharedMem, const cudaStream_t *stream, + const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo) { + + gatherNDCuda<<>>(vx, xShapeInfo, vy, yShapeInfo, vz, zShapeInfo); + } + BUILD_DOUBLE_TEMPLATE(template void gatherNDCudaLauncher, (const int blocksPerGrid, const int threadsPerBlock, const int sharedMem, const cudaStream_t *stream, const void *vx, const Nd4jLong *xShapeInfo, const void *vy, const Nd4jLong *yShapeInfo, void *vz, const Nd4jLong *zShapeInfo), LIBND4J_TYPES, INTEGER_TYPES); + +/////////////////////////////////////////////////////////////////// + void gatherND(nd4j::LaunchContext * context, NDArray& input, NDArray& indices, NDArray& output) { + + const int maxRank = nd4j::math::nd4j_max(indices.rankOf(), nd4j::math::nd4j_max(input.rankOf(), output.rankOf())); + + const int threadsPerBlock = MAX_NUM_THREADS; + const int blocksPerGrid = (output.lengthOf() + threadsPerBlock - 1) / threadsPerBlock; + const int sharedMem = 8 * threadsPerBlock * maxRank + 128; + + const auto xType = input.dataType(); + const 
auto yType = indices.dataType(); + + PointersManager manager(context, "gatherND"); + + NDArray::prepareSpecialUse({&output}, {&input, &indices}); + BUILD_DOUBLE_SELECTOR(xType, yType, gatherNDCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), indices.getSpecialBuffer(), indices.getSpecialShapeInfo(), output.getSpecialBuffer(), output.getSpecialShapeInfo()), LIBND4J_TYPES, INTEGER_TYPES); + NDArray::registerSpecialUse({&output}, {&input, &indices}); + + manager.synchronize(); + } + } + } +} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cuda/legacy/relu.cu b/libnd4j/include/ops/declarable/helpers/cuda/legacy/relu.cu new file mode 100644 index 000000000..46f972f44 --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/cuda/legacy/relu.cu @@ -0,0 +1,118 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author GS +// + +#include +#include +#include + +namespace nd4j { + namespace ops { + namespace helpers { + + template + linkage void reluDerivative__(NDArray* theFirst, NDArray* theSecond) { + auto functor = LAMBDA_TT(x, y){ + return x > (T) 0.f ? 
y : T(0.f); + }; + + theFirst->applyPairwiseLambda(theSecond, functor, nullptr); + } + BUILD_SINGLE_TEMPLATE(template void reluDerivative__, (NDArray* input, NDArray* epsilon), FLOAT_TYPES); + + void reluDerivative(nd4j::LaunchContext * context, NDArray* theFirst, NDArray* theSecond) { + BUILD_SINGLE_SELECTOR(theFirst->dataType(), reluDerivative__, (theFirst, theSecond), FLOAT_TYPES); + } + + template + linkage void reluDerivative_(NDArray* input, NDArray* epsilon, NDArray* output) { + auto functor = LAMBDA_TT(x, y){ + return x > (T)0.f ? y : T(0.f); + }; + + input->applyPairwiseLambda(epsilon, functor, output); + } + BUILD_SINGLE_TEMPLATE(template void reluDerivative_, (NDArray* input, NDArray* epsilon, NDArray*output);, FLOAT_TYPES); + + void reluDerivative(nd4j::LaunchContext * context, NDArray* theFirst, NDArray* theSecond, NDArray* theOutput) { + BUILD_SINGLE_SELECTOR(theFirst->dataType(), reluDerivative_, (theFirst, theSecond, theOutput), FLOAT_TYPES); + } + + template + linkage void relu6Derivative_(NDArray* input, NDArray* epsilon, NDArray* output) { + auto functor = LAMBDA_TT(x, y){ + return x > (T)0.f && x < (T)6.f? y : T(0.f); + }; + + input->applyPairwiseLambda(epsilon, functor, output); + } + + BUILD_SINGLE_TEMPLATE(template void relu6Derivative_, (NDArray* input, NDArray* epsilon, NDArray*output);, FLOAT_TYPES); + + void relu6Derivative(nd4j::LaunchContext * context, NDArray* theFirst, NDArray* theSecond, NDArray* theOutput) { + BUILD_SINGLE_SELECTOR(theFirst->dataType(), relu6Derivative_, (theFirst, theSecond, theOutput), FLOAT_TYPES); + } + + template + linkage void leakyReluDerivative_(NDArray* input, NDArray* epsilon, NDArray* output) { + auto functor = LAMBDA_TT(x, y){ + return x >= (T)0.f? 
y : T(0.f); + }; + + input->applyPairwiseLambda(epsilon, functor, output); + } + + BUILD_SINGLE_TEMPLATE(template void leakyReluDerivative_, (NDArray* input, NDArray* epsilon, NDArray*output);, FLOAT_TYPES); + + void leakyReluDerivative(nd4j::LaunchContext * context, NDArray* theFirst, NDArray* theSecond, NDArray* theOutput) { + BUILD_SINGLE_SELECTOR(theFirst->dataType(), leakyReluDerivative_, (theFirst, theSecond, theOutput), FLOAT_TYPES); + } + + template + linkage void eluDerivative_(NDArray* input, NDArray* epsilon, NDArray* output) { + auto functor = LAMBDA_TT(x, y){ + return y * nd4j::math::nd4j_eluderivative(x); + }; + + input->applyPairwiseLambda(epsilon, functor, output); + } + + BUILD_SINGLE_TEMPLATE(template void eluDerivative_, (NDArray* input, NDArray* epsilon, NDArray*output);, FLOAT_TYPES); + + void eluDerivative(nd4j::LaunchContext * context, NDArray* theFirst, NDArray* theSecond, NDArray* theOutput) { + BUILD_SINGLE_SELECTOR(theFirst->dataType(), eluDerivative_, (theFirst, theSecond, theOutput), FLOAT_TYPES); + } + + template + linkage void seluDerivative_(NDArray* input, NDArray* epsilon, NDArray* output) { + auto functor = LAMBDA_TT(x, y){ + return y * simdOps::SELUDerivative::op(x, nullptr); + }; + + input->applyPairwiseLambda(epsilon, functor, output); + } + + BUILD_SINGLE_TEMPLATE(template void seluDerivative_, (NDArray* input, NDArray* epsilon, NDArray*output);, FLOAT_TYPES); + + void seluDerivative(nd4j::LaunchContext * context, NDArray* theFirst, NDArray* theSecond, NDArray* theOutput) { + BUILD_SINGLE_SELECTOR(theFirst->dataType(), seluDerivative_, (theFirst, theSecond, theOutput), FLOAT_TYPES); + } + } + } +} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cuda/legacy/tanh.cu b/libnd4j/include/ops/declarable/helpers/cuda/legacy/tanh.cu new file mode 100644 index 000000000..9ad1ee0ad --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/cuda/legacy/tanh.cu @@ -0,0 +1,93 @@ 
+/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author GS +// + +#include +#include +#include + +namespace nd4j { + namespace ops { + namespace helpers { + //////////////////////////////////////////////////////////////////////// + template + linkage void tanhDerivative_(NDArray* input, NDArray* epsilon, NDArray* output) { + auto functor = LAMBDA_TT(x, y){ + T th = nd4j::math::nd4j_tanh(x); + return y * ((T)1.0f - (th * th)); + }; + + input->applyPairwiseLambda(epsilon, functor, output); + } + + BUILD_SINGLE_TEMPLATE(template void tanhDerivative_, (NDArray* input, NDArray* epsilon, NDArray*output);, FLOAT_TYPES); + + void tanhDerivative(nd4j::LaunchContext * context, NDArray* theFirst, NDArray* theSecond, NDArray* theOutput) { + BUILD_SINGLE_SELECTOR(theFirst->dataType(), tanhDerivative_, (theFirst, theSecond, theOutput), FLOAT_TYPES); + } + + // return static_cast(d2) * simdOps::HardTanhDerivative::op(d1, nullptr); + template + linkage void hardTanhDerivative_(NDArray* input, NDArray* epsilon, NDArray* output) { + auto functor = LAMBDA_TT(x, y){ + // the derivative comes straight from simdOps::HardTanhDerivative; no tanh evaluation is needed + return y * simdOps::HardTanhDerivative::op(x, nullptr); + }; + + input->applyPairwiseLambda(epsilon, functor, output); + } + + BUILD_SINGLE_TEMPLATE(template 
void hardTanhDerivative_, (NDArray* input, NDArray* epsilon, NDArray*output);, FLOAT_TYPES); + + void hardTanhDerivative(nd4j::LaunchContext * context, NDArray* theFirst, NDArray* theSecond, NDArray* theOutput) { + BUILD_SINGLE_SELECTOR(theFirst->dataType(), hardTanhDerivative_, (theFirst, theSecond, theOutput), FLOAT_TYPES); + } + + template + linkage void rationalTanhDerivative_(NDArray* input, NDArray* epsilon, NDArray* output) { + auto functor = LAMBDA_TT(x, y){ + return y * simdOps::RationalTanhDerivative::op(x, nullptr); + }; + + input->applyPairwiseLambda(epsilon, functor, output); + } + + BUILD_SINGLE_TEMPLATE(template void rationalTanhDerivative_, (NDArray* input, NDArray* epsilon, NDArray*output);, FLOAT_TYPES); + + void rationalTanhDerivative(nd4j::LaunchContext * context, NDArray* theFirst, NDArray* theSecond, NDArray* theOutput) { + BUILD_SINGLE_SELECTOR(theFirst->dataType(), rationalTanhDerivative_, (theFirst, theSecond, theOutput), FLOAT_TYPES); + } + + template + linkage void rectifiedTanhDerivative_(NDArray* input, NDArray* epsilon, NDArray* output) { + auto functor = LAMBDA_TT(x, y){ + return x > (T) 0.0f ? 
y * (nd4j::math::nd4j_tanhderivative(x)) : (T) 0.0f; + }; + + input->applyPairwiseLambda(epsilon, functor, output); + } + + BUILD_SINGLE_TEMPLATE(template void rectifiedTanhDerivative_, (NDArray* input, NDArray* epsilon, NDArray*output);, FLOAT_TYPES); + + void rectifiedTanhDerivative(nd4j::LaunchContext * context, NDArray* theFirst, NDArray* theSecond, NDArray* theOutput) { + BUILD_SINGLE_SELECTOR(theFirst->dataType(), rectifiedTanhDerivative_, (theFirst, theSecond, theOutput), FLOAT_TYPES); + } + } + } +} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cuda/legacy_helper.cu b/libnd4j/include/ops/declarable/helpers/cuda/legacy_helper.cu index 6b182dafb..6d0788c64 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/legacy_helper.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/legacy_helper.cu @@ -25,93 +25,6 @@ namespace nd4j { namespace ops { namespace helpers { - template - linkage void reluDerivative__(NDArray* theFirst, NDArray* theSecond) { - auto functor = LAMBDA_TT(x, y){ - return x > (T) 0.f ? y : T(0.f); - }; - - theFirst->applyPairwiseLambda(theSecond, functor, nullptr); - } - BUILD_SINGLE_TEMPLATE(template void reluDerivative__, (NDArray* input, NDArray* epsilon), FLOAT_TYPES); - - void reluDerivative(nd4j::LaunchContext * context, NDArray* theFirst, NDArray* theSecond) { - BUILD_SINGLE_SELECTOR(theFirst->dataType(), reluDerivative__, (theFirst, theSecond), FLOAT_TYPES); - } - - template - linkage void reluDerivative_(NDArray* input, NDArray* epsilon, NDArray* output) { - auto functor = LAMBDA_TT(x, y){ - return x > (T)0.f ? 
y : T(0.f); - }; - - input->applyPairwiseLambda(epsilon, functor, output); - } - BUILD_SINGLE_TEMPLATE(template void reluDerivative_, (NDArray* input, NDArray* epsilon, NDArray*output);, FLOAT_TYPES); - - void reluDerivative(nd4j::LaunchContext * context, NDArray* theFirst, NDArray* theSecond, NDArray* theOutput) { - BUILD_SINGLE_SELECTOR(theFirst->dataType(), reluDerivative_, (theFirst, theSecond, theOutput), FLOAT_TYPES); - } - - template - linkage void relu6Derivative_(NDArray* input, NDArray* epsilon, NDArray* output) { - auto functor = LAMBDA_TT(x, y){ - return x > (T)0.f && x < (T)6.f? y : T(0.f); - }; - - input->applyPairwiseLambda(epsilon, functor, output); - } - - BUILD_SINGLE_TEMPLATE(template void relu6Derivative_, (NDArray* input, NDArray* epsilon, NDArray*output);, FLOAT_TYPES); - - void relu6Derivative(nd4j::LaunchContext * context, NDArray* theFirst, NDArray* theSecond, NDArray* theOutput) { - BUILD_SINGLE_SELECTOR(theFirst->dataType(), relu6Derivative_, (theFirst, theSecond, theOutput), FLOAT_TYPES); - } - - template - linkage void leakyReluDerivative_(NDArray* input, NDArray* epsilon, NDArray* output) { - auto functor = LAMBDA_TT(x, y){ - return x >= (T)0.f? 
y : T(0.f); - }; - - input->applyPairwiseLambda(epsilon, functor, output); - } - - BUILD_SINGLE_TEMPLATE(template void leakyReluDerivative_, (NDArray* input, NDArray* epsilon, NDArray*output);, FLOAT_TYPES); - - void leakyReluDerivative(nd4j::LaunchContext * context, NDArray* theFirst, NDArray* theSecond, NDArray* theOutput) { - BUILD_SINGLE_SELECTOR(theFirst->dataType(), leakyReluDerivative_, (theFirst, theSecond, theOutput), FLOAT_TYPES); - } - - template - linkage void eluDerivative_(NDArray* input, NDArray* epsilon, NDArray* output) { - auto functor = LAMBDA_TT(x, y){ - return y * nd4j::math::nd4j_eluderivative(x); - }; - - input->applyPairwiseLambda(epsilon, functor, output); - } - - BUILD_SINGLE_TEMPLATE(template void eluDerivative_, (NDArray* input, NDArray* epsilon, NDArray*output);, FLOAT_TYPES); - - void eluDerivative(nd4j::LaunchContext * context, NDArray* theFirst, NDArray* theSecond, NDArray* theOutput) { - BUILD_SINGLE_SELECTOR(theFirst->dataType(), eluDerivative_, (theFirst, theSecond, theOutput), FLOAT_TYPES); - } - - template - linkage void seluDerivative_(NDArray* input, NDArray* epsilon, NDArray* output) { - auto functor = LAMBDA_TT(x, y){ - return y * simdOps::SELUDerivative::op(x, nullptr); - }; - - input->applyPairwiseLambda(epsilon, functor, output); - } - - BUILD_SINGLE_TEMPLATE(template void seluDerivative_, (NDArray* input, NDArray* epsilon, NDArray*output);, FLOAT_TYPES); - - void seluDerivative(nd4j::LaunchContext * context, NDArray* theFirst, NDArray* theSecond, NDArray* theOutput) { - BUILD_SINGLE_SELECTOR(theFirst->dataType(), seluDerivative_, (theFirst, theSecond, theOutput), FLOAT_TYPES); - } template linkage void cubeDerivative_(NDArray* input, NDArray* epsilon, NDArray* output) { @@ -180,70 +93,6 @@ namespace helpers { BUILD_SINGLE_SELECTOR(logits->dataType(), sigmCrossEntropyGrad_, (logits, labels, output), FLOAT_TYPES); } - //////////////////////////////////////////////////////////////////////// - template - linkage void 
tanhDerivative_(NDArray* input, NDArray* epsilon, NDArray* output) { - auto functor = LAMBDA_TT(x, y){ - T th = nd4j::math::nd4j_tanh(x); - return y * ((T)1.0f - (th * th)); - }; - - input->applyPairwiseLambda(epsilon, functor, output); - } - - BUILD_SINGLE_TEMPLATE(template void tanhDerivative_, (NDArray* input, NDArray* epsilon, NDArray*output);, FLOAT_TYPES); - - void tanhDerivative(nd4j::LaunchContext * context, NDArray* theFirst, NDArray* theSecond, NDArray* theOutput) { - BUILD_SINGLE_SELECTOR(theFirst->dataType(), tanhDerivative_, (theFirst, theSecond, theOutput), FLOAT_TYPES); - } - - // return static_cast(d2) * simdOps::HardTanhDerivative::op(d1, nullptr); - template - linkage void hardTanhDerivative_(NDArray* input, NDArray* epsilon, NDArray* output) { - auto functor = LAMBDA_TT(x, y){ - T th = nd4j::math::nd4j_tanh(x); - return y * simdOps::HardTanhDerivative::op(x, nullptr); - }; - - input->applyPairwiseLambda(epsilon, functor, output); - } - - BUILD_SINGLE_TEMPLATE(template void hardTanhDerivative_, (NDArray* input, NDArray* epsilon, NDArray*output);, FLOAT_TYPES); - - void hardTanhDerivative(nd4j::LaunchContext * context, NDArray* theFirst, NDArray* theSecond, NDArray* theOutput) { - BUILD_SINGLE_SELECTOR(theFirst->dataType(), hardTanhDerivative_, (theFirst, theSecond, theOutput), FLOAT_TYPES); - } - - template - linkage void rationalTanhDerivative_(NDArray* input, NDArray* epsilon, NDArray* output) { - auto functor = LAMBDA_TT(x, y){ - return y * simdOps::RationalTanhDerivative::op(x, nullptr); - }; - - input->applyPairwiseLambda(epsilon, functor, output); - } - - BUILD_SINGLE_TEMPLATE(template void rationalTanhDerivative_, (NDArray* input, NDArray* epsilon, NDArray*output);, FLOAT_TYPES); - - void rationalTanhDerivative(nd4j::LaunchContext * context, NDArray* theFirst, NDArray* theSecond, NDArray* theOutput) { - BUILD_SINGLE_SELECTOR(theFirst->dataType(), rationalTanhDerivative_, (theFirst, theSecond, theOutput), FLOAT_TYPES); - } - - template - 
linkage void rectifiedTanhDerivative_(NDArray* input, NDArray* epsilon, NDArray* output) { - auto functor = LAMBDA_TT(x, y){ - return x > (T) 0.0f ? y * (nd4j::math::nd4j_tanhderivative(x)) : (T) 0.0f; - }; - - input->applyPairwiseLambda(epsilon, functor, output); - } - - BUILD_SINGLE_TEMPLATE(template void rectifiedTanhDerivative_, (NDArray* input, NDArray* epsilon, NDArray*output);, FLOAT_TYPES); - - void rectifiedTanhDerivative(nd4j::LaunchContext * context, NDArray* theFirst, NDArray* theSecond, NDArray* theOutput) { - BUILD_SINGLE_SELECTOR(theFirst->dataType(), rectifiedTanhDerivative_, (theFirst, theSecond, theOutput), FLOAT_TYPES); - } - // X f = (X) 1.0f + nd4j::math::nd4j_abs(d1); // return (X) d2 * ((X) 1.0f / (f * f)); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/maximum.cu b/libnd4j/include/ops/declarable/helpers/cuda/maximum.cu new file mode 100644 index 000000000..0af1f0eda --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/cuda/maximum.cu @@ -0,0 +1,114 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author sgazeos@gmail.com +// + +#include +#include +#include + + +namespace nd4j { + namespace ops { + namespace helpers { + + template + void maximumBPFunctor_(NDArray* x, NDArray* y, NDArray* epsNext, NDArray* gradX, NDArray* gradY) { + + auto lambdaX = LAMBDA_TTT(_e, _x, _y) { + return _x >= _y ? _e : (T) 0.; + }; + + auto lambdaY = LAMBDA_TTT(_e, _x, _y) { + return _x <= _y ? _e : (T) 0.; + }; + + + if (x->isSameShape(y)) { + // PWT case + + // X gradient + epsNext->applyTriplewiseLambda(x, y, lambdaX, gradX); + + // Y gradient + epsNext->applyTriplewiseLambda(x, y, lambdaY, gradY); + + } else if (y->isScalar()) { + T s = y->e(0); + auto lambdaS = LAMBDA_TT(_e, _x, s) { + return _x >= s ? _e : (T) 0.; + }; + + // scalar case: gradY is eps summed over the positions where x <= y (comparing values, not the NDArray pointers) + auto lambdaSY = LAMBDA_TT(_e, _x, s) { return _x <= s ? _e : (T) 0.; }; + auto maskedEps = epsNext->dup(); + epsNext->applyPairwiseLambda(x, lambdaSY, maskedEps); + auto tmp = maskedEps->reduceNumber(reduce::Sum); + gradY->assign(tmp); delete maskedEps; + + epsNext->applyPairwiseLambda(x, lambdaS, gradX); + } else { + // broadcast case + + // in this case we want to boost our X and Y shapes to the size of FF pass output (or epsNext, which has the same shape) + auto preX = x->dup(); + auto preY = y->dup(); + + auto targetShape = epsNext->getShapeAsVector(); + + preX->tileToShape(targetShape); + preY->tileToShape(targetShape); + + auto gradPreX = preX->dup(); epsNext->applyTriplewiseLambda(preX, preY, lambdaX, gradPreX); // keep tiled x intact: the Y lambda below still has to read it + epsNext->applyTriplewiseLambda(preX, preY, lambdaY, preY); delete preX; preX = gradPreX; // from here on preX holds the X gradient + + auto axisX = ShapeUtils::evalBroadcastBackwardAxis(x->shapeInfo(), epsNext->shapeInfo()); + auto axisY = ShapeUtils::evalBroadcastBackwardAxis(y->shapeInfo(), epsNext->shapeInfo()); + + if (axisX.size() > 0) { + auto sum = preX->reduceAlongDimension(reduce::Sum, axisX); + gradX->assign(sum); + delete sum; + } else + gradX->assign(preX); + + if (axisY.size() > 0) { + auto sum = preY->reduceAlongDimension(reduce::Sum, axisY); + gradY->assign(sum); + delete sum; + } else + gradY->assign(preY); 
+ + + delete preX; + delete preY; + } + } + + void maximumBPFunctor(nd4j::LaunchContext * context, NDArray* x, NDArray* y, NDArray* epsNext, NDArray* gradX, NDArray* gradY) { + NDArray::prepareSpecialUse({gradX, gradY}, {x, y, epsNext}); + + BUILD_SINGLE_SELECTOR(x->dataType(), maximumBPFunctor_, (x, y, epsNext, gradX, gradY), NUMERIC_TYPES); + + NDArray::registerSpecialUse({gradX, gradY}, {x, y, epsNext}); + } + BUILD_SINGLE_TEMPLATE(template void maximumBPFunctor_, (NDArray* x, NDArray* y, NDArray* epsNext, NDArray* gradX, NDArray* gradY), NUMERIC_TYPES); + + } + } +} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cuda/merge.cu b/libnd4j/include/ops/declarable/helpers/cuda/merge.cu new file mode 100644 index 000000000..3c8d159be --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/cuda/merge.cu @@ -0,0 +1,234 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com), created on 20.04.2018 +// + + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace nd4j { + namespace ops { + namespace helpers { + ////////////////////////////////////////////////////////////////////////// + template + static __global__ void global_mergeMaxIndex_(void **inArrs, void **inShapes, const int numArrays, void *voutput, Nd4jLong *outputShape, Nd4jLong length) { + auto output = reinterpret_cast(voutput); + + const auto tid = blockIdx.x * blockDim.x + threadIdx.x; // global thread id: block offset is measured in threads per block + const auto step = gridDim.x * blockDim.x; + + for (Nd4jLong e = tid; e < length; e += step) { + T mVal = -DataTypeUtils::max(); + Z mIdx(0); + + for (int i = 0; i < numArrays; i++) { + auto x = reinterpret_cast(inArrs[i]); + auto xShape = reinterpret_cast(inShapes[i]); + auto val = x[shape::getIndexOffset(e, xShape, length)];; + if (mVal < val) { mVal = val; // track the running maximum ... + mIdx = static_cast<Z>(i); } // ... and the index of the array that holds it + } + // no barrier here: threads leave this loop after different iteration counts and share no memory + + output[shape::getIndexOffset(e, outputShape, length)] = mIdx; + } + } + + template + static void mergeMaxIndex_(nd4j::LaunchContext * context, const std::vector& inArrs, NDArray& output) { + std::vector inBuffers(inArrs.size()); + std::vector inShapes(inArrs.size()); + + for (int e = 0; e < inArrs.size(); e++) { + inBuffers[e] = inArrs[e]->getSpecialBuffer(); + inShapes[e] = inArrs[e]->getSpecialShapeInfo(); + } + + PointersManager manager(context, "mergeMaxIndex"); + + auto pInBuffers = reinterpret_cast(manager.replicatePointer(inBuffers.data(), inBuffers.size() * sizeof(void *))); + auto pInShapes = reinterpret_cast(manager.replicatePointer(inShapes.data(), inShapes.size() * sizeof(void *))); + auto length = output.lengthOf(); + + global_mergeMaxIndex_<<<512, 512, 512, *context->getCudaStream()>>>(pInBuffers, pInShapes, (int) inArrs.size(), output.getSpecialBuffer(), 
output.getSpecialShapeInfo(), length); + + manager.synchronize(); + } + + void mergeMaxIndex(nd4j::LaunchContext * context, const std::vector& inArrs, NDArray& output) { + BUILD_DOUBLE_SELECTOR(inArrs[0]->dataType(), output.dataType(), mergeMaxIndex_, (context, inArrs, output), LIBND4J_TYPES, INTEGER_TYPES); + } + + BUILD_DOUBLE_TEMPLATE(template void mergeMaxIndex_, (nd4j::LaunchContext * context, const std::vector& inArrs, NDArray& output), LIBND4J_TYPES, INTEGER_TYPES); + + ////////////////////////////////////////////////////////////////////////// + template + static __global__ void global_mergeMax_(void **inArrs, void **inShapes, const int numArrays, void *voutput, Nd4jLong *outputShape, Nd4jLong length) { + auto output = reinterpret_cast(voutput); + + const auto tid = blockIdx.x * blockDim.x + threadIdx.x; // global thread id: block offset is measured in threads per block + const auto step = gridDim.x * blockDim.x; + + for (Nd4jLong e = tid; e < length; e += step) { + T mVal = -DataTypeUtils::max(); + + for (int i = 0; i < numArrays; i++) { + auto x = reinterpret_cast(inArrs[i]); + auto xShape = reinterpret_cast(inShapes[i]); + auto val = x[shape::getIndexOffset(e, xShape, length)];; + if (mVal < val) + mVal = val; + } + // no barrier here: threads leave this loop after different iteration counts and share no memory + + output[shape::getIndexOffset(e, outputShape, length)] = mVal; + } + } + + template + static void mergeMax_(nd4j::LaunchContext * context, const std::vector& inArrs, NDArray& output) { + std::vector inBuffers(inArrs.size()); + std::vector inShapes(inArrs.size()); + + for (int e = 0; e < inArrs.size(); e++) { + inBuffers[e] = inArrs[e]->getSpecialBuffer(); + inShapes[e] = inArrs[e]->getSpecialShapeInfo(); + } + + PointersManager manager(context, "mergeMax"); + + auto pInBuffers = reinterpret_cast(manager.replicatePointer(inBuffers.data(), inBuffers.size() * sizeof(void *))); + auto pInShapes = reinterpret_cast(manager.replicatePointer(inShapes.data(), inShapes.size() * sizeof(void *))); + auto length = output.lengthOf(); + + global_mergeMax_<<<512, 512, 512, 
*context->getCudaStream()>>>(pInBuffers, pInShapes, (int) inArrs.size(), output.getSpecialBuffer(), output.getSpecialShapeInfo(), length); + + manager.synchronize(); + } + BUILD_SINGLE_TEMPLATE(template void mergeMax_, (nd4j::LaunchContext * context, const std::vector& inArrs, NDArray& output), LIBND4J_TYPES); + + void mergeMax(nd4j::LaunchContext * context, const std::vector& inArrs, NDArray& output) { + BUILD_SINGLE_SELECTOR(output.dataType(), mergeMax_, (context, inArrs, output), LIBND4J_TYPES); + } + + ////////////////////////////////////////////////////////////////////////// + template + static __global__ void global_mergeAvg_(void **inArrs, void **inShapes, const int numArrays, void *voutput, Nd4jLong *outputShape, Nd4jLong length) { + auto output = reinterpret_cast(voutput); + + const auto tid = blockIdx.x * blockDim.x + threadIdx.x; // global thread id: block offset is measured in threads per block + const auto step = gridDim.x * blockDim.x; + + for (Nd4jLong e = tid; e < length; e += step) { + T sum(0.0f); + + for (int i = 0; i < numArrays; i++) { + auto x = reinterpret_cast(inArrs[i]); + auto xShape = reinterpret_cast(inShapes[i]); + + sum += x[shape::getIndexOffset(e, xShape, length)]; + } + + output[shape::getIndexOffset(e, outputShape, length)] = sum / numArrays; + } + } + + template + static void mergeAvg_(nd4j::LaunchContext * context, const std::vector& inArrs, NDArray& output) { + std::vector inBuffers(inArrs.size()); + std::vector inShapes(inArrs.size()); + + for (int e = 0; e < inArrs.size(); e++) { + inBuffers[e] = inArrs[e]->getSpecialBuffer(); + inShapes[e] = inArrs[e]->getSpecialShapeInfo(); + } + + PointersManager manager(context, "mergeAvg"); + + auto pInBuffers = reinterpret_cast(manager.replicatePointer(inBuffers.data(), inBuffers.size() * sizeof(void *))); + auto pInShapes = reinterpret_cast(manager.replicatePointer(inShapes.data(), inShapes.size() * sizeof(void *))); + auto length = output.lengthOf(); + + global_mergeAvg_<<<512, 512, 512, *context->getCudaStream()>>>(pInBuffers, pInShapes, (int) 
inArrs.size(), output.getSpecialBuffer(), output.getSpecialShapeInfo(), length); + + manager.synchronize(); + } + BUILD_SINGLE_TEMPLATE(template void mergeAvg_, (nd4j::LaunchContext * context, const std::vector& inArrs, NDArray& output), LIBND4J_TYPES); + + void mergeAvg(nd4j::LaunchContext * context, const std::vector& inArrs, NDArray& output) { + BUILD_SINGLE_SELECTOR(output.dataType(), mergeAvg_, (context, inArrs, output), LIBND4J_TYPES); + } + + ////////////////////////////////////////////////////////////////////////// + template + static __global__ void global_mergeAdd_(void **inArrs, void **inShapes, const int numArrays, void *voutput, Nd4jLong *outputShape, Nd4jLong length) { + auto output = reinterpret_cast(voutput); + + const auto tid = blockIdx.x * blockDim.x + threadIdx.x; // global thread id: block offset is measured in threads per block + const auto step = gridDim.x * blockDim.x; + + for (Nd4jLong e = tid; e < length; e += step) { + T sum(0.0f); + + for (int i = 0; i < numArrays; i++) { + auto x = reinterpret_cast(inArrs[i]); + auto xShape = reinterpret_cast(inShapes[i]); + + sum += x[shape::getIndexOffset(e, xShape, length)]; + } + + output[shape::getIndexOffset(e, outputShape, length)] = sum; + } + } + + template + static void mergeAdd_(nd4j::LaunchContext * context, const std::vector& inArrs, NDArray& output) { + std::vector inBuffers(inArrs.size()); + std::vector inShapes(inArrs.size()); + + for (int e = 0; e < inArrs.size(); e++) { + inBuffers[e] = inArrs[e]->getSpecialBuffer(); + inShapes[e] = inArrs[e]->getSpecialShapeInfo(); + } + + PointersManager manager(context, "mergeAdd"); + + auto pInBuffers = reinterpret_cast(manager.replicatePointer(inBuffers.data(), inBuffers.size() * sizeof(void *))); + auto pInShapes = reinterpret_cast(manager.replicatePointer(inShapes.data(), inShapes.size() * sizeof(void *))); + auto length = output.lengthOf(); + + global_mergeAdd_<<<512, 512, 512, *context->getCudaStream()>>>(pInBuffers, pInShapes, (int) inArrs.size(), output.getSpecialBuffer(), output.getSpecialShapeInfo(), 
length); + + manager.synchronize(); + } + BUILD_SINGLE_TEMPLATE(template void mergeAdd_, (nd4j::LaunchContext * context, const std::vector& inArrs, NDArray& output), LIBND4J_TYPES); + + void mergeAdd(nd4j::LaunchContext * context, const std::vector& inArrs, NDArray& output) { + BUILD_SINGLE_SELECTOR(output.dataType(), mergeAdd_, (context, inArrs, output), LIBND4J_TYPES); + } + } + } +} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cuda/minimax.cu b/libnd4j/include/ops/declarable/helpers/cuda/minimum.cu similarity index 56% rename from libnd4j/include/ops/declarable/helpers/cuda/minimax.cu rename to libnd4j/include/ops/declarable/helpers/cuda/minimum.cu index 5c1ffe417..12f888005 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/minimax.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/minimum.cu @@ -100,78 +100,6 @@ namespace nd4j { } } - template - void maximumBPFunctor_(NDArray* x, NDArray* y, NDArray* epsNext, NDArray* gradX, NDArray* gradY) { - - auto lambdaX = LAMBDA_TTT(_e, _x, _y) { - return _x >= _y ? _e : (T) 0.; - }; - - auto lambdaY = LAMBDA_TTT(_e, _x, _y) { - return _x <= _y ? _e : (T) 0.; - }; - - - if (x->isSameShape(y)) { - // PWT case case - - // X gradient - epsNext->applyTriplewiseLambda(x, y, lambdaX, gradX); - - // Y gradient - epsNext->applyTriplewiseLambda(x, y, lambdaY, gradY); - - } else if (y->isScalar()) { - T s = y->e(0); - auto lambdaS = LAMBDA_TT(_e, _x, s) { - return _x >= s ? 
_e : (T) 0.; - }; - - // scalar case - auto tmp = epsNext->reduceNumber(reduce::Sum); - if (x <= y) - gradY->assign(tmp); - else - gradY->assign(0.0f); - - epsNext->applyPairwiseLambda(x, lambdaS, gradX); - } else { - // broadcast case - - // in this case we want to boost our X and Y shapes to the size of FF pass output (or epsNext, which has the same shape) - auto preX = x->dup(); - auto preY = y->dup(); - - auto targetShape = epsNext->getShapeAsVector(); - - preX->tileToShape(targetShape); - preY->tileToShape(targetShape); - - epsNext->applyTriplewiseLambda(preX, preY, lambdaX, preX); - epsNext->applyTriplewiseLambda(preX, preY, lambdaY, preY); - - auto axisX = ShapeUtils::evalBroadcastBackwardAxis(x->shapeInfo(), epsNext->shapeInfo()); - auto axisY = ShapeUtils::evalBroadcastBackwardAxis(y->shapeInfo(), epsNext->shapeInfo()); - - if (axisX.size() > 0) { - auto sum = preX->reduceAlongDimension(reduce::Sum, axisX); - gradX->assign(sum); - delete sum; - } else - gradX->assign(preX); - - if (axisY.size() > 0) { - auto sum = preY->reduceAlongDimension(reduce::Sum, axisY); - gradY->assign(sum); - delete sum; - } else - gradY->assign(preY); - - - delete preX; - delete preY; - } - } void minimumBPFunctor(nd4j::LaunchContext * context, NDArray* x, NDArray* y, NDArray* epsNext, NDArray* gradX, NDArray* gradY) { NDArray::prepareSpecialUse({gradX, gradY}, {x, y, epsNext}); @@ -181,15 +109,7 @@ namespace nd4j { NDArray::registerSpecialUse({gradX, gradY}, {x, y, epsNext}); } - void maximumBPFunctor(nd4j::LaunchContext * context, NDArray* x, NDArray* y, NDArray* epsNext, NDArray* gradX, NDArray* gradY) { - NDArray::prepareSpecialUse({gradX, gradY}, {x, y, epsNext}); - - BUILD_SINGLE_SELECTOR(x->dataType(), maximumBPFunctor_, (x, y, epsNext, gradX, gradY), NUMERIC_TYPES); - - NDArray::registerSpecialUse({gradX, gradY}, {x, y, epsNext}); - } BUILD_SINGLE_TEMPLATE(template void minimumBPFunctor_, (NDArray* x, NDArray* y, NDArray* epsNext, NDArray* gradX, NDArray* gradY), 
NUMERIC_TYPES); - BUILD_SINGLE_TEMPLATE(template void maximumBPFunctor_, (NDArray* x, NDArray* y, NDArray* epsNext, NDArray* gradX, NDArray* gradY), NUMERIC_TYPES); } } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/pad.cu b/libnd4j/include/ops/declarable/helpers/cuda/pad.cu new file mode 100644 index 000000000..c3cc284ce --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/cuda/pad.cu @@ -0,0 +1,283 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com), created on 20.04.2018 +// + + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace nd4j { + namespace ops { + namespace helpers { + /////////////////////////////////////////////////////////////////// +// x - input, y - paddings, z - output + template + __global__ static void padCuda(const int mode, + const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + const void *vPadVal) { + + const X padVal = *reinterpret_cast(vPadVal); + + const auto x = reinterpret_cast(vx); + const auto y = reinterpret_cast(vy); + auto z = reinterpret_cast(vz); + + __shared__ int rank, rankMinusOne; + __shared__ Nd4jLong zLen, yLen, totalThreads, *coords, *xShape, *zShape, *xStride, *zStride, shift1, shift2, yStride0; + + if (threadIdx.x == 0) { + + extern __shared__ unsigned char shmem[]; + coords = reinterpret_cast(shmem); + zLen = shape::length(zShapeInfo); + xShape = shape::shapeOf(const_cast(xShapeInfo)); + zShape = shape::shapeOf(const_cast(zShapeInfo)); + xStride = shape::stride(const_cast(xShapeInfo)); + zStride = shape::stride(const_cast(zShapeInfo)); + yStride0 = shape::stride(const_cast(yShapeInfo))[0]; + rank = shape::rank(xShapeInfo); + zLen = shape::length(zShapeInfo); + yLen = 2 * rank; + rankMinusOne = rank - 1; + totalThreads = gridDim.x * blockDim.x; + shift1 = mode == 1 ? 0 : 1; // REFLECT : SYMMETRIC + shift2 = mode == 1 ? 
2 : 1; // REFLECT : SYMMETRIC + } + + __syncthreads(); + + auto xzCoord = coords + threadIdx.x * rank; // we use xzCoord storage both for x and z arrays + + const auto tid = blockIdx.x * blockDim.x + threadIdx.x; + + if(mode == 0) { // CONSTANT case + + for (Nd4jLong i = tid; i < zLen; i += totalThreads) { + + shape::index2coords(rank, zShape, i, zLen, xzCoord); + const auto zOffset = shape::getOffset(0, zShape, zStride, xzCoord, rank); + + bool within = true; + for(int j = rankMinusOne; j >= 0; --j) { + if(xShape[j] == zShape[j]) continue; + const auto left = y[shape::getIndexOffset(yStride0 * j, yShapeInfo, yLen)]; + if(xzCoord[j] < left || xzCoord[j] >= left + xShape[j]) {within = false; break;} + else {xzCoord[j] = xzCoord[j] - left;} + } + + if(within) + z[zOffset] = x[shape::getOffset(0, xShape, xStride, xzCoord, rank)]; + else + z[zOffset] = padVal; + } + } + else { // REFLECT and SYMMETRIC cases + + for (Nd4jLong i = tid; i < zLen; i += totalThreads) { + + shape::index2coords(rank, zShape, i, zLen, xzCoord); + const auto zOffset = shape::getOffset(0, zShape, zStride, xzCoord, rank); + + for(int j = rankMinusOne; j >= 0; --j) { + + if(xShape[j] == zShape[j]) continue; + xzCoord[j] = xzCoord[j] - y[shape::getIndexOffset(yStride0 * j, yShapeInfo, yLen)]; // are ready to fill middle (within input dimension range) + if(xzCoord[j] < 0) xzCoord[j] = -xzCoord[j] - shift1; // means fill from left + else if(xzCoord[j] >= xShape[j]) xzCoord[j] = 2 * xShape[j] - xzCoord[j] - shift2; // means fill from right + } + + const auto xOffset = shape::getOffset(0, xShape, xStride, xzCoord, rank); + z[zOffset] = x[xOffset]; + } + } + } + +/////////////////////////////////////////////////////////////////// + template + static void padCudaLauncher(const int blocksPerGrid, const int threadsPerBlock, const int sharedMem, const cudaStream_t *stream, + const int mode, + const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong 
*zShapeInfo, + const void* padVal) { + + padCuda<<>>(mode, vx, xShapeInfo, vy, yShapeInfo, vz, zShapeInfo, padVal); + } + BUILD_DOUBLE_TEMPLATE(template void padCudaLauncher, (const int blocksPerGrid, const int threadsPerBlock, const int sharedMem, const cudaStream_t *stream, const int mode, const void *vx, const Nd4jLong *xShapeInfo, const void *vy, const Nd4jLong *yShapeInfo, void *vz, const Nd4jLong *zShapeInfo, const void* vPadVal), LIBND4J_TYPES, INTEGER_TYPES); + +/////////////////////////////////////////////////////////////////// + void pad(nd4j::LaunchContext * context, const int mode, const NDArray& input, const NDArray& paddings, NDArray& output, const NDArray& padValue) { + + PointersManager manager(context, "pad"); + + NDArray::prepareSpecialUse({&output}, {&input, &paddings, &padValue}); + + const int threadsPerBlock = MAX_NUM_THREADS / 4; + const int blocksPerGrid = (output.lengthOf() + threadsPerBlock - 1) / threadsPerBlock; + const int sharedMem = 8 * threadsPerBlock * output.rankOf() + 128; + + const auto xType = input.dataType(); + const auto yType = paddings.dataType(); + + BUILD_DOUBLE_SELECTOR(xType, yType, padCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), mode, input.getSpecialBuffer(), input.getSpecialShapeInfo(), paddings.getSpecialBuffer(), paddings.getSpecialShapeInfo(), output.getSpecialBuffer(), output.getSpecialShapeInfo(), padValue.getSpecialBuffer()), LIBND4J_TYPES, INTEGER_TYPES); + + NDArray::registerSpecialUse({&output}, {&input, &paddings, &padValue}); + manager.synchronize(); + } + + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + template + static __global__ void mirrorPadLinearKernel(void const* vx, Nd4jLong* xShape, void* vz, Nd4jLong* zShape, Nd4jLong leftSide, Nd4jLong leftSideCorrected, Nd4jLong xLen, Nd4jLong len, Nd4jLong zLen) { + + __shared__ T const* x; + __shared__ T* z; + if (threadIdx.x == 0) { + x = 
reinterpret_cast(vx); + z = reinterpret_cast(vz); + } + __syncthreads(); + auto start = blockIdx.x * blockDim.x + threadIdx.x; + auto step = blockDim.x * gridDim.x; + + for(int i = start; i < zLen; i+= step) { + auto zIndex = shape::getIndexOffset(i, zShape, zLen); + auto xIndex = shape::getIndexOffset(len - i, xShape, xLen); + + if (i < leftSide) // left side + xIndex = shape::getIndexOffset(leftSideCorrected - i, xShape, xLen); + + else if(i >= leftSide && i < leftSide + xLen) // middle + xIndex = shape::getIndexOffset(i - leftSide, xShape, xLen); + +// else // right side +// z[i] = x[len - i]; + z[zIndex] = x[xIndex]; + } + + } + + template + static __global__ void mirrorPadKernel(void const* vx, Nd4jLong* xShape, void* vz, Nd4jLong* zShape, Nd4jLong outLen, void const* paddings, Nd4jLong* paddingShape, int reflBorder) { + + __shared__ F const* x; + __shared__ I const* pads; + __shared__ F* z; + __shared__ Nd4jLong zRank, rank; + __shared__ Nd4jLong* xShapeOf, *xStrideOf, *padsShapeOf, *padsStrideOf; + __shared__ Nd4jLong* zShapeOf, *zStrideOf; + __shared__ Nd4jLong* xIdx; + if (threadIdx.x == 0) { + extern __shared__ unsigned char shmem[]; + xIdx = reinterpret_cast(shmem); + rank = shape::rank(xShape); + + x = reinterpret_cast(vx);// + pads = reinterpret_cast(paddings); + z = reinterpret_cast(vz); + xShapeOf = shape::shapeOf(xShape); + xStrideOf = shape::stride(xShape); + zShapeOf = shape::shapeOf(zShape); + zRank = shape::rank(zShape); + zStrideOf = shape::stride(zShape); + padsShapeOf = shape::shapeOf(paddingShape); + padsStrideOf = shape::stride(paddingShape); + } + __syncthreads(); + auto start = threadIdx.x + blockIdx.x * blockDim.x; + auto step = blockDim.x * gridDim.x; + + for(Nd4jLong i = start; i < outLen; i+= step) { + auto xzCoord = xIdx + threadIdx.x * rank; + //auto zxCoord = xIdx + (threadIdx.x + threadIdx.x % 2 + 1) * rank; + + shape::index2coords(rank, zShapeOf, i, xzCoord); + auto outOffset = shape::getOffset(0, zShapeOf, zStrideOf, xzCoord, 
rank); +// auto intStep = blockDim.y * gridDim.y; + for(int j = 0; j < rank; j++) { + + const Nd4jLong inLen = shape::sizeAt(xShape, j); + Nd4jLong coords[2] = {j, 0}; + auto padOffset = shape::getOffset(0, padsShapeOf, padsStrideOf, coords, 2); // padding already has rank 2 + const auto leftSide = pads[padOffset]; + const auto leftSideCorrected = leftSide - reflBorder; + const Nd4jLong len = 2 * (inLen - 1) + leftSide + reflBorder; + + if(xzCoord[j] < leftSide) // left side + xzCoord[j] = leftSideCorrected - xzCoord[j]; + + else if(xzCoord[j] >= leftSide && xzCoord[j] < leftSide + inLen) // middle + xzCoord[j] = xzCoord[j] - leftSide; + + else if (len > xzCoord[j]) // right side + xzCoord[j] = len - xzCoord[j]; + else + xzCoord[j] = xzCoord[j] - len; + } + + auto inOffset = shape::getOffset(0, xShapeOf, xStrideOf, xzCoord, rank); + z[outOffset] = x[inOffset]; + } + } + + template + static void mirrorPad_(nd4j::LaunchContext * context, const NDArray& input, const NDArray& paddings, NDArray& output, const int mode) { + // mode: 0 - REFLECT, else - SYMMETRIC + const int reflBorder = (bool)mode ? 1 : 0; + const int rank = input.rankOf(); + const Nd4jLong outLen = output.lengthOf(); + auto stream = context->getCudaStream(); + NDArray::prepareSpecialUse({&output}, {&input, &paddings}); + + if(rank <= 1) { + + const Nd4jLong inLen = input.lengthOf(); + const auto leftSide = paddings.e(0); + const auto leftSideCorrected = leftSide - reflBorder; + const Nd4jLong len = 2*(inLen-1) + leftSide + reflBorder; + + mirrorPadLinearKernel<<<256, 512, 256, *stream>>>(input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), leftSide, leftSideCorrected, inLen, len, outLen); + nd4j::DebugHelper::checkErrorCode(stream, "helpers::mirrorPadLinearKernel(...) 
failed"); + } + else { + mirrorPadKernel<<<256, 256, 8192, *stream>>>(input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), outLen, paddings.getSpecialBuffer(), paddings.getSpecialShapeInfo(), reflBorder); + nd4j::DebugHelper::checkErrorCode(stream, "helpers::mirrorPadKernel(...) failed"); + } + NDArray::registerSpecialUse({&output}, {&input, &paddings}); + } + + void mirrorPad(nd4j::LaunchContext * context, const NDArray& input, const NDArray& paddings, NDArray& output, const int mode) { + BUILD_DOUBLE_SELECTOR(input.dataType(), paddings.dataType(), mirrorPad_, (context, input, paddings, output, mode), LIBND4J_TYPES, INTEGER_TYPES); + } + + BUILD_DOUBLE_TEMPLATE(template void mirrorPad_, (nd4j::LaunchContext * context, const NDArray& input, const NDArray& paddings, NDArray& output, const int mode), LIBND4J_TYPES, INTEGER_TYPES); + + + } + } +} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cuda/scatter_simple.cu b/libnd4j/include/ops/declarable/helpers/cuda/scatter_simple.cu new file mode 100644 index 000000000..5d3c4eb52 --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/cuda/scatter_simple.cu @@ -0,0 +1,79 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com), created on 20.04.2018 +// + + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace nd4j { + namespace ops { + namespace helpers { + template + static _CUDA_G void scatterSimpleKernel(void *vx, Nd4jLong *xTadShape, Nd4jLong *xTadOffsets, Nd4jLong xLength, Nd4jLong numTads, void *vi, Nd4jLong *iShapeInfo, Nd4jLong iLength, void *vu, Nd4jLong *uShapeInfo, Nd4jLong uLength) { + auto u = reinterpret_cast(vu); + auto indices = reinterpret_cast(vi); + + auto tid = threadIdx.x + blockIdx.x * blockDim.x; + for (int i = tid; i < iLength; i += blockDim.x * gridDim.x) { + auto x = reinterpret_cast(vx) + xTadOffsets[i]; + auto idx = indices[shape::getIndexOffset(i, iShapeInfo, iLength)]; + + x[shape::getIndexOffset(idx, xTadShape, xLength)] = u[shape::getIndexOffset(i, uShapeInfo, uLength)]; + } + } + + + template + void scatterSimple_(nd4j::LaunchContext * context, const int opId, NDArray& input, const NDArray& updates, const NDArray& indices, const std::vector& dimensions) { + + auto dims = ShapeUtils::evalDimsToExclude(input.rankOf(), dimensions); + auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), dims); + + auto xLength = shape::length(packX.primaryShapeInfo()); + auto iLength = indices.lengthOf(); + auto uLength = updates.lengthOf(); + + scatterSimpleKernel<<<256, 256, 1024, *context->getCudaStream()>>>(input.getSpecialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), xLength, packX.numberOfTads(), indices.getSpecialBuffer(), indices.getSpecialShapeInfo(), iLength, updates.getSpecialBuffer(), updates.getSpecialShapeInfo(), uLength); + } + + + void scatterSimple(nd4j::LaunchContext * context, const int opId, NDArray& input, const NDArray& updates, const NDArray& indices, const std::vector& dimensions) 
{ + auto xType = input.dataType(); + auto yType = indices.dataType(); + + if (opId != 6) + throw std::runtime_error("scatterSimple: only copy op is supported"); + + NDArray::prepareSpecialUse({&input}, {&updates, &indices}); + + BUILD_DOUBLE_SELECTOR(xType, yType, scatterSimple_, (context, opId, input, updates, indices, dimensions), LIBND4J_TYPES, INTEGER_TYPES); + + NDArray::registerSpecialUse({&input}, {&updates, &indices}); + } + } + } +} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cuda/scatter_update.cu b/libnd4j/include/ops/declarable/helpers/cuda/scatter_update.cu new file mode 100644 index 000000000..4a64cd4c7 --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/cuda/scatter_update.cu @@ -0,0 +1,133 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author Yurii Shyrma (iuriish@yahoo.com), created on 20.04.2018 +// + + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace nd4j { + namespace ops { + namespace helpers { + /////////////////////////////////////////////////////////////////// + template + __global__ static void scatterUpdateCuda(const int opCode, const int numOfInd, + void* vx, const Nd4jLong *xShapeInfo, const Nd4jLong *xOffsets, + void* vy, const Nd4jLong *yShapeInfo, const Nd4jLong *yOffsets, + const int* indexes) { + + __shared__ T *x, *y; + __shared__ Nd4jLong arrLenX, arrLenY; + + for (int e = 0; e < numOfInd; e++ ) { + + const auto xIndex = indexes[e]; + const bool isOwner = xIndex < gridDim.x ? blockIdx.x == xIndex : blockIdx.x == xIndex % gridDim.x; + + if (!isOwner) + continue; + + if (threadIdx.x == 0) { + x = reinterpret_cast(vx) + xOffsets[xIndex]; + y = reinterpret_cast(vy) + yOffsets[e]; + arrLenX = shape::length(xShapeInfo); + arrLenY = shape::length(yShapeInfo); + } + + __syncthreads(); + + if (arrLenX != arrLenY) + return; + + for (Nd4jLong i = threadIdx.x; i < arrLenX; i += blockDim.x) { + + const auto xOffset = shape::getIndexOffset(i, xShapeInfo, arrLenX); + const auto yOffset = shape::getIndexOffset(i, yShapeInfo, arrLenY); + + switch (opCode) { + case 0: + x[xOffset] += y[yOffset]; + break; + case 1: + x[xOffset] -= y[yOffset]; + break; + case 2: + x[xOffset] *= y[yOffset]; + break; + case 3: + x[xOffset] /= y[yOffset]; + break; + case 4: + x[xOffset] = y[yOffset] - x[xOffset]; + break; + case 5: + x[xOffset] = y[yOffset] / x[xOffset]; + break; + case 6: + x[xOffset] = y[yOffset]; + break; + default: + continue; + } + } + __syncthreads(); + } + } + + template + __host__ static void scatterUpdateCudaLauncher(const cudaStream_t* stream, const int opCode, const int numOfInd, void* vx, const 
Nd4jLong *xShapeInfo, const Nd4jLong *xOffsets, void* vy, const Nd4jLong *yShapeInfo, const Nd4jLong *yOffsets, const int* indexes) { + + scatterUpdateCuda<<<512, 256, MAX_NUM_THREADS, *stream>>>(opCode, numOfInd, vx, xShapeInfo, xOffsets, vy, yShapeInfo, yOffsets, indexes); + } + + +////////////////////////////////////////////////////////////////////////// + void scatterUpdate(nd4j::LaunchContext* context, NDArray& input, NDArray& updates, const std::vector* intArgs) { + + const int opCode = (*intArgs)[0]; + const int numOfDims = (*intArgs)[1]; + const int numOfInd = (*intArgs)[2 + numOfDims]; + + std::vector tadDimensions(numOfDims); + for (int e = 2; e < 2 + numOfDims; e++) + tadDimensions[e-2] = (*intArgs)[e]; + + auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), tadDimensions); + auto packY = ConstantTadHelper::getInstance()->tadForDimensions(updates.getShapeInfo(), tadDimensions); + + NDArray indices(const_cast(intArgs->data()) + numOfDims + 3, 'c', {numOfInd}, nd4j::DataType::INT32, context); + + PointersManager manager(context, "scatterUpdate"); + + NDArray::prepareSpecialUse({&input}, {&input, &updates, &indices}); + BUILD_SINGLE_SELECTOR(input.dataType(), scatterUpdateCudaLauncher, (context->getCudaStream(), opCode, numOfInd, input.specialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), updates.specialBuffer(), packY.platformShapeInfo(), packY.platformOffsets(), reinterpret_cast(indices.getSpecialBuffer())), LIBND4J_TYPES); + NDArray::registerSpecialUse({&input}, {&input, &updates, &indices}); + + manager.synchronize(); + } + } + } +} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cuda/transforms.cu b/libnd4j/include/ops/declarable/helpers/cuda/transforms.cu index 80822b20f..e6af2fadc 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/transforms.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/transforms.cu @@ -33,163 +33,6 @@ namespace nd4j { namespace ops { 
namespace helpers { -/////////////////////////////////////////////////////////////////// -template -__global__ static void concatCuda(const int numOfArrs, void* pVx, void* pxShapeInfo, void* pVz, void* pzShapeInfo) { - - __shared__ int arrIdx, blocksPerArr; - __shared__ T *x, *z; - __shared__ Nd4jLong *zShapeInfo, *xShapeInfo, arrLen, arrLenPerBlock, start, end; - - if (threadIdx.x == 0) { - - blocksPerArr = (gridDim.x + numOfArrs - 1) / numOfArrs; // ceil - arrIdx = blockIdx.x / blocksPerArr; - - x = reinterpret_cast(reinterpret_cast(pVx)[arrIdx]); - z = reinterpret_cast(reinterpret_cast(pVz)[arrIdx]); - xShapeInfo = reinterpret_cast(pxShapeInfo)[arrIdx]; - zShapeInfo = reinterpret_cast(pzShapeInfo)[arrIdx]; - arrLen = shape::length(xShapeInfo); - - arrLenPerBlock = (arrLen + blocksPerArr - 1) / blocksPerArr; // ceil - - start = (blockIdx.x % blocksPerArr) * arrLenPerBlock; - end = (start + arrLenPerBlock) > arrLen ? arrLen : (start + arrLenPerBlock); - } - - __syncthreads(); - - for (Nd4jLong i = start + threadIdx.x; i < end; i += blockDim.x) - z[shape::getIndexOffset(i, zShapeInfo, arrLen)] = x[shape::getIndexOffset(i, xShapeInfo, arrLen)]; -} - -/////////////////////////////////////////////////////////////////// -template -__host__ static void concatCudaLauncher(const int numOfArrs, const cudaStream_t *stream, void* pVx, void* pxShapeInfo, void* pVz, void* pzShapeInfo) { - - concatCuda<<<512, 256, 1024, *stream>>>(numOfArrs, pVx, pxShapeInfo, pVz, pzShapeInfo); -} -BUILD_SINGLE_TEMPLATE(template void concatCudaLauncher, (const int numOfArrs, const cudaStream_t *stream, void* pVx, void* pxShapeInfo, void* pVz, void* pzShapeInfo), LIBND4J_TYPES); - -/////////////////////////////////////////////////////////////////// -// x - input, y - paddings, z - output -template -__global__ static void padCuda(const int mode, - const void *vx, const Nd4jLong *xShapeInfo, - const void *vy, const Nd4jLong *yShapeInfo, - void *vz, const Nd4jLong *zShapeInfo, - const void 
*vPadVal) { - - const X padVal = *reinterpret_cast(vPadVal); - - const auto x = reinterpret_cast(vx); - const auto y = reinterpret_cast(vy); - auto z = reinterpret_cast(vz); - - __shared__ int rank, rankMinusOne; - __shared__ Nd4jLong zLen, yLen, totalThreads, *coords, *xShape, *zShape, *xStride, *zStride, shift1, shift2, yStride0; - - if (threadIdx.x == 0) { - - extern __shared__ unsigned char shmem[]; - coords = reinterpret_cast(shmem); - zLen = shape::length(zShapeInfo); - xShape = shape::shapeOf(const_cast(xShapeInfo)); - zShape = shape::shapeOf(const_cast(zShapeInfo)); - xStride = shape::stride(const_cast(xShapeInfo)); - zStride = shape::stride(const_cast(zShapeInfo)); - yStride0 = shape::stride(const_cast(yShapeInfo))[0]; - rank = shape::rank(xShapeInfo); - zLen = shape::length(zShapeInfo); - yLen = 2 * rank; - rankMinusOne = rank - 1; - totalThreads = gridDim.x * blockDim.x; - shift1 = mode == 1 ? 0 : 1; // REFLECT : SYMMETRIC - shift2 = mode == 1 ? 2 : 1; // REFLECT : SYMMETRIC - } - - __syncthreads(); - - auto xzCoord = coords + threadIdx.x * rank; // we use xzCoord storage both for x and z arrays - - const auto tid = blockIdx.x * blockDim.x + threadIdx.x; - - if(mode == 0) { // CONSTANT case - - for (Nd4jLong i = tid; i < zLen; i += totalThreads) { - - shape::index2coords(rank, zShape, i, zLen, xzCoord); - const auto zOffset = shape::getOffset(0, zShape, zStride, xzCoord, rank); - - bool within = true; - for(int j = rankMinusOne; j >= 0; --j) { - if(xShape[j] == zShape[j]) continue; - const auto left = y[shape::getIndexOffset(yStride0 * j, yShapeInfo, yLen)]; - if(xzCoord[j] < left || xzCoord[j] >= left + xShape[j]) {within = false; break;} - else {xzCoord[j] = xzCoord[j] - left;} - } - - if(within) - z[zOffset] = x[shape::getOffset(0, xShape, xStride, xzCoord, rank)]; - else - z[zOffset] = padVal; - } - } - else { // REFLECT and SYMMETRIC cases - - for (Nd4jLong i = tid; i < zLen; i += totalThreads) { - - shape::index2coords(rank, zShape, i, zLen, 
xzCoord); - const auto zOffset = shape::getOffset(0, zShape, zStride, xzCoord, rank); - - for(int j = rankMinusOne; j >= 0; --j) { - - if(xShape[j] == zShape[j]) continue; - xzCoord[j] = xzCoord[j] - y[shape::getIndexOffset(yStride0 * j, yShapeInfo, yLen)]; // are ready to fill middle (within input dimension range) - if(xzCoord[j] < 0) xzCoord[j] = -xzCoord[j] - shift1; // means fill from left - else if(xzCoord[j] >= xShape[j]) xzCoord[j] = 2 * xShape[j] - xzCoord[j] - shift2; // means fill from right - } - - const auto xOffset = shape::getOffset(0, xShape, xStride, xzCoord, rank); - z[zOffset] = x[xOffset]; - } - } -} - -/////////////////////////////////////////////////////////////////// -template -static void padCudaLauncher(const int blocksPerGrid, const int threadsPerBlock, const int sharedMem, const cudaStream_t *stream, - const int mode, - const void *vx, const Nd4jLong *xShapeInfo, - const void *vy, const Nd4jLong *yShapeInfo, - void *vz, const Nd4jLong *zShapeInfo, - const void* padVal) { - - padCuda<<>>(mode, vx, xShapeInfo, vy, yShapeInfo, vz, zShapeInfo, padVal); -} -BUILD_DOUBLE_TEMPLATE(template void padCudaLauncher, (const int blocksPerGrid, const int threadsPerBlock, const int sharedMem, const cudaStream_t *stream, const int mode, const void *vx, const Nd4jLong *xShapeInfo, const void *vy, const Nd4jLong *yShapeInfo, void *vz, const Nd4jLong *zShapeInfo, const void* vPadVal), LIBND4J_TYPES, INTEGER_TYPES); - -/////////////////////////////////////////////////////////////////// -void pad(nd4j::LaunchContext * context, const int mode, const NDArray& input, const NDArray& paddings, NDArray& output, const NDArray& padValue) { - - PointersManager manager(context, "pad"); - - NDArray::prepareSpecialUse({&output}, {&input, &paddings, &padValue}); - - const int threadsPerBlock = MAX_NUM_THREADS / 4; - const int blocksPerGrid = (output.lengthOf() + threadsPerBlock - 1) / threadsPerBlock; - const int sharedMem = 8 * threadsPerBlock * output.rankOf() + 128; - - 
const auto xType = input.dataType(); - const auto yType = paddings.dataType(); - - BUILD_DOUBLE_SELECTOR(xType, yType, padCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), mode, input.getSpecialBuffer(), input.getSpecialShapeInfo(), paddings.getSpecialBuffer(), paddings.getSpecialShapeInfo(), output.getSpecialBuffer(), output.getSpecialShapeInfo(), padValue.getSpecialBuffer()), LIBND4J_TYPES, INTEGER_TYPES); - - NDArray::registerSpecialUse({&output}, {&input, &paddings, &padValue}); - manager.synchronize(); -} - /////////////////////////////////////////////////////////////////// template __global__ static void invertPermutationCuda(const void* vx, const Nd4jLong* xShapeInfo, void* vz, const Nd4jLong* zShapeInfo) { @@ -458,214 +301,6 @@ void tileBP(nd4j::LaunchContext * context, const NDArray& gradO /*input*/, NDArr manager.synchronize(); } -/////////////////////////////////////////////////////////////////// -template -__global__ static void scatterUpdateCuda(const int opCode, const int numOfInd, - void* vx, const Nd4jLong *xShapeInfo, const Nd4jLong *xOffsets, - void* vy, const Nd4jLong *yShapeInfo, const Nd4jLong *yOffsets, - const int* indexes) { - - __shared__ T *x, *y; - __shared__ Nd4jLong arrLenX, arrLenY; - - for (int e = 0; e < numOfInd; e++ ) { - - const auto xIndex = indexes[e]; - const bool isOwner = xIndex < gridDim.x ? 
blockIdx.x == xIndex : blockIdx.x == xIndex % gridDim.x; - - if (!isOwner) - continue; - - if (threadIdx.x == 0) { - x = reinterpret_cast(vx) + xOffsets[xIndex]; - y = reinterpret_cast(vy) + yOffsets[e]; - arrLenX = shape::length(xShapeInfo); - arrLenY = shape::length(yShapeInfo); - } - - __syncthreads(); - - if (arrLenX != arrLenY) - return; - - for (Nd4jLong i = threadIdx.x; i < arrLenX; i += blockDim.x) { - - const auto xOffset = shape::getIndexOffset(i, xShapeInfo, arrLenX); - const auto yOffset = shape::getIndexOffset(i, yShapeInfo, arrLenY); - - switch (opCode) { - case 0: - x[xOffset] += y[yOffset]; - break; - case 1: - x[xOffset] -= y[yOffset]; - break; - case 2: - x[xOffset] *= y[yOffset]; - break; - case 3: - x[xOffset] /= y[yOffset]; - break; - case 4: - x[xOffset] = y[yOffset] - x[xOffset]; - break; - case 5: - x[xOffset] = y[yOffset] / x[xOffset]; - break; - case 6: - x[xOffset] = y[yOffset]; - break; - default: - continue; - } - } - __syncthreads(); - } -} - -template -__host__ static void scatterUpdateCudaLauncher(const cudaStream_t* stream, const int opCode, const int numOfInd, void* vx, const Nd4jLong *xShapeInfo, const Nd4jLong *xOffsets, void* vy, const Nd4jLong *yShapeInfo, const Nd4jLong *yOffsets, const int* indexes) { - - scatterUpdateCuda<<<512, 256, MAX_NUM_THREADS, *stream>>>(opCode, numOfInd, vx, xShapeInfo, xOffsets, vy, yShapeInfo, yOffsets, indexes); -} - - -////////////////////////////////////////////////////////////////////////// -void scatterUpdate(nd4j::LaunchContext* context, NDArray& input, NDArray& updates, const std::vector* intArgs) { - - const int opCode = (*intArgs)[0]; - const int numOfDims = (*intArgs)[1]; - const int numOfInd = (*intArgs)[2 + numOfDims]; - - std::vector tadDimensions(numOfDims); - for (int e = 2; e < 2 + numOfDims; e++) - tadDimensions[e-2] = (*intArgs)[e]; - - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), tadDimensions); - auto packY = 
ConstantTadHelper::getInstance()->tadForDimensions(updates.getShapeInfo(), tadDimensions); - - NDArray indices(const_cast(intArgs->data()) + numOfDims + 3, 'c', {numOfInd}, nd4j::DataType::INT32, context); - - PointersManager manager(context, "scatterUpdate"); - - NDArray::prepareSpecialUse({&input}, {&input, &updates, &indices}); - BUILD_SINGLE_SELECTOR(input.dataType(), scatterUpdateCudaLauncher, (context->getCudaStream(), opCode, numOfInd, input.specialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), updates.specialBuffer(), packY.platformShapeInfo(), packY.platformOffsets(), reinterpret_cast(indices.getSpecialBuffer())), LIBND4J_TYPES); - NDArray::registerSpecialUse({&input}, {&input, &updates, &indices}); - - manager.synchronize(); -} - -/////////////////////////////////////////////////////////////////// -// x - input, y - indices, z - output -template -__global__ static void gatherNDCuda(const void *vx, const Nd4jLong *xShapeInfo, - const void *vy, const Nd4jLong *yShapeInfo, - void *vz, const Nd4jLong *zShapeInfo) { - - const auto x = reinterpret_cast(vx); - const auto y = reinterpret_cast(vy); - auto z = reinterpret_cast(vz); - - __shared__ int xRank, yRank, zRank, maxRank, yLastDim; - __shared__ Nd4jLong zLen, totalThreads, *sharedMem; - - if (threadIdx.x == 0) { - - extern __shared__ unsigned char shmem[]; - sharedMem = reinterpret_cast(shmem); - - xRank = shape::rank(xShapeInfo); - yRank = shape::rank(yShapeInfo); - zRank = shape::rank(zShapeInfo); - maxRank = nd4j::math::nd4j_max(yRank, nd4j::math::nd4j_max(xRank, zRank)); - - zLen = shape::length(zShapeInfo); - yLastDim = yShapeInfo[yRank]; - - totalThreads = gridDim.x * blockDim.x; - } - - __syncthreads(); - - auto coord = sharedMem + threadIdx.x * maxRank; - - Nd4jLong *zCoordStart, *xCoordStart; - - if(yLastDim == xRank) { - zCoordStart = coord; - xCoordStart = coord; - } - if(zRank >= xRank) { - zCoordStart = coord; - xCoordStart = coord + zRank - xRank; - } - else { - zCoordStart = 
coord + xRank - zRank; - xCoordStart = coord; - } - - const auto tid = blockIdx.x * blockDim.x + threadIdx.x; - - for (Nd4jLong i = tid; i < zLen; i += totalThreads) { - - shape::index2coords(zRank, zShapeInfo + 1, i, zLen, zCoordStart); - - const auto zOffset = shape::getOffset(0, zShapeInfo + 1, zShapeInfo + zRank + 1, zCoordStart, zRank); - - // last y coordinate - int coordToRestore; - if(yLastDim != xRank) - coordToRestore = static_cast(zCoordStart[yRank - 1]); - - zCoordStart[yRank - 1] = 0; // last y coordinate - const auto yOffset = shape::getOffset(0, yShapeInfo + 1, yShapeInfo + yRank + 1, zCoordStart, yRank); - - //restore z coordinate - if(yLastDim != xRank) - zCoordStart[yRank - 1] = coordToRestore; - - // construct coordinates for x - for(uint j = 0; j < yLastDim; ++j) - xCoordStart[j] = y[yOffset + j * yShapeInfo[2 * yRank]]; // last stride - - const auto xOffset = shape::getOffset(0, xShapeInfo + 1, xShapeInfo + xRank + 1, xCoordStart, xRank); - - z[zOffset] = x[xOffset]; - } -} - -/////////////////////////////////////////////////////////////////// -template -static void gatherNDCudaLauncher(const int blocksPerGrid, const int threadsPerBlock, const int sharedMem, const cudaStream_t *stream, - const void *vx, const Nd4jLong *xShapeInfo, - const void *vy, const Nd4jLong *yShapeInfo, - void *vz, const Nd4jLong *zShapeInfo) { - - gatherNDCuda<<>>(vx, xShapeInfo, vy, yShapeInfo, vz, zShapeInfo); -} -BUILD_DOUBLE_TEMPLATE(template void gatherNDCudaLauncher, (const int blocksPerGrid, const int threadsPerBlock, const int sharedMem, const cudaStream_t *stream, const void *vx, const Nd4jLong *xShapeInfo, const void *vy, const Nd4jLong *yShapeInfo, void *vz, const Nd4jLong *zShapeInfo), LIBND4J_TYPES, INTEGER_TYPES); - -/////////////////////////////////////////////////////////////////// -void gatherND(nd4j::LaunchContext * context, NDArray& input, NDArray& indices, NDArray& output) { - - const int maxRank = nd4j::math::nd4j_max(indices.rankOf(), 
nd4j::math::nd4j_max(input.rankOf(), output.rankOf())); - - const int threadsPerBlock = MAX_NUM_THREADS; - const int blocksPerGrid = (output.lengthOf() + threadsPerBlock - 1) / threadsPerBlock; - const int sharedMem = 8 * threadsPerBlock * maxRank + 128; - - const auto xType = input.dataType(); - const auto yType = indices.dataType(); - - PointersManager manager(context, "gatherND"); - - NDArray::prepareSpecialUse({&output}, {&input, &indices}); - BUILD_DOUBLE_SELECTOR(xType, yType, gatherNDCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), indices.getSpecialBuffer(), indices.getSpecialShapeInfo(), output.getSpecialBuffer(), output.getSpecialShapeInfo()), LIBND4J_TYPES, INTEGER_TYPES); - NDArray::registerSpecialUse({&output}, {&input, &indices}); - - manager.synchronize(); -} - ////////////////////////////////////////////////////////////////////////// // x - input, y - gradO, z - gradI template @@ -929,43 +564,6 @@ void clipByNormBP(nd4j::LaunchContext* context, const NDArray& input, const NDAr manager.synchronize(); } - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - template static __global__ void swapShuffleKernel(T* input, Nd4jLong* shape, Nd4jLong firstDim, Nd4jLong len, nd4j::graph::RandomGenerator* rng) { auto tid = blockIdx.x * blockDim.x; @@ -1091,209 +689,11 @@ void clipByNormBP(nd4j::LaunchContext* context, const NDArray& input, const NDAr -////////////////////////////////////////////////////////////////////////// -void eye(nd4j::LaunchContext * context, NDArray& output) { - - output.setIdentity(); -} - ////////////////////////////////////////////////////////////////////////// - template - static __global__ void global_mergeMaxIndex_(void **inArrs, void **inShapes, const int numArrays, void *voutput, Nd4jLong *outputShape, Nd4jLong length) { - auto output = reinterpret_cast(voutput); - - const auto tid = blockIdx.x * gridDim.x + threadIdx.x; - 
const auto step = gridDim.x * blockDim.x; - - for (Nd4jLong e = tid; e < length; e += step) { - T mVal = -DataTypeUtils::max(); - Z mIdx(0); - - for (int i = 0; i < numArrays; i++) { - auto x = reinterpret_cast(inArrs[i]); - auto xShape = reinterpret_cast(inShapes[i]); - auto val = x[shape::getIndexOffset(e, xShape, length)];; - if (mVal < val) - mIdx = static_cast(e); - } - __syncthreads(); - - output[shape::getIndexOffset(e, outputShape, length)] = mIdx; - } + void eye(nd4j::LaunchContext * context, NDArray& output) { + output.setIdentity(); } - template - static void mergeMaxIndex_(nd4j::LaunchContext * context, const std::vector& inArrs, NDArray& output) { - std::vector inBuffers(inArrs.size()); - std::vector inShapes(inArrs.size()); - - for (int e = 0; e < inArrs.size(); e++) { - inBuffers[e] = inArrs[e]->getSpecialBuffer(); - inShapes[e] = inArrs[e]->getSpecialShapeInfo(); - } - - PointersManager manager(context, "mergeMaxIndex"); - - auto pInBuffers = reinterpret_cast(manager.replicatePointer(inBuffers.data(), inBuffers.size() * sizeof(void *))); - auto pInShapes = reinterpret_cast(manager.replicatePointer(inShapes.data(), inShapes.size() * sizeof(void *))); - auto length = output.lengthOf(); - - global_mergeMaxIndex_<<<512, 512, 512, *context->getCudaStream()>>>(pInBuffers, pInShapes, (int) inArrs.size(), output.getSpecialBuffer(), output.getSpecialShapeInfo(), length); - - manager.synchronize(); - } - - void mergeMaxIndex(nd4j::LaunchContext * context, const std::vector& inArrs, NDArray& output) { - BUILD_DOUBLE_SELECTOR(inArrs[0]->dataType(), output.dataType(), mergeMaxIndex_, (context, inArrs, output), LIBND4J_TYPES, INTEGER_TYPES); - } - - BUILD_DOUBLE_TEMPLATE(template void mergeMaxIndex_, (nd4j::LaunchContext * context, const std::vector& inArrs, NDArray& output), LIBND4J_TYPES, INTEGER_TYPES); - - ////////////////////////////////////////////////////////////////////////// - template - static __global__ void global_mergeMax_(void **inArrs, void 
**inShapes, const int numArrays, void *voutput, Nd4jLong *outputShape, Nd4jLong length) { - auto output = reinterpret_cast(voutput); - - const auto tid = blockIdx.x * gridDim.x + threadIdx.x; - const auto step = gridDim.x * blockDim.x; - - for (Nd4jLong e = tid; e < length; e += step) { - T mVal = -DataTypeUtils::max(); - - for (int i = 0; i < numArrays; i++) { - auto x = reinterpret_cast(inArrs[i]); - auto xShape = reinterpret_cast(inShapes[i]); - auto val = x[shape::getIndexOffset(e, xShape, length)];; - if (mVal < val) - mVal = val; - } - __syncthreads(); - - output[shape::getIndexOffset(e, outputShape, length)] = mVal; - } - } - - template - static void mergeMax_(nd4j::LaunchContext * context, const std::vector& inArrs, NDArray& output) { - std::vector inBuffers(inArrs.size()); - std::vector inShapes(inArrs.size()); - - for (int e = 0; e < inArrs.size(); e++) { - inBuffers[e] = inArrs[e]->getSpecialBuffer(); - inShapes[e] = inArrs[e]->getSpecialShapeInfo(); - } - - PointersManager manager(context, "mergeMax"); - - auto pInBuffers = reinterpret_cast(manager.replicatePointer(inBuffers.data(), inBuffers.size() * sizeof(void *))); - auto pInShapes = reinterpret_cast(manager.replicatePointer(inShapes.data(), inShapes.size() * sizeof(void *))); - auto length = output.lengthOf(); - - global_mergeMax_<<<512, 512, 512, *context->getCudaStream()>>>(pInBuffers, pInShapes, (int) inArrs.size(), output.getSpecialBuffer(), output.getSpecialShapeInfo(), length); - - manager.synchronize(); - } - BUILD_SINGLE_TEMPLATE(template void mergeMax_, (nd4j::LaunchContext * context, const std::vector& inArrs, NDArray& output), LIBND4J_TYPES); - - void mergeMax(nd4j::LaunchContext * context, const std::vector& inArrs, NDArray& output) { - BUILD_SINGLE_SELECTOR(output.dataType(), mergeMax_, (context, inArrs, output), LIBND4J_TYPES); - } - - ////////////////////////////////////////////////////////////////////////// - template - static __global__ void global_mergeAvg_(void **inArrs, void 
**inShapes, const int numArrays, void *voutput, Nd4jLong *outputShape, Nd4jLong length) { - auto output = reinterpret_cast(voutput); - - const auto tid = blockIdx.x * gridDim.x + threadIdx.x; - const auto step = gridDim.x * blockDim.x; - - for (Nd4jLong e = tid; e < length; e += step) { - T sum(0.0f); - - for (int i = 0; i < numArrays; i++) { - auto x = reinterpret_cast(inArrs[i]); - auto xShape = reinterpret_cast(inShapes[i]); - - sum += x[shape::getIndexOffset(e, xShape, length)]; - } - - output[shape::getIndexOffset(e, outputShape, length)] = sum / numArrays; - } - } - - template - static void mergeAvg_(nd4j::LaunchContext * context, const std::vector& inArrs, NDArray& output) { - std::vector inBuffers(inArrs.size()); - std::vector inShapes(inArrs.size()); - - for (int e = 0; e < inArrs.size(); e++) { - inBuffers[e] = inArrs[e]->getSpecialBuffer(); - inShapes[e] = inArrs[e]->getSpecialShapeInfo(); - } - - PointersManager manager(context, "mergeAvg"); - - auto pInBuffers = reinterpret_cast(manager.replicatePointer(inBuffers.data(), inBuffers.size() * sizeof(void *))); - auto pInShapes = reinterpret_cast(manager.replicatePointer(inShapes.data(), inShapes.size() * sizeof(void *))); - auto length = output.lengthOf(); - - global_mergeAvg_<<<512, 512, 512, *context->getCudaStream()>>>(pInBuffers, pInShapes, (int) inArrs.size(), output.getSpecialBuffer(), output.getSpecialShapeInfo(), length); - - manager.synchronize(); - } - BUILD_SINGLE_TEMPLATE(template void mergeAvg_, (nd4j::LaunchContext * context, const std::vector& inArrs, NDArray& output), LIBND4J_TYPES); - - void mergeAvg(nd4j::LaunchContext * context, const std::vector& inArrs, NDArray& output) { - BUILD_SINGLE_SELECTOR(output.dataType(), mergeAvg_, (context, inArrs, output), LIBND4J_TYPES); - } - - ////////////////////////////////////////////////////////////////////////// - template - static __global__ void global_mergeAdd_(void **inArrs, void **inShapes, const int numArrays, void *voutput, Nd4jLong 
*outputShape, Nd4jLong length) { - auto output = reinterpret_cast(voutput); - - const auto tid = blockIdx.x * gridDim.x + threadIdx.x; - const auto step = gridDim.x * blockDim.x; - - for (Nd4jLong e = tid; e < length; e += step) { - T sum(0.0f); - - for (int i = 0; i < numArrays; i++) { - auto x = reinterpret_cast(inArrs[i]); - auto xShape = reinterpret_cast(inShapes[i]); - - sum += x[shape::getIndexOffset(e, xShape, length)]; - } - - output[shape::getIndexOffset(e, outputShape, length)] = sum; - } - } - - template - static void mergeAdd_(nd4j::LaunchContext * context, const std::vector& inArrs, NDArray& output) { - std::vector inBuffers(inArrs.size()); - std::vector inShapes(inArrs.size()); - - for (int e = 0; e < inArrs.size(); e++) { - inBuffers[e] = inArrs[e]->getSpecialBuffer(); - inShapes[e] = inArrs[e]->getSpecialShapeInfo(); - } - - PointersManager manager(context, "mergeAdd"); - - auto pInBuffers = reinterpret_cast(manager.replicatePointer(inBuffers.data(), inBuffers.size() * sizeof(void *))); - auto pInShapes = reinterpret_cast(manager.replicatePointer(inShapes.data(), inShapes.size() * sizeof(void *))); - auto length = output.lengthOf(); - - global_mergeAdd_<<<512, 512, 512, *context->getCudaStream()>>>(pInBuffers, pInShapes, (int) inArrs.size(), output.getSpecialBuffer(), output.getSpecialShapeInfo(), length); - - manager.synchronize(); - } - BUILD_SINGLE_TEMPLATE(template void mergeAdd_, (nd4j::LaunchContext * context, const std::vector& inArrs, NDArray& output), LIBND4J_TYPES); - - void mergeAdd(nd4j::LaunchContext * context, const std::vector& inArrs, NDArray& output) { - BUILD_SINGLE_SELECTOR(output.dataType(), mergeAdd_, (context, inArrs, output), LIBND4J_TYPES); - } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// template @@ -1546,232 +946,6 @@ void eye(nd4j::LaunchContext * context, NDArray& output) { BUILD_SINGLE_TEMPLATE(template void clipByValue_, (nd4j::LaunchContext * 
context, NDArray& input, double leftBound, double rightBound, NDArray& output);, FLOAT_TYPES); - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - template - static __global__ void mirrorPadLinearKernel(void const* vx, Nd4jLong* xShape, void* vz, Nd4jLong* zShape, Nd4jLong leftSide, Nd4jLong leftSideCorrected, Nd4jLong xLen, Nd4jLong len, Nd4jLong zLen) { - - __shared__ T const* x; - __shared__ T* z; - if (threadIdx.x == 0) { - x = reinterpret_cast(vx); - z = reinterpret_cast(vz); - } - __syncthreads(); - auto start = blockIdx.x * blockDim.x + threadIdx.x; - auto step = blockDim.x * gridDim.x; - - for(int i = start; i < zLen; i+= step) { - auto zIndex = shape::getIndexOffset(i, zShape, zLen); - auto xIndex = shape::getIndexOffset(len - i, xShape, xLen); - - if (i < leftSide) // left side - xIndex = shape::getIndexOffset(leftSideCorrected - i, xShape, xLen); - - else if(i >= leftSide && i < leftSide + xLen) // middle - xIndex = shape::getIndexOffset(i - leftSide, xShape, xLen); - -// else // right side -// z[i] = x[len - i]; - z[zIndex] = x[xIndex]; - } - - } - - template - static __global__ void mirrorPadKernel(void const* vx, Nd4jLong* xShape, void* vz, Nd4jLong* zShape, Nd4jLong outLen, void const* paddings, Nd4jLong* paddingShape, int reflBorder) { - - __shared__ F const* x; - __shared__ I const* pads; - __shared__ F* z; - __shared__ Nd4jLong zRank, rank; - __shared__ Nd4jLong* xShapeOf, *xStrideOf, *padsShapeOf, *padsStrideOf; - __shared__ Nd4jLong* zShapeOf, *zStrideOf; - __shared__ Nd4jLong* xIdx; - if (threadIdx.x == 0) { - extern __shared__ unsigned char shmem[]; - xIdx = reinterpret_cast(shmem); - rank = shape::rank(xShape); - - x = reinterpret_cast(vx);// - pads = reinterpret_cast(paddings); - z = reinterpret_cast(vz); - xShapeOf = shape::shapeOf(xShape); - xStrideOf = shape::stride(xShape); - zShapeOf = shape::shapeOf(zShape); - zRank = shape::rank(zShape); - zStrideOf = 
shape::stride(zShape); - padsShapeOf = shape::shapeOf(paddingShape); - padsStrideOf = shape::stride(paddingShape); - } - __syncthreads(); - auto start = threadIdx.x + blockIdx.x * blockDim.x; - auto step = blockDim.x * gridDim.x; - - for(Nd4jLong i = start; i < outLen; i+= step) { - auto xzCoord = xIdx + threadIdx.x * rank; - //auto zxCoord = xIdx + (threadIdx.x + threadIdx.x % 2 + 1) * rank; - - shape::index2coords(rank, zShapeOf, i, xzCoord); - auto outOffset = shape::getOffset(0, zShapeOf, zStrideOf, xzCoord, rank); -// auto intStep = blockDim.y * gridDim.y; - for(int j = 0; j < rank; j++) { - - const Nd4jLong inLen = shape::sizeAt(xShape, j); - Nd4jLong coords[2] = {j, 0}; - auto padOffset = shape::getOffset(0, padsShapeOf, padsStrideOf, coords, 2); // padding already has rank 2 - const auto leftSide = pads[padOffset]; - const auto leftSideCorrected = leftSide - reflBorder; - const Nd4jLong len = 2 * (inLen - 1) + leftSide + reflBorder; - - if(xzCoord[j] < leftSide) // left side - xzCoord[j] = leftSideCorrected - xzCoord[j]; - - else if(xzCoord[j] >= leftSide && xzCoord[j] < leftSide + inLen) // middle - xzCoord[j] = xzCoord[j] - leftSide; - - else if (len > xzCoord[j]) // right side - xzCoord[j] = len - xzCoord[j]; - else - xzCoord[j] = xzCoord[j] - len; - } - - auto inOffset = shape::getOffset(0, xShapeOf, xStrideOf, xzCoord, rank); - z[outOffset] = x[inOffset]; - } - } - - template - static void mirrorPad_(nd4j::LaunchContext * context, const NDArray& input, const NDArray& paddings, NDArray& output, const int mode) { - // mode: 0 - REFLECT, else - SYMMETRIC - const int reflBorder = (bool)mode ? 
1 : 0; - const int rank = input.rankOf(); - const Nd4jLong outLen = output.lengthOf(); - auto stream = context->getCudaStream(); - NDArray::prepareSpecialUse({&output}, {&input, &paddings}); - - if(rank <= 1) { - - const Nd4jLong inLen = input.lengthOf(); - const auto leftSide = paddings.e(0); - const auto leftSideCorrected = leftSide - reflBorder; - const Nd4jLong len = 2*(inLen-1) + leftSide + reflBorder; - - mirrorPadLinearKernel<<<256, 512, 256, *stream>>>(input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), leftSide, leftSideCorrected, inLen, len, outLen); - nd4j::DebugHelper::checkErrorCode(stream, "helpers::mirrorPadLinearKernel(...) failed"); - } - else { - mirrorPadKernel<<<256, 256, 8192, *stream>>>(input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), outLen, paddings.getSpecialBuffer(), paddings.getSpecialShapeInfo(), reflBorder); - nd4j::DebugHelper::checkErrorCode(stream, "helpers::mirrorPadKernel(...) 
failed"); - } - NDArray::registerSpecialUse({&output}, {&input, &paddings}); - } - - void mirrorPad(nd4j::LaunchContext * context, const NDArray& input, const NDArray& paddings, NDArray& output, const int mode) { - BUILD_DOUBLE_SELECTOR(input.dataType(), paddings.dataType(), mirrorPad_, (context, input, paddings, output, mode), LIBND4J_TYPES, INTEGER_TYPES); - } - - BUILD_DOUBLE_TEMPLATE(template void mirrorPad_, (nd4j::LaunchContext * context, const NDArray& input, const NDArray& paddings, NDArray& output, const int mode), LIBND4J_TYPES, INTEGER_TYPES); - -////////////////////////////////////////////////////////////////////////// -void concat(nd4j::LaunchContext * context, const std::vector& inArrs, NDArray& output, const int axis) { - - const int numOfArrs = inArrs.size(); - for(int i = 0; i < numOfArrs; ++i) - if(!inArrs[i]->isActualOnDeviceSide()) inArrs[i]->syncToDevice(); - - const int rank = inArrs[0]->rankOf(); - const int rank2 = 2*rank; - std::vector> indices(numOfArrs, std::vector(rank2,0)); - - // take into account indices for first array - indices[0][2 * axis + 1] = inArrs[0]->sizeAt(axis); - - // loop through the rest of input arrays - for(int i = 1; i < numOfArrs; ++i) { - indices[i][2 * axis] = indices[i-1][2 * axis + 1]; // index start from - indices[i][2 * axis + 1] = indices[i-1][2 * axis + 1] + inArrs[i]->sizeAt(axis); // index end with (excluding) - } - - std::vector outSubArrs(numOfArrs); - for(int i = 0; i < numOfArrs; ++i) - outSubArrs[i] = new NDArray(output(indices[i], true)); - - // prepare arrays of pointers on buffers and shapes - std::vector hOutBuffers(numOfArrs), hInBuffers(numOfArrs); - std::vector hOutShapeInfo(numOfArrs), hInShapeInfo(numOfArrs); - for(int i = 0; i < numOfArrs; ++i) { - hOutBuffers[i] = outSubArrs[i]->getSpecialBuffer(); - hInBuffers[i] = inArrs[i]->getSpecialBuffer(); - hOutShapeInfo[i] = outSubArrs[i]->getSpecialShapeInfo(); - hInShapeInfo[i] = inArrs[i]->getSpecialShapeInfo(); - } - - // allocate and copy all 
buffers and shapes arrays to global memory - PointersManager manager(context, "helpers::concat"); - void* dOutBuffers = manager.replicatePointer(hOutBuffers.data(), hOutBuffers.size() * sizeof(void*)); - void* dInBuffers = manager.replicatePointer(hInBuffers.data(), hInBuffers.size() * sizeof(void*)); - void* dInShapeInfo = manager.replicatePointer(hInShapeInfo.data(), hInShapeInfo.size() * sizeof(Nd4jLong*)); - void* dOutShapeInfo = manager.replicatePointer(hOutShapeInfo.data(), hOutShapeInfo.size() * sizeof(Nd4jLong*)); - - BUILD_SINGLE_SELECTOR(inArrs[0]->dataType(), concatCudaLauncher, (numOfArrs, context->getCudaStream(), dInBuffers, dInShapeInfo, dOutBuffers, dOutShapeInfo), LIBND4J_TYPES); - - manager.synchronize(); - - for(int i = 0; i < numOfArrs; ++i) - delete outSubArrs[i]; - - for(int i = 0; i < numOfArrs; ++i) - inArrs[i]->tickReadHost(); - - output.tickWriteDevice(); -} - - template - static _CUDA_G void scatterSimpleKernel(void *vx, Nd4jLong *xTadShape, Nd4jLong *xTadOffsets, Nd4jLong xLength, Nd4jLong numTads, void *vi, Nd4jLong *iShapeInfo, Nd4jLong iLength, void *vu, Nd4jLong *uShapeInfo, Nd4jLong uLength) { - auto u = reinterpret_cast(vu); - auto indices = reinterpret_cast(vi); - - auto tid = threadIdx.x + blockIdx.x * blockDim.x; - for (int i = tid; i < iLength; i += blockDim.x * gridDim.x) { - auto x = reinterpret_cast(vx) + xTadOffsets[i]; - auto idx = indices[shape::getIndexOffset(i, iShapeInfo, iLength)]; - - x[shape::getIndexOffset(idx, xTadShape, xLength)] = u[shape::getIndexOffset(i, uShapeInfo, uLength)]; - } - } - - - template - void scatterSimple_(nd4j::LaunchContext * context, const int opId, NDArray& input, const NDArray& updates, const NDArray& indices, const std::vector& dimensions) { - - auto dims = ShapeUtils::evalDimsToExclude(input.rankOf(), dimensions); - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), dims); - - auto xLength = shape::length(packX.primaryShapeInfo()); - auto iLength = 
indices.lengthOf(); - auto uLength = updates.lengthOf(); - - scatterSimpleKernel<<<256, 256, 1024, *context->getCudaStream()>>>(input.getSpecialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), xLength, packX.numberOfTads(), indices.getSpecialBuffer(), indices.getSpecialShapeInfo(), iLength, updates.getSpecialBuffer(), updates.getSpecialShapeInfo(), uLength); - } - - - void scatterSimple(nd4j::LaunchContext * context, const int opId, NDArray& input, const NDArray& updates, const NDArray& indices, const std::vector& dimensions) { - auto xType = input.dataType(); - auto yType = indices.dataType(); - - if (opId != 6) - throw std::runtime_error("scatterSimple: only copy op is supported"); - - NDArray::prepareSpecialUse({&input}, {&updates, &indices}); - - BUILD_DOUBLE_SELECTOR(xType, yType, scatterSimple_, (context, opId, input, updates, indices, dimensions), LIBND4J_TYPES, INTEGER_TYPES); - - NDArray::registerSpecialUse({&input}, {&updates, &indices}); - } - - - - } } } diff --git a/libnd4j/tests_cpu/layers_tests/CMakeLists.txt b/libnd4j/tests_cpu/layers_tests/CMakeLists.txt index 798d4f50d..6aa483ef3 100644 --- a/libnd4j/tests_cpu/layers_tests/CMakeLists.txt +++ b/libnd4j/tests_cpu/layers_tests/CMakeLists.txt @@ -29,11 +29,15 @@ if (CUDA_BLAS) if(WIN32) message("CUDA on Windows: enabling /EHsc") - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc /FS") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc /FS /w") SET_TARGET_PROPERTIES(${LIBND4J_NAME} PROPERTIES COMPILER_FLAGS "/EHsc") endif() - list(APPEND CUDA_NVCC_FLAGS -DCUDA_10 ${EXPM} -w -G -g --cudart=static --expt-extended-lambda -arch=compute_${COMPUTE} -code=sm_${COMPUTE}) + if ("${COMPUTE}" STREQUAL "all") + list(APPEND CUDA_NVCC_FLAGS -DCUDA_10 ${EXPM} -w --cudart=static -O3 --expt-extended-lambda -gencode arch=compute_35,code=sm_35 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70) + else() + list(APPEND CUDA_NVCC_FLAGS -DCUDA_10 ${EXPM} -w -G 
-g --cudart=static --expt-extended-lambda -arch=compute_${COMPUTE} -code=sm_${COMPUTE}) + endif() endif() # -fsanitize=address diff --git a/libnd4j/tests_cpu/layers_tests/ConvolutionTests1.cpp b/libnd4j/tests_cpu/layers_tests/ConvolutionTests1.cpp index 9f8f7c67a..4af21ce20 100644 --- a/libnd4j/tests_cpu/layers_tests/ConvolutionTests1.cpp +++ b/libnd4j/tests_cpu/layers_tests/ConvolutionTests1.cpp @@ -159,10 +159,10 @@ TYPED_TEST(TypedConvolutionTests1, conv2d_3) { auto bias = NDArrayFactory::create('c', {oC}, {1,2,3}); - auto expOutput = NDArrayFactory::create('c', {bS, oH, oW, oC},{ 152. , 155.2, 158.4,152. , 155.2, 158.4, 66.4, 68. , 69.6,170.4, 175.2, 180. ,170.4, 175.2, 180. , 70.8, 73.2, 75.6, - 170.4, 175.2, 180. ,170.4, 175.2, 180. , 70.8, 73.2, 75.6, 75.2, 78.4, 81.6, 75.2, 78.4, 81.6, 28. , 29.6, 31.2, - 152. , 155.2, 158.4,152. , 155.2, 158.4, 66.4, 68. , 69.6,170.4, 175.2, 180. ,170.4, 175.2, 180. , 70.8, 73.2, 75.6, - 170.4, 175.2, 180. ,170.4, 175.2, 180. , 70.8, 73.2, 75.6, 75.2, 78.4, 81.6, 75.2, 78.4, 81.6, 28. 
, 29.6, 31.2}); + auto expOutput = NDArrayFactory::create('c', {bS, oH, oW, oC},{ 152.f, 155.2f, 158.4f, 152.f, 155.2f, 158.4f, 66.4f, 68.f, 69.6f, 170.4f, 175.2f, 180.f, 170.4f, 175.2f, 180.f, 70.8f, 73.2f, 75.6f, + 170.4f, 175.2f, 180.f, 170.4f, 175.2f, 180.f, 70.8f, 73.2f, 75.6f, 75.2f, 78.4f, 81.6f, 75.2f, 78.4f, 81.6f, 28.f, 29.6f, 31.2f, + 152.f, 155.2f, 158.4f, 152.f, 155.2f, 158.4f, 66.4f, 68.f, 69.6f, 170.4f, 175.2f, 180.f, 170.4f, 175.2f, 180.f, 70.8f, 73.2f, 75.6f, + 170.4f, 175.2f, 180.f, 170.4f, 175.2f, 180.f, 70.8f, 73.2f, 75.6f, 75.2f, 78.4f, 81.6f, 75.2f, 78.4f, 81.6f, 28.f, 29.6f, 31.2f}); input = 2.; weights.linspace(0.1, 0.1); @@ -190,7 +190,7 @@ TYPED_TEST(TypedConvolutionTests1, conv2d_4) { auto weights = NDArrayFactory::create('c', {kH, kW, iC, oC}); auto bias = NDArrayFactory::create('c', {oC}, {1,2,3}); - auto expOutput = NDArrayFactory::create('c', {bS, oH, oW, oC},{ 170.4,175.20001,180.,170.4,175.20001,180.,170.4,175.20001,180.,170.4,175.20001,180.,170.4,175.20001,180.,170.4,175.20001,180.,170.4,175.20001,180.,170.4,175.20001,180.}); + auto expOutput = NDArrayFactory::create('c', {bS, oH, oW, oC},{ 170.4f,175.20001f,180.f,170.4f,175.20001f,180.f,170.4f,175.20001f,180.f,170.4f,175.20001f,180.f,170.4f,175.20001f,180.f,170.4f,175.20001f,180.f,170.4f,175.20001f,180.f,170.4f,175.20001f,180.f}); input = 2.; weights.linspace(0.1, 0.1); @@ -219,7 +219,7 @@ TYPED_TEST(TypedConvolutionTests1, conv2d_5) { auto weights = NDArrayFactory::create('c', {oC, iC, kH, kW}); auto bias = NDArrayFactory::create('c', {oC}, {1,2,3}); - auto expOutput = NDArrayFactory::create('c', {bS, oC, oH, oW}, {61. , 61. , 61. , 61. ,177.2, 177.2,177.2, 177.2,293.4, 293.4,293.4, 293.4, 61. , 61. , 61. , 61. 
,177.2, 177.2,177.2, 177.2,293.4, 293.4,293.4, 293.4}); + auto expOutput = NDArrayFactory::create('c', {bS, oC, oH, oW}, {61.f, 61.f, 61.f, 61.f, 177.2f, 177.2f, 177.2f, 177.2f, 293.4f, 293.4f, 293.4f, 293.4f, 61.f, 61.f, 61.f, 61.f, 177.2f, 177.2f, 177.2f, 177.2f, 293.4f, 293.4f, 293.4f, 293.4f}); input = 2.; weights.linspace(0.1, 0.1); @@ -252,8 +252,8 @@ TYPED_TEST(TypedConvolutionTests1, conv2d_6) { } TYPED_TEST(TypedConvolutionTests1, TestAvgFF_TF) { - auto input = NDArrayFactory::create('c', {4, 10, 10, 3}, {9.37125111, 2.20166993, 2.91434479, 5.43639755, -2.10573769, 4.08528662, 5.86908436, -4.46203756, 2.21057916, 5.35849190, 0.01394637, 4.40566349, 7.07982206, -0.09633455, 2.42429352, 3.97301817, -1.89553940, 1.99690318, 6.33141708, 0.55401880, 1.70707977, 5.55204201, -0.03513752, 1.60011971, 2.62700319, -2.74582434, 3.06697464, 1.06277943, -1.16075921, -0.78095782, 9.72352791, -1.22686064, 1.99644792, 7.35571337, 1.40607321, 0.11390255, 9.53334427, 2.28303599, -1.66728830, 6.16678810, -0.04532295, -1.97708666, 9.74906158, 1.46223176, -1.46734393, 4.30761862, -1.23790228, 1.24823606, 6.13938427, -3.83689475, -1.19625473, 7.91535568, 6.05868721, -3.22946382, 8.81633949, -0.19967777, 0.66053957, 2.30919123, 0.74543846, -0.39347672, 11.11058044, 0.53720862, 1.52645731, 5.70012379, -1.15213466, 1.16451406, 7.00526333, 1.57362783, -2.44384766, 5.54213285, -1.98828590, -0.70483637, 7.88281822, -3.59875536, 0.80745387, 13.41578484, -1.55507684, -0.65855008, 9.32583523, -0.14544789, 0.73436141, 3.61176538, -1.71268058, -2.58490300, 9.09280205, -3.27405524, -2.04569697, 4.44761324, -0.62955856, -2.61917663, 8.04890442, 0.54579324, 0.85929775, 9.82259560, -1.93825579, 0.77703512, 4.67090321, -4.79267597, -2.38906908, 9.31265545, 0.96026313, -1.14109385, 11.54231834, -0.01417295, -0.39500344, 8.49191666, 0.55300158, 2.79490185, 6.92466164, 1.72254205, 2.82222271, 8.83112717, 2.95033407, 2.18054962, 6.73509789, -2.22272944, 0.51127720, -1.04563558, 2.15747333, 
-2.30959272, 9.55441570, 1.50396204, 1.77370787, 7.38146257, -1.79076433, 3.20961165, 7.18864202, 2.91217351, 0.43018937, 7.11078024, -1.17386127, -0.16817921, 6.12327290, -2.82205725, 3.30696845, 13.51291752, -1.30856836, -2.38332748, 11.09487438, -1.47190213, -0.53050828, 4.38285351, -5.07309771, 1.50714362, 5.72274446, -2.85825086, -0.89673209, 3.73791552, -0.67708802, -4.13149452, -0.00671843, -0.26566532, 0.32961160, 7.14501762, -1.41608179, -4.96590328, 12.26205540, -0.65158135, -0.88641000, 6.95777559, -0.79058206, -0.10260171, 7.87169170, 1.35921454, 1.11759663, 5.46187401, -2.57214499, 2.48484039, 4.04043484, -2.07137156, -1.42709637, 9.25487137, -0.12605135, -2.66949964, 2.89412403, 0.74451172, -2.96250391, 3.99258423, 0.27084303, 0.32213116, 5.42332172, -0.44414216, 1.70881832, 6.69346905, 0.53058422, -4.73146200, 4.22051668, 2.24834967, 0.66996074, 4.30173683, 0.11849818, -4.07520294, 8.27318478, -2.54398274, -2.86705542, 10.11775303, -0.99382895, 0.65881538, 7.93556786, -1.27934420, -1.69343162, 9.68042564, -1.02609646, -1.18189347, 5.75370646, -1.67888868, -4.48871994, 4.79537392, -0.79212248, -0.19855022, 6.15060997, -0.01081491, 3.64454579, 10.82562447, 1.58859253, -2.65847278, 8.60093212, -1.59196103, 0.07635692, 11.76175690, -1.17453325, 0.10122013, 6.86458445, -2.18891335, -2.74004745, 8.07066154, 0.71818852, -2.03035975, 6.31053686, 0.51509416, 1.39789927, 9.43515587, 2.04256630, 0.13985133, 4.65010691, 2.40911126, -0.36255789, -3.06867862, -0.45225358, -1.56778407, 6.05917358, -1.09891272, 1.77184200, 6.46248102, 0.96042323, -0.24346280, 4.63436460, -4.69907761, 1.25187206, 11.46173859, -2.21917558, 1.28007793, 6.92173195, 2.11268163, -3.47389889, 5.08722782, -3.03950930, -4.17154264, 11.30568314, 0.80361372, 2.53214502, 7.18707085, -4.49114513, 2.85449266, 10.14906883, -0.31974933, -0.84472644, -0.52459574, 0.12921631, -1.81390119, 2.76170087, 1.03982210, 2.91744232, -0.29048753, 5.87453508, -1.53684759, 1.85800636, -0.91404629, 1.28954852, 
5.11354685, -2.47475505, -1.33179152, 2.58552408, 1.37316465, -3.32339454, 1.54122913, 3.24953628, -0.29758382, 2.82391763, -1.51142192, -1.22699404, 6.75745535, 0.65452754, -3.29385471, 2.06008053, 2.53172946, -4.23532820, -1.53909743, -0.07010663, -1.42173731, 7.29031610, -0.18448229, 4.59496164, 6.73027277, 0.73441899, 0.14426160, 4.14915276, -2.97010231, 6.05851364, 4.95218086, -2.39145470, 2.40494704, 2.10288811, 0.53503096, 1.44511235, 6.66344261, -3.05803776, 7.21418667, 3.30303526, -0.24163735, 3.47409391, 3.64520788, 2.15189481, -3.11243272, 3.62310791, 0.37379482, 0.40865007, -0.83132005, -4.78246069, 2.07030797, 6.51765442, 3.16178989, 5.06180477, 3.78434467, -0.96689719, 0.35965276, 5.89967585, 1.40294051, 1.11952639, 10.59778214, 0.26739889, -1.61297631, 6.24801159, -0.93914318, -0.57812452, 9.92604542, -0.73025000, -3.38530874, 2.45646000, -2.47949195, 0.51638460, 10.65636063, 1.97816694, -3.00407791, 2.66914415, -0.81951088, -0.23316640, 2.40737987, -2.70007610, 1.51531935, 4.08860207, -0.27552786, -1.31721711, 7.11568260, -3.33498216, -4.02545023, 7.22675610, -0.81690705, -2.52689576, 1.04016697, -0.79291463, -0.34875512, 10.00498390, -4.24167728, 1.46162593, 11.82569408, -1.70359993, -0.30161047, 16.44085884, -0.82253462, -0.09435523, 6.13080597, -0.20259480, 0.68308711, 6.15663004, -6.61776876, 0.33295766, 2.55449438, -0.17819691, -1.14892209, 5.56776142, 1.99279118, 1.33035934, 4.45823956, 3.34916544, -2.59905386, 6.16164446, -2.03881931, -2.45273542, 12.46793365, -2.22743297, 2.83738565, 8.48628139, -1.39347959, -1.30867767, 11.08041477, -4.00363779, 2.09183025, 11.30395889, -2.20504737, 1.37426853, 8.98735619, 1.04676604, -0.72757077, 8.28050232, -6.70741081, -0.65798020, 5.68592072, -0.60760021, 0.35854483, 6.26852131, 1.94100165, 1.32112014, 0.80987954, -1.74617672, -0.25434083, 7.16045523, 1.58884013, -2.64847064, 13.14820385, 1.21393633, -2.47258949, 9.41650105, -0.79384226, 2.48954105, 10.95629311, 0.47723705, 4.02126694, 8.02593136, 
-2.20726371, -1.18794477, 1.50836647, 0.93118095, -1.73513174, 8.85493565, -2.99670315, -0.79055870, 2.39473820, 2.05046916, -2.38055134, 11.82299423, 0.15609655, 0.68744308, 5.66401434, -0.69281673, 2.09855556, 7.74626589, -0.34283102, 1.00542057, 9.95838642, 0.80161905, 2.33455157, 9.80057335, -0.93561798, 2.56991577, 8.29711342, 0.94213426, 0.44209945, 11.70259857, 0.92710167, 2.60957146, 0.24971688, -0.86529571, 3.78628922, 6.80884457, -0.68178189, 2.21103406, 3.18895817, 0.60283208, -2.92716241, 6.72060776, -1.06625068, 2.56543374, 9.97404480, 3.58080721, -0.94936347, 10.16736984, -1.38464379, 1.18191063, 6.66179037, -3.56115270, 0.32329530, 10.90870762, 2.20638227, 0.19653285, 7.34650040, -3.63859272, -1.03027737, 5.98829985, -3.66606474, -3.89746714, 8.63469028, 1.22569811, 1.63240814, 3.74385309, 0.58243257, -0.56981975, 3.69260955, 1.00979900, -1.44030499, 8.57058144, -1.10648811, 1.20474911, 5.43133020, -2.14822555, -0.07928789, 11.25825310, 0.19645604, -5.49546146, 10.41917038, -0.68178523, -2.99639869, 6.50054455, 0.46488351, -5.42328453, 9.09500027, -2.82107449, 0.05601966, 15.34610748, -0.06820253, 3.86699796, 10.73316956, -3.04795432, -0.14702171, 5.64813185, 1.44028485, -2.47596145, 0.07280898, -3.03187990, -1.35183525, 9.35835648, 2.72966957, 1.88199532, 10.36187744, -0.22834805, -3.26738238, 6.92025137, -2.34061313, 4.77379704, 5.28559113, -2.96323752, -1.76186585, 5.94436455, 0.38647744, -5.73869514, 6.76849556, 1.40892124, -1.19068217, 5.37919092, -6.65328646, 3.62782669, 12.34744644, 2.44762444, -4.19242620, 6.14906216, 0.08121119, 0.61355996, 2.69666457, -1.88962626, -0.55314136, 1.84937525, 1.56048691, 1.17460012, 3.75674725, 1.06198275, -5.74625874, 5.41645575, -1.28946674, -1.51689398, 4.32400894, -0.05222082, -4.83948946, 1.80747867, 1.63144708, -2.73887825, 1.63975775, -2.02163982, -0.16210437, 2.93518686, 1.14427686, -2.83246303, 4.79283667, 2.69697428, -3.12678456, -1.19225168, -2.37022972, -3.09429741, 1.94225383, -1.13747168, 
-2.55048585, 5.40242243, 1.12777328, 3.43713188, 3.62658787, -2.16878843, 0.30164462, 2.97407579, -0.07275413, -1.31149673, 4.70066261, -2.01323795, 4.85255766, 4.59128904, 1.68084168, 1.60336494, 6.58138466, -1.04759812, 2.69906545, 3.55769277, -0.74327278, 2.65819693, 5.39528131, 2.11248922, -1.06446671, 5.24546766, -2.43146014, 4.58907509, 0.06521678, -2.24503994, 2.45722699, 6.94863081, 0.35258654, 2.83396196, 9.92525196, -1.12225175, -0.34365177, 7.19116688, -4.39813757, 0.46517885, 13.22028065, -2.57483673, -6.37226963, 7.58046293, -2.74600363, 0.42231262, 8.04881668, 0.17289802, -0.53447008, 16.55157471, -5.63614368, 0.39288223, 3.37079263, 1.26484549, -0.12820500, 8.46440125, -4.39304399, 2.97676420, 0.65650189, 0.83158541, -1.11556435, 6.32885838, -0.36087769, 2.80724382, 9.90292645, 1.15936041, 0.20947981, 6.91249275, -2.67404819, 2.93782163, 6.65656614, -2.30828357, 2.98214006, 6.80611229, -4.93821478, -7.66555262, 7.59763002, -0.54159302, 3.87403512, 12.42607784, 2.59284401, -0.23375344, 8.95293331, -0.71807784, 0.61873478, 8.66713524, 1.24289191, -2.37835455, 2.08071637, -0.88315344, -3.41891551, 6.85245323, 1.73007369, 1.02169311, 7.69170332, -2.85411978, 2.69790673, 8.12906551, -1.19351399, -2.26442742, 12.26104450, -0.75579089, -1.73274946, 10.68729019, 2.20655656, -0.90522075, 12.42165184, -1.67929137, 2.44851565, 9.31565762, -0.06645700, 1.52762020, 6.18427515, -1.68882596, 3.70261097, 3.02252960, -3.44125366, -1.31575799, 2.84617424, -0.96849400, -4.52356243, 9.95027161, 0.19966406, -0.78874779, 8.18595028, -4.08300209, 1.75126517, 0.96418417, -4.04913044, -0.95200396, 12.03637886, -0.03041124, 0.41642749, 8.88267422, -3.24985337, -2.24919462, 7.32566118, 0.16964148, -2.74123430, 7.05264473, -3.30191112, 0.17163286, 4.81851053, -1.64463484, -0.85933101, 7.29276276, 2.34066939, -2.14860010, 3.46148157, -0.01782012, 1.51504040, 4.79304934, 1.85281146, -1.70663762, 6.93470192, -4.15440845, -1.25983095, 10.52491760, 0.42930329, -1.85146868, 
11.70042324, -0.41704914, 3.83796859, 9.21148491, -2.79719448, 0.79470479, 6.26926661, -5.85230207, 3.95105338, 7.84790897, -1.38680744, -1.78099084, 11.95235348, -2.99841452, -1.34507811, 6.15714645, -1.07552516, -2.81228638, 1.66234732, -4.55166149, -1.92601109, 8.64634514, -0.48158705, 3.31595659, 7.67371941, 2.56964207, 0.12107098, 4.56467867, -0.93541539, 1.39432955, 11.99714088, 1.05353570, -2.13099813, 3.67617917, 3.45895386, 1.37365830, 8.74344158, -4.17585802, 1.43908918, 6.28764772, 3.97346330, -0.69144285, 9.07983303, -0.41635889, -0.14965028, 8.85469818, 1.11306190, 2.59440994, 5.38982344, -1.07948279, 1.37252975, 10.26984596, -0.09318046, 2.73104119, 12.45902252, -1.55446684, -2.76124811, 12.19395065, -0.51846564, 1.02764034, 11.42673588, -0.95940983, -0.04781032, 8.78379822, -4.88957930, 0.32534006, 11.97696400, -3.35108662, 1.95104563, 4.46915388, -2.32061648, 3.45230985, 8.29983711, 2.81034684, -2.35529327, 6.07801294, -0.98105043, -0.05359888, 2.52291036, -0.01986909, -2.35321999, 10.51954269, 2.11145401, 3.53506470, 7.29093266, 0.03721160, -1.13496494, 7.43886709, -5.84201956, 2.50796294, 12.14647675, 2.77490377, -2.18896222, 6.05641937, 5.32617044, 1.04221284, 10.79106712, -2.95749092, -2.75414610, 11.30037117, -3.40654182, -2.24673963, 7.49126101, 0.70811015, -6.18003702, 13.83951187, -1.01204085, 1.36298490, -1.04451632, 2.42435336, -0.02346706, -0.85528886, 1.04731262, 0.22192979, 4.15708160, 0.34933877, 0.04814529, 2.24107265, 0.49676740, -1.47752666, 0.45040059, -0.70471478, -1.19759345, 0.21711677, 0.88461423, -2.76830935, 5.52066898, 1.97664857, -1.75381601, 3.45877838, 1.52617192, -1.61350942, 0.85337949, 1.97610760, -3.40310287, 3.40319014, -3.38691044, -0.71319139, 1.65463758, -0.60680127, -1.80700517, 8.02592373, 2.59627104, 2.65895891, 5.93043184, -4.48425817, 3.92670918, 4.19496679, -2.28286791, 6.41634607, 5.72330523, 1.16269672, -0.28753027, 2.46342492, 0.36693189, 0.26712441, 6.37652683, -2.50139046, 2.43923736, 5.56310415, 
0.98065847, 1.04267502, 4.16403675, -0.04966142, 4.40897894, 3.72905660, -3.46129870, 3.59962773, 1.34830284, -1.76661730, 0.47943926, 5.29946661, -1.12711561, 1.26970029, 15.17655945, -1.50971997, 5.81345224, 8.48562050, -4.36049604, 2.48144460, 8.23780441, -3.46030426, -0.84656560, 5.94946814, 1.12747943, -2.65683913, 8.69085693, 1.31309867, -2.79958344, 8.76840591, -1.56444156, 1.62710834, 2.41177034, -0.72804940, 5.70619011, 4.67169666, -0.86167198, -1.83803177, 2.96346045, 2.82692933, -2.81557131, 7.11113358, -1.90071094, 2.54244423, 11.19284058, -0.06298946, -1.71517313, 12.98388577, 0.84510714, 3.00816894, 2.57200313, 0.03899818, -1.49330592, 9.60099125, -3.59513044, -1.30045319, 7.09241819, -0.65233821, -2.33627677, 8.81366920, 0.84154201, 1.03312039, 9.85289097, 0.19351870, 1.78496623, 7.34631205, -2.16530800, -0.65016162, 2.46842360, 0.24016285, -1.24308395, 4.78175163, -0.97682536, 2.20942235, 6.68382788, 3.76786447, -1.44454038, 6.26453733, -3.23575711, -2.30137897, 9.53092670, -5.55222607, 3.25999236, 9.37559509, 1.86339056, -0.23551451, 10.23400211, 3.93031883, -0.52629089, 7.85724449, -2.91549587, 4.46612740, 5.66530371, -2.70820427, 4.81359577, 10.31247330, 1.92230141, 2.53931546, 0.74986327, 1.70303428, 0.48063779, 5.31099129, -0.78976244, 3.75864220, 4.23051405, 2.34042454, -7.98193836, 9.83987141, -1.46722627, 3.54497814, 10.36455154, -4.51249075, 0.77715248, 7.78694630, -4.59989023, -2.49585629, 9.90296268, 1.38535416, 1.17441154, 10.10452843, -0.98628229, 0.60194463, 9.12639141, -3.90754628, 2.88526392, 7.24123430, -0.15283313, -0.75728363, -1.15116858, -2.53791571, 0.77229571, 6.44114161, 0.02646767, 4.95463037, 7.21066380, 1.79384065, 0.73250306, 8.04447937, 0.32576546, -0.79447043, 10.12717724, 2.33392906, 1.30716443, 12.36073112, -0.36694977, -1.20438910, 7.03105593, 0.59557682, 0.69267452, 10.18113136, 2.49944925, -0.42229167, 8.83143330, -1.18805945, -2.87509322, 4.53596449, 4.09732771, -3.39088297, -1.02536607, 0.82119560, -3.47302604, 
9.29991817, 0.21001509, 4.97036457, 9.50018406, 1.04420102, 1.96560478, 10.74769592, -6.22709799, 3.11690164, 5.06759691, -1.23724771, -3.05831861, 8.12925529, -1.93435478, -1.10151744, 9.32263088, -0.04249470, -5.98547363, 10.49398136, 0.26400441, -0.78915191, 13.28219604, 2.99276900, 0.74853164, 2.49364305, -3.43529654, 4.05278301, 2.13498688, -2.35444307, -0.79900265, 4.66968822, -0.31095147, 3.60674143, 12.37222099, -0.07855003, -3.30292702, 12.15215874, 0.60886210, 2.87075138, 7.75271845, 0.38044083, 3.34402204, 6.40583277, -0.87888050, 0.67438459, 6.91080809, 1.98332930, -0.08303714, 8.08630371, -0.16772588, -2.74058914, 7.17253590, -2.69122696, 1.48173678, 8.99470139, -1.43302310, -0.88651133, 2.66944790, -0.29186964, 2.00838661, 5.09587479, -0.76676071, -2.88322186, 8.31110573, -0.14550979, -1.37726915, 10.28355122, -1.60575438, -0.04118848, 9.97510815, 0.14440438, -3.24632120, 9.00034523, 4.14319563, -1.31023729, 7.16950464, -0.70428526, 2.01559544, 7.26155043, 2.40816474, 2.09847403, 7.31264496, -0.75401551, 2.13392544, 7.03648758, 1.04036045, -1.15636516, 1.09634531, -0.06340861, -0.58107805, -0.65623116, 1.18972754, -0.80717683, 1.40118241, -0.61932516, -3.60596156, 1.59904599, -2.23774099, -1.13721037, 3.89620137, -0.09115922, -7.51356888, 2.36975193, -1.42520905, -2.34173775, 3.33830214, -2.74016523, -3.04115510, 6.00119495, -1.36084354, -2.45065260, 4.56992292, -3.02825928, -3.74182844, 5.11069250, -0.91531068, -2.31385994, 1.83399653, 3.39370203, -3.60886002}); - auto exp = NDArrayFactory::create('c', {4, 4, 4, 3}, {7.97172260, 0.06878620, 2.27749538, 7.29276514, -0.14074677, 0.65480286, 5.70313978, -0.06546132, 0.35443667, 3.70382833, -0.84020567, 0.63826996, 8.60301399, -0.38236514, 1.55177069, 7.37542057, -0.99374938, -0.29971302, 8.84352493, -0.67121059, 0.43132120, 4.78175592, -1.25070143, -1.91523600, 6.03855371, -0.00292124, -1.11214364, 7.90158176, -0.57949901, -0.96735370, 7.81192017, -0.53255427, -0.48009714, 3.16953635, 0.08353355, 
-1.54299748, 3.74821687, 1.69396687, 0.72724354, 5.42915201, -1.13686812, -0.71793109, 5.78376389, -0.72239977, -0.60055625, 2.53636408, 0.56777251, -2.07892323, 6.08064651, 0.68620735, 2.54017019, 5.65828180, -0.68255502, 1.47283304, 6.10842514, -0.39655915, 0.28380761, 1.96707797, -1.98206317, 0.94027776, 4.71811438, 0.32104525, -0.92409706, 8.34588146, -1.05581069, -0.55217457, 9.58440876, -0.96549922, 0.45820439, 5.65453672, -2.50953507, -0.71441835, 8.03059578, -0.21281289, 0.92125505, 9.26900673, -0.35963219, -0.70039093, 8.59924412, -1.22358346, 0.81318003, 3.85920119, -0.01305223, -1.09234154, 6.33158875, 1.28094780, -1.48926139, 4.94969177, -0.77126902, -1.97033751, 5.64381838, -0.16285487, -1.31277227, 2.39893222, -1.32902908, -1.39609122, 6.47572327, -0.45267010, 1.55727172, 6.70965624, -1.68735468, -0.05672536, 7.25092363, -0.64613032, 0.67050058, 3.60789680, -2.05948973, 2.22687531, 8.15202713, -0.70148355, 1.28314006, 8.14842319, -1.88807654, -1.04808438, 8.45500565, -0.76425624, 0.94542569, 4.56179953, -0.28786001, -2.04502511, 8.46278095, -0.31019822, 0.07339200, 9.34214592, -0.61948007, 0.52481830, 8.32515621, -1.52418160, 0.49678251, 5.11082315, -1.09908783, -0.52969611, 5.27806664, 0.88632923, 0.66754371, 4.75839233, 0.48928693, -0.68036932, 6.56925392, -0.02949905, -2.99189186, 4.46320581, -0.64534980, -0.29516968, 8.60809517, -1.13120568, 3.41720533, 5.84243155, -1.24109328, 0.89566326, 5.99578333, -0.42496428, 2.07076764, 3.17812920, -0.81566459, -0.14363396, 6.55184317, 0.39633346, -0.43852386, 8.70214558, -2.24613595, 0.30708700, 8.73882294, -0.53545928, 1.54409575, 4.49452257, -0.16509305, 0.19028664, 8.24897003, 0.44750381, 2.15448594, 8.97640514, -0.77728152, 0.57272542, 9.03467560, 0.47173575, -1.10807717, 3.30056310, -0.43268481, -0.41470885, 3.53798294, -0.08546703, -2.16840744, 6.18733406, -0.17871059, -2.59837723, 5.94218683, -1.02990067, -0.49760687, 3.76938033, 0.86383581, -1.91504073}); + auto input = NDArrayFactory::create('c', 
{4, 10, 10, 3}, {9.37125111f, 2.20166993f, 2.91434479f, 5.43639755f, -2.10573769f, 4.08528662f, 5.86908436f, -4.46203756f, 2.21057916f, 5.35849190f, 0.01394637f, 4.40566349f, 7.07982206f, -0.09633455f, 2.42429352f, 3.97301817f, -1.89553940f, 1.99690318f, 6.33141708f, 0.55401880f, 1.70707977f, 5.55204201f, -0.03513752f, 1.60011971f, 2.62700319f, -2.74582434f, 3.06697464f, 1.06277943f, -1.16075921f, -0.78095782f, 9.72352791f, -1.22686064f, 1.99644792f, 7.35571337f, 1.40607321f, 0.11390255f, 9.53334427f, 2.28303599f, -1.66728830f, 6.16678810f, -0.04532295f, -1.97708666f, 9.74906158f, 1.46223176f, -1.46734393f, 4.30761862f, -1.23790228f, 1.24823606f, 6.13938427f, -3.83689475f, -1.19625473f, 7.91535568f, 6.05868721f, -3.22946382f, 8.81633949f, -0.19967777f, 0.66053957f, 2.30919123f, 0.74543846f, -0.39347672f, 11.11058044f, 0.53720862f, 1.52645731f, 5.70012379f, -1.15213466f, 1.16451406f, 7.00526333f, 1.57362783f, -2.44384766f, 5.54213285f, -1.98828590f, -0.70483637f, 7.88281822f, -3.59875536f, 0.80745387f, 13.41578484f, -1.55507684f, -0.65855008f, 9.32583523f, -0.14544789f, 0.73436141f, 3.61176538f, -1.71268058f, -2.58490300f, 9.09280205f, -3.27405524f, -2.04569697f, 4.44761324f, -0.62955856f, -2.61917663f, 8.04890442f, 0.54579324f, 0.85929775f, 9.82259560f, -1.93825579f, 0.77703512f, 4.67090321f, -4.79267597f, -2.38906908f, 9.31265545f, 0.96026313f, -1.14109385f, 11.54231834f, -0.01417295f, -0.39500344f, 8.49191666f, 0.55300158f, 2.79490185f, 6.92466164f, 1.72254205f, 2.82222271f, 8.83112717f, 2.95033407f, 2.18054962f, 6.73509789f, -2.22272944f, 0.51127720f, -1.04563558f, 2.15747333f, -2.30959272f, 9.55441570f, 1.50396204f, 1.77370787f, 7.38146257f, -1.79076433f, 3.20961165f, 7.18864202f, 2.91217351f, 0.43018937f, 7.11078024f, -1.17386127f, -0.16817921f, 6.12327290f, -2.82205725f, 3.30696845f, 13.51291752f, -1.30856836f, -2.38332748f, 11.09487438f, -1.47190213f, -0.53050828f, 4.38285351f, -5.07309771f, 1.50714362f, 5.72274446f, -2.85825086f, -0.89673209f, 3.73791552f, 
-0.67708802f, -4.13149452f, -0.00671843f, -0.26566532f, 0.32961160f, 7.14501762f, -1.41608179f, -4.96590328f, 12.26205540f, -0.65158135f, -0.88641000f, 6.95777559f, -0.79058206f, -0.10260171f, 7.87169170f, 1.35921454f, 1.11759663f, 5.46187401f, -2.57214499f, 2.48484039f, 4.04043484f, -2.07137156f, -1.42709637f, 9.25487137f, -0.12605135f, -2.66949964f, 2.89412403f, 0.74451172f, -2.96250391f, 3.99258423f, 0.27084303f, 0.32213116f, 5.42332172f, -0.44414216f, 1.70881832f, 6.69346905f, 0.53058422f, -4.73146200f, 4.22051668f, 2.24834967f, 0.66996074f, 4.30173683f, 0.11849818f, -4.07520294f, 8.27318478f, -2.54398274f, -2.86705542f, 10.11775303f, -0.99382895f, 0.65881538f, 7.93556786f, -1.27934420f, -1.69343162f, 9.68042564f, -1.02609646f, -1.18189347f, 5.75370646f, -1.67888868f, -4.48871994f, 4.79537392f, -0.79212248f, -0.19855022f, 6.15060997f, -0.01081491f, 3.64454579f, 10.82562447f, 1.58859253f, -2.65847278f, 8.60093212f, -1.59196103f, 0.07635692f, 11.76175690f, -1.17453325f, 0.10122013f, 6.86458445f, -2.18891335f, -2.74004745f, 8.07066154f, 0.71818852f, -2.03035975f, 6.31053686f, 0.51509416f, 1.39789927f, 9.43515587f, 2.04256630f, 0.13985133f, 4.65010691f, 2.40911126f, -0.36255789f, -3.06867862f, -0.45225358f, -1.56778407f, 6.05917358f, -1.09891272f, 1.77184200f, 6.46248102f, 0.96042323f, -0.24346280f, 4.63436460f, -4.69907761f, 1.25187206f, 11.46173859f, -2.21917558f, 1.28007793f, 6.92173195f, 2.11268163f, -3.47389889f, 5.08722782f, -3.03950930f, -4.17154264f, 11.30568314f, 0.80361372f, 2.53214502f, 7.18707085f, -4.49114513f, 2.85449266f, 10.14906883f, -0.31974933f, -0.84472644f, -0.52459574f, 0.12921631f, -1.81390119f, 2.76170087f, 1.03982210f, 2.91744232f, -0.29048753f, 5.87453508f, -1.53684759f, 1.85800636f, -0.91404629f, 1.28954852f, 5.11354685f, -2.47475505f, -1.33179152f, 2.58552408f, 1.37316465f, -3.32339454f, 1.54122913f, 3.24953628f, -0.29758382f, 2.82391763f, -1.51142192f, -1.22699404f, 6.75745535f, 0.65452754f, -3.29385471f, 2.06008053f, 2.53172946f, 
-4.23532820f, -1.53909743f, -0.07010663f, -1.42173731f, 7.29031610f, -0.18448229f, 4.59496164f, 6.73027277f, 0.73441899f, 0.14426160f, 4.14915276f, -2.97010231f, 6.05851364f, 4.95218086f, -2.39145470f, 2.40494704f, 2.10288811f, 0.53503096f, 1.44511235f, 6.66344261f, -3.05803776f, 7.21418667f, 3.30303526f, -0.24163735f, 3.47409391f, 3.64520788f, 2.15189481f, -3.11243272f, 3.62310791f, 0.37379482f, 0.40865007f, -0.83132005f, -4.78246069f, 2.07030797f, 6.51765442f, 3.16178989f, 5.06180477f, 3.78434467f, -0.96689719f, 0.35965276f, 5.89967585f, 1.40294051f, 1.11952639f, 10.59778214f, 0.26739889f, -1.61297631f, 6.24801159f, -0.93914318f, -0.57812452f, 9.92604542f, -0.73025000f, -3.38530874f, 2.45646000f, -2.47949195f, 0.51638460f, 10.65636063f, 1.97816694f, -3.00407791f, 2.66914415f, -0.81951088f, -0.23316640f, 2.40737987f, -2.70007610f, 1.51531935f, 4.08860207f, -0.27552786f, -1.31721711f, 7.11568260f, -3.33498216f, -4.02545023f, 7.22675610f, -0.81690705f, -2.52689576f, 1.04016697f, -0.79291463f, -0.34875512f, 10.00498390f, -4.24167728f, 1.46162593f, 11.82569408f, -1.70359993f, -0.30161047f, 16.44085884f, -0.82253462f, -0.09435523f, 6.13080597f, -0.20259480f, 0.68308711f, 6.15663004f, -6.61776876f, 0.33295766f, 2.55449438f, -0.17819691f, -1.14892209f, 5.56776142f, 1.99279118f, 1.33035934f, 4.45823956f, 3.34916544f, -2.59905386f, 6.16164446f, -2.03881931f, -2.45273542f, 12.46793365f, -2.22743297f, 2.83738565f, 8.48628139f, -1.39347959f, -1.30867767f, 11.08041477f, -4.00363779f, 2.09183025f, 11.30395889f, -2.20504737f, 1.37426853f, 8.98735619f, 1.04676604f, -0.72757077f, 8.28050232f, -6.70741081f, -0.65798020f, 5.68592072f, -0.60760021f, 0.35854483f, 6.26852131f, 1.94100165f, 1.32112014f, 0.80987954f, -1.74617672f, -0.25434083f, 7.16045523f, 1.58884013f, -2.64847064f, 13.14820385f, 1.21393633f, -2.47258949f, 9.41650105f, -0.79384226f, 2.48954105f, 10.95629311f, 0.47723705f, 4.02126694f, 8.02593136f, -2.20726371f, -1.18794477f, 1.50836647f, 0.93118095f, -1.73513174f, 
8.85493565f, -2.99670315f, -0.79055870f, 2.39473820f, 2.05046916f, -2.38055134f, 11.82299423f, 0.15609655f, 0.68744308f, 5.66401434f, -0.69281673f, 2.09855556f, 7.74626589f, -0.34283102f, 1.00542057f, 9.95838642f, 0.80161905f, 2.33455157f, 9.80057335f, -0.93561798f, 2.56991577f, 8.29711342f, 0.94213426f, 0.44209945f, 11.70259857f, 0.92710167f, 2.60957146f, 0.24971688f, -0.86529571f, 3.78628922f, 6.80884457f, -0.68178189f, 2.21103406f, 3.18895817f, 0.60283208f, -2.92716241f, 6.72060776f, -1.06625068f, 2.56543374f, 9.97404480f, 3.58080721f, -0.94936347f, 10.16736984f, -1.38464379f, 1.18191063f, 6.66179037f, -3.56115270f, 0.32329530f, 10.90870762f, 2.20638227f, 0.19653285f, 7.34650040f, -3.63859272f, -1.03027737f, 5.98829985f, -3.66606474f, -3.89746714f, 8.63469028f, 1.22569811f, 1.63240814f, 3.74385309f, 0.58243257f, -0.56981975f, 3.69260955f, 1.00979900f, -1.44030499f, 8.57058144f, -1.10648811f, 1.20474911f, 5.43133020f, -2.14822555f, -0.07928789f, 11.25825310f, 0.19645604f, -5.49546146f, 10.41917038f, -0.68178523f, -2.99639869f, 6.50054455f, 0.46488351f, -5.42328453f, 9.09500027f, -2.82107449f, 0.05601966f, 15.34610748f, -0.06820253f, 3.86699796f, 10.73316956f, -3.04795432f, -0.14702171f, 5.64813185f, 1.44028485f, -2.47596145f, 0.07280898f, -3.03187990f, -1.35183525f, 9.35835648f, 2.72966957f, 1.88199532f, 10.36187744f, -0.22834805f, -3.26738238f, 6.92025137f, -2.34061313f, 4.77379704f, 5.28559113f, -2.96323752f, -1.76186585f, 5.94436455f, 0.38647744f, -5.73869514f, 6.76849556f, 1.40892124f, -1.19068217f, 5.37919092f, -6.65328646f, 3.62782669f, 12.34744644f, 2.44762444f, -4.19242620f, 6.14906216f, 0.08121119f, 0.61355996f, 2.69666457f, -1.88962626f, -0.55314136f, 1.84937525f, 1.56048691f, 1.17460012f, 3.75674725f, 1.06198275f, -5.74625874f, 5.41645575f, -1.28946674f, -1.51689398f, 4.32400894f, -0.05222082f, -4.83948946f, 1.80747867f, 1.63144708f, -2.73887825f, 1.63975775f, -2.02163982f, -0.16210437f, 2.93518686f, 1.14427686f, -2.83246303f, 4.79283667f, 2.69697428f, 
-3.12678456f, -1.19225168f, -2.37022972f, -3.09429741f, 1.94225383f, -1.13747168f, -2.55048585f, 5.40242243f, 1.12777328f, 3.43713188f, 3.62658787f, -2.16878843f, 0.30164462f, 2.97407579f, -0.07275413f, -1.31149673f, 4.70066261f, -2.01323795f, 4.85255766f, 4.59128904f, 1.68084168f, 1.60336494f, 6.58138466f, -1.04759812f, 2.69906545f, 3.55769277f, -0.74327278f, 2.65819693f, 5.39528131f, 2.11248922f, -1.06446671f, 5.24546766f, -2.43146014f, 4.58907509f, 0.06521678f, -2.24503994f, 2.45722699f, 6.94863081f, 0.35258654f, 2.83396196f, 9.92525196f, -1.12225175f, -0.34365177f, 7.19116688f, -4.39813757f, 0.46517885f, 13.22028065f, -2.57483673f, -6.37226963f, 7.58046293f, -2.74600363f, 0.42231262f, 8.04881668f, 0.17289802f, -0.53447008f, 16.55157471f, -5.63614368f, 0.39288223f, 3.37079263f, 1.26484549f, -0.12820500f, 8.46440125f, -4.39304399f, 2.97676420f, 0.65650189f, 0.83158541f, -1.11556435f, 6.32885838f, -0.36087769f, 2.80724382f, 9.90292645f, 1.15936041f, 0.20947981f, 6.91249275f, -2.67404819f, 2.93782163f, 6.65656614f, -2.30828357f, 2.98214006f, 6.80611229f, -4.93821478f, -7.66555262f, 7.59763002f, -0.54159302f, 3.87403512f, 12.42607784f, 2.59284401f, -0.23375344f, 8.95293331f, -0.71807784f, 0.61873478f, 8.66713524f, 1.24289191f, -2.37835455f, 2.08071637f, -0.88315344f, -3.41891551f, 6.85245323f, 1.73007369f, 1.02169311f, 7.69170332f, -2.85411978f, 2.69790673f, 8.12906551f, -1.19351399f, -2.26442742f, 12.26104450f, -0.75579089f, -1.73274946f, 10.68729019f, 2.20655656f, -0.90522075f, 12.42165184f, -1.67929137f, 2.44851565f, 9.31565762f, -0.06645700f, 1.52762020f, 6.18427515f, -1.68882596f, 3.70261097f, 3.02252960f, -3.44125366f, -1.31575799f, 2.84617424f, -0.96849400f, -4.52356243f, 9.95027161f, 0.19966406f, -0.78874779f, 8.18595028f, -4.08300209f, 1.75126517f, 0.96418417f, -4.04913044f, -0.95200396f, 12.03637886f, -0.03041124f, 0.41642749f, 8.88267422f, -3.24985337f, -2.24919462f, 7.32566118f, 0.16964148f, -2.74123430f, 7.05264473f, -3.30191112f, 0.17163286f, 
4.81851053f, -1.64463484f, -0.85933101f, 7.29276276f, 2.34066939f, -2.14860010f, 3.46148157f, -0.01782012f, 1.51504040f, 4.79304934f, 1.85281146f, -1.70663762f, 6.93470192f, -4.15440845f, -1.25983095f, 10.52491760f, 0.42930329f, -1.85146868f, 11.70042324f, -0.41704914f, 3.83796859f, 9.21148491f, -2.79719448f, 0.79470479f, 6.26926661f, -5.85230207f, 3.95105338f, 7.84790897f, -1.38680744f, -1.78099084f, 11.95235348f, -2.99841452f, -1.34507811f, 6.15714645f, -1.07552516f, -2.81228638f, 1.66234732f, -4.55166149f, -1.92601109f, 8.64634514f, -0.48158705f, 3.31595659f, 7.67371941f, 2.56964207f, 0.12107098f, 4.56467867f, -0.93541539f, 1.39432955f, 11.99714088f, 1.05353570f, -2.13099813f, 3.67617917f, 3.45895386f, 1.37365830f, 8.74344158f, -4.17585802f, 1.43908918f, 6.28764772f, 3.97346330f, -0.69144285f, 9.07983303f, -0.41635889f, -0.14965028f, 8.85469818f, 1.11306190f, 2.59440994f, 5.38982344f, -1.07948279f, 1.37252975f, 10.26984596f, -0.09318046f, 2.73104119f, 12.45902252f, -1.55446684f, -2.76124811f, 12.19395065f, -0.51846564f, 1.02764034f, 11.42673588f, -0.95940983f, -0.04781032f, 8.78379822f, -4.88957930f, 0.32534006f, 11.97696400f, -3.35108662f, 1.95104563f, 4.46915388f, -2.32061648f, 3.45230985f, 8.29983711f, 2.81034684f, -2.35529327f, 6.07801294f, -0.98105043f, -0.05359888f, 2.52291036f, -0.01986909f, -2.35321999f, 10.51954269f, 2.11145401f, 3.53506470f, 7.29093266f, 0.03721160f, -1.13496494f, 7.43886709f, -5.84201956f, 2.50796294f, 12.14647675f, 2.77490377f, -2.18896222f, 6.05641937f, 5.32617044f, 1.04221284f, 10.79106712f, -2.95749092f, -2.75414610f, 11.30037117f, -3.40654182f, -2.24673963f, 7.49126101f, 0.70811015f, -6.18003702f, 13.83951187f, -1.01204085f, 1.36298490f, -1.04451632f, 2.42435336f, -0.02346706f, -0.85528886f, 1.04731262f, 0.22192979f, 4.15708160f, 0.34933877f, 0.04814529f, 2.24107265f, 0.49676740f, -1.47752666f, 0.45040059f, -0.70471478f, -1.19759345f, 0.21711677f, 0.88461423f, -2.76830935f, 5.52066898f, 1.97664857f, -1.75381601f, 3.45877838f, 
1.52617192f, -1.61350942f, 0.85337949f, 1.97610760f, -3.40310287f, 3.40319014f, -3.38691044f, -0.71319139f, 1.65463758f, -0.60680127f, -1.80700517f, 8.02592373f, 2.59627104f, 2.65895891f, 5.93043184f, -4.48425817f, 3.92670918f, 4.19496679f, -2.28286791f, 6.41634607f, 5.72330523f, 1.16269672f, -0.28753027f, 2.46342492f, 0.36693189f, 0.26712441f, 6.37652683f, -2.50139046f, 2.43923736f, 5.56310415f, 0.98065847f, 1.04267502f, 4.16403675f, -0.04966142f, 4.40897894f, 3.72905660f, -3.46129870f, 3.59962773f, 1.34830284f, -1.76661730f, 0.47943926f, 5.29946661f, -1.12711561f, 1.26970029f, 15.17655945f, -1.50971997f, 5.81345224f, 8.48562050f, -4.36049604f, 2.48144460f, 8.23780441f, -3.46030426f, -0.84656560f, 5.94946814f, 1.12747943f, -2.65683913f, 8.69085693f, 1.31309867f, -2.79958344f, 8.76840591f, -1.56444156f, 1.62710834f, 2.41177034f, -0.72804940f, 5.70619011f, 4.67169666f, -0.86167198f, -1.83803177f, 2.96346045f, 2.82692933f, -2.81557131f, 7.11113358f, -1.90071094f, 2.54244423f, 11.19284058f, -0.06298946f, -1.71517313f, 12.98388577f, 0.84510714f, 3.00816894f, 2.57200313f, 0.03899818f, -1.49330592f, 9.60099125f, -3.59513044f, -1.30045319f, 7.09241819f, -0.65233821f, -2.33627677f, 8.81366920f, 0.84154201f, 1.03312039f, 9.85289097f, 0.19351870f, 1.78496623f, 7.34631205f, -2.16530800f, -0.65016162f, 2.46842360f, 0.24016285f, -1.24308395f, 4.78175163f, -0.97682536f, 2.20942235f, 6.68382788f, 3.76786447f, -1.44454038f, 6.26453733f, -3.23575711f, -2.30137897f, 9.53092670f, -5.55222607f, 3.25999236f, 9.37559509f, 1.86339056f, -0.23551451f, 10.23400211f, 3.93031883f, -0.52629089f, 7.85724449f, -2.91549587f, 4.46612740f, 5.66530371f, -2.70820427f, 4.81359577f, 10.31247330f, 1.92230141f, 2.53931546f, 0.74986327f, 1.70303428f, 0.48063779f, 5.31099129f, -0.78976244f, 3.75864220f, 4.23051405f, 2.34042454f, -7.98193836f, 9.83987141f, -1.46722627f, 3.54497814f, 10.36455154f, -4.51249075f, 0.77715248f, 7.78694630f, -4.59989023f, -2.49585629f, 9.90296268f, 1.38535416f, 1.17441154f, 
10.10452843f, -0.98628229f, 0.60194463f, 9.12639141f, -3.90754628f, 2.88526392f, 7.24123430f, -0.15283313f, -0.75728363f, -1.15116858f, -2.53791571f, 0.77229571f, 6.44114161f, 0.02646767f, 4.95463037f, 7.21066380f, 1.79384065f, 0.73250306f, 8.04447937f, 0.32576546f, -0.79447043f, 10.12717724f, 2.33392906f, 1.30716443f, 12.36073112f, -0.36694977f, -1.20438910f, 7.03105593f, 0.59557682f, 0.69267452f, 10.18113136f, 2.49944925f, -0.42229167f, 8.83143330f, -1.18805945f, -2.87509322f, 4.53596449f, 4.09732771f, -3.39088297f, -1.02536607f, 0.82119560f, -3.47302604f, 9.29991817f, 0.21001509f, 4.97036457f, 9.50018406f, 1.04420102f, 1.96560478f, 10.74769592f, -6.22709799f, 3.11690164f, 5.06759691f, -1.23724771f, -3.05831861f, 8.12925529f, -1.93435478f, -1.10151744f, 9.32263088f, -0.04249470f, -5.98547363f, 10.49398136f, 0.26400441f, -0.78915191f, 13.28219604f, 2.99276900f, 0.74853164f, 2.49364305f, -3.43529654f, 4.05278301f, 2.13498688f, -2.35444307f, -0.79900265f, 4.66968822f, -0.31095147f, 3.60674143f, 12.37222099f, -0.07855003f, -3.30292702f, 12.15215874f, 0.60886210f, 2.87075138f, 7.75271845f, 0.38044083f, 3.34402204f, 6.40583277f, -0.87888050f, 0.67438459f, 6.91080809f, 1.98332930f, -0.08303714f, 8.08630371f, -0.16772588f, -2.74058914f, 7.17253590f, -2.69122696f, 1.48173678f, 8.99470139f, -1.43302310f, -0.88651133f, 2.66944790f, -0.29186964f, 2.00838661f, 5.09587479f, -0.76676071f, -2.88322186f, 8.31110573f, -0.14550979f, -1.37726915f, 10.28355122f, -1.60575438f, -0.04118848f, 9.97510815f, 0.14440438f, -3.24632120f, 9.00034523f, 4.14319563f, -1.31023729f, 7.16950464f, -0.70428526f, 2.01559544f, 7.26155043f, 2.40816474f, 2.09847403f, 7.31264496f, -0.75401551f, 2.13392544f, 7.03648758f, 1.04036045f, -1.15636516f, 1.09634531f, -0.06340861f, -0.58107805f, -0.65623116f, 1.18972754f, -0.80717683f, 1.40118241f, -0.61932516f, -3.60596156f, 1.59904599f, -2.23774099f, -1.13721037f, 3.89620137f, -0.09115922f, -7.51356888f, 2.36975193f, -1.42520905f, -2.34173775f, 3.33830214f, 
-2.74016523f, -3.04115510f, 6.00119495f, -1.36084354f, -2.45065260f, 4.56992292f, -3.02825928f, -3.74182844f, 5.11069250f, -0.91531068f, -2.31385994f, 1.83399653f, 3.39370203f, -3.60886002f}); + auto exp = NDArrayFactory::create('c', {4, 4, 4, 3}, {7.97172260f, 0.06878620f, 2.27749538f, 7.29276514f, -0.14074677f, 0.65480286f, 5.70313978f, -0.06546132f, 0.35443667f, 3.70382833f, -0.84020567f, 0.63826996f, 8.60301399f, -0.38236514f, 1.55177069f, 7.37542057f, -0.99374938f, -0.29971302f, 8.84352493f, -0.67121059f, 0.43132120f, 4.78175592f, -1.25070143f, -1.91523600f, 6.03855371f, -0.00292124f, -1.11214364f, 7.90158176f, -0.57949901f, -0.96735370f, 7.81192017f, -0.53255427f, -0.48009714f, 3.16953635f, 0.08353355f, -1.54299748f, 3.74821687f, 1.69396687f, 0.72724354f, 5.42915201f, -1.13686812f, -0.71793109f, 5.78376389f, -0.72239977f, -0.60055625f, 2.53636408f, 0.56777251f, -2.07892323f, 6.08064651f, 0.68620735f, 2.54017019f, 5.65828180f, -0.68255502f, 1.47283304f, 6.10842514f, -0.39655915f, 0.28380761f, 1.96707797f, -1.98206317f, 0.94027776f, 4.71811438f, 0.32104525f, -0.92409706f, 8.34588146f, -1.05581069f, -0.55217457f, 9.58440876f, -0.96549922f, 0.45820439f, 5.65453672f, -2.50953507f, -0.71441835f, 8.03059578f, -0.21281289f, 0.92125505f, 9.26900673f, -0.35963219f, -0.70039093f, 8.59924412f, -1.22358346f, 0.81318003f, 3.85920119f, -0.01305223f, -1.09234154f, 6.33158875f, 1.28094780f, -1.48926139f, 4.94969177f, -0.77126902f, -1.97033751f, 5.64381838f, -0.16285487f, -1.31277227f, 2.39893222f, -1.32902908f, -1.39609122f, 6.47572327f, -0.45267010f, 1.55727172f, 6.70965624f, -1.68735468f, -0.05672536f, 7.25092363f, -0.64613032f, 0.67050058f, 3.60789680f, -2.05948973f, 2.22687531f, 8.15202713f, -0.70148355f, 1.28314006f, 8.14842319f, -1.88807654f, -1.04808438f, 8.45500565f, -0.76425624f, 0.94542569f, 4.56179953f, -0.28786001f, -2.04502511f, 8.46278095f, -0.31019822f, 0.07339200f, 9.34214592f, -0.61948007f, 0.52481830f, 8.32515621f, -1.52418160f, 0.49678251f, 5.11082315f, 
-1.09908783f, -0.52969611f, 5.27806664f, 0.88632923f, 0.66754371f, 4.75839233f, 0.48928693f, -0.68036932f, 6.56925392f, -0.02949905f, -2.99189186f, 4.46320581f, -0.64534980f, -0.29516968f, 8.60809517f, -1.13120568f, 3.41720533f, 5.84243155f, -1.24109328f, 0.89566326f, 5.99578333f, -0.42496428f, 2.07076764f, 3.17812920f, -0.81566459f, -0.14363396f, 6.55184317f, 0.39633346f, -0.43852386f, 8.70214558f, -2.24613595f, 0.30708700f, 8.73882294f, -0.53545928f, 1.54409575f, 4.49452257f, -0.16509305f, 0.19028664f, 8.24897003f, 0.44750381f, 2.15448594f, 8.97640514f, -0.77728152f, 0.57272542f, 9.03467560f, 0.47173575f, -1.10807717f, 3.30056310f, -0.43268481f, -0.41470885f, 3.53798294f, -0.08546703f, -2.16840744f, 6.18733406f, -0.17871059f, -2.59837723f, 5.94218683f, -1.02990067f, -0.49760687f, 3.76938033f, 0.86383581f, -1.91504073f}); nd4j::ops::avgpool2d op; auto result = op.execute({&input}, {}, {3,3, 3,3, 0,0, 1,1,1, 0,1}, {}); diff --git a/libnd4j/tests_cpu/layers_tests/ConvolutionTests2.cpp b/libnd4j/tests_cpu/layers_tests/ConvolutionTests2.cpp index 301d98e04..dc29b1b14 100644 --- a/libnd4j/tests_cpu/layers_tests/ConvolutionTests2.cpp +++ b/libnd4j/tests_cpu/layers_tests/ConvolutionTests2.cpp @@ -152,19 +152,19 @@ TYPED_TEST(TypedConvolutionTests2, Test_DeConv2D_TF_1) { TYPED_TEST(TypedConvolutionTests2, Test_DeConv2D_TF_2) { auto input0 = NDArrayFactory::create('c', {4}, {3, 8, 8, 16}); - auto input1 = NDArrayFactory::create('c', {7, 7, 16, 5}, {1.05293429,-0.89349967,0.31027254,1.22991478,-0.62926656,0.56918693, --1.60992694,1.10167944,-0.80843484,0.07521993,-1.15994942,0.76016301,-0.40056285,-1.16872537,-0.91384381,-0.36700436,1.82389200,-1.18200207,0.51612782,-0.92479187,-0.09307563,-0.55122334,1.23532486,-1.11124146,-0.05812126,0.68159896,0.69125599,-0.77127314,-0.10874277,0.86469102, 
--1.31614351,0.33354419,-1.71750402,0.17197680,-1.03965557,1.10570908,-1.19115615,1.05115080,0.18277600,1.08820546,-0.72191417,-0.10999311,1.56521320,-0.35433730,-1.11799145,0.34499285,0.64998639,-1.64371550,0.92592359,-0.47659501,0.49101439,-0.15613313,1.47486567,0.43576995, -2.19538260,-0.83567709,-1.21846950,0.80400819,1.14637423,-1.01503456,-0.61992753,-0.47378838,0.86503726,0.27147385,0.37073180,-0.19951358,0.79167330,-0.33982825,0.18631981,-1.54715073,0.39967480,0.95067030,1.12508667,-0.86676019,-1.10341156,2.33141375,1.10972047,0.71407092, -1.70640314,1.80666339,0.59465605,-0.39653218,-2.61163163,-1.15013492,-1.19908321,0.41783467,-0.22730024,0.31425011,-0.58562893,-0.10131568,-0.85047537,-2.59974790,1.22072542,-2.08812046,-0.19363593,-1.27664304,-0.02703438,1.08477545,-0.65506506,0.46040919,-0.13715318, --0.74945593,-0.69006950,-1.29617655,-0.15865716,1.38956285,0.90216327,-1.31185400,-0.15067385,-0.63093358,-0.05895613,0.26545224,0.29332840,0.42852548,0.72409540,0.12879130,1.43038857,0.68647617,2.19654775,0.51878077,-0.03769343,0.52877223,-0.21733910,1.13710785,-0.59003806, -1.54624867,-0.64997369,-1.03239334,0.19708300,0.68658423,0.71048903,-1.55250466,-1.38636279,0.32385820,0.81226677,0.19209047,-0.23002781,-0.63631231,1.02101684,0.65428704,-0.17206922,1.09488952,1.03022420,-0.95567745,-0.07595373,-1.48606372,2.57174873,-1.75366247,1.12913883, -0.97053039,-0.28552356,0.56511772,-0.79568213,0.07561764,-1.02085686,1.05770981,-1.25715709,0.42046708,-2.57390857,0.96947151,1.05215812,0.65624017,-1.29019403,0.64157075,-0.40509227,-0.65354455,0.42348680,-1.34107757,0.05931387,-0.54337227,0.95460182,1.59319806,-0.44433126, --0.33717924,0.79566282,0.50112695,-0.22244534,1.76904583,-0.89817202,1.82985342,0.17671813,0.80720717,1.32469308,0.39417782,-0.23720963,0.96796370,-1.02348757,-0.86615551,-1.58120525,-0.37634999,0.00905940,0.01880967,1.75771821,-0.64372772,0.36687651,0.15854552,-0.67599791, 
-0.53726906,-1.20158446,-1.78549063,0.96476388,-0.66158366,-0.41681561,-0.97541636,2.35928202,0.32130197,1.06886065,1.38736427,-0.73718959,0.11215294,2.12865782,-0.37927702,0.55621815,-1.10108411,-0.02032263,0.29595461,1.58737493,1.24001300,-0.66748160,0.80729002,-0.10575818, --1.03175950,1.80755460,0.10825710,2.20666361,1.33633149,1.39290452,0.45211342,-0.07837920,2.08304930,-0.28387162,-0.70775616,0.43626297,0.53556961,0.06201901,-0.59255266,-0.11854446,2.10024118,0.37638292,-0.56178707,-0.25220188,-1.23731256,-1.30002999,0.34283713,0.30502397, --1.09233856,1.12430644,0.52273953,-0.68507338,-0.69913578,0.88440478,-0.76959240,1.07093310,-0.34802195,0.35683727,-0.76079178,-1.92807376,0.84499562,1.39131641,0.44825050,0.34567752,0.44607711,-1.00986362,-0.50038189,-0.09060892,-2.55645394,0.56416476,-0.83058155,-0.65931624, --0.73649710,0.59814465,-0.86736494,-0.32200798,-1.28087902,-0.76818323,0.86848933,-0.98678392,-1.30813944,-0.20255326,0.26557815,-0.31090519,-1.46331608,-0.62782109,0.59034890,1.63147473,-0.17727259,-0.37636510,1.27368402,0.19096918,-0.29936951,-1.99038267,0.54831523,0.48849005,-2.55680346,-0.63126534,1.21715927,1.22841084,-0.67416084,0.02927168,-0.36693662,0.63204330,0.13721083,0.28742912,0.19470036,0.74873924,-1.47602463,0.86264688,-0.23730527,-0.99978864,-1.17048764,-0.34996086,1.43019187,0.26224539,0.60689932,-0.75002515,-0.79823422,-1.37300086,-0.19951135,-0.12150808,-0.75272322,0.23755015,0.31270382,1.66539109,-1.04104745,0.79540199,-0.54042423,-0.54150617,0.43871084,0.24163951,-0.24517761,-0.66178995,-1.13064528,-0.84426326,0.56437236,0.09088907,-0.82823074,0.81753862,-1.74096012,-1.80599844,-0.60943592,1.36094582,-1.47762752,0.15931177,1.05569172,0.36751524,0.06497604,0.13536447,-1.57156146,0.22783801,-0.96910107,-1.24294984,-1.47147155,-1.04790676,0.64629447,-0.32266054,-0.55675793,-0.95612079,-0.23005411,-0.75229394,0.03050950,-1.72484553,-2.06055546,0.19892083,-0.13597751,0.65180075,0.27096850,0.08977254,0.57564765,-0.43227410,0.09541437,
-0.00358280,0.65680492,0.04006556,0.57160908,0.43821687,1.96118212,0.42602235,-0.36731303,0.67200917,-0.56667900,0.44014785,0.06970236,-1.34415269,-1.13301528,-0.08848868,0.35615012,-0.06426942,-0.81406075,0.94097465,-0.54560357,-0.65877116,-1.29646838,-1.13109028,-1.64186084,-2.12723470,1.86027610,1.22621441,0.26098135,-0.05608099,0.21143445,-0.87244326,0.79408187,1.24279130,0.14458629,0.25532281,-1.24023473,2.42278886,0.00405578,-1.00119174,1.19856644,-1.37395728,-0.16656208,0.46858498,-0.00678801,-0.34960639,0.16614936,2.41560221,-0.53880709,0.91618651,-1.77009308,0.32911557,0.30216452,0.02881077,0.77705866,0.27061903,-0.07440855,-1.14010465,1.25383139,-1.58615100,1.04185510,0.15140508,-0.88059032,-0.33872122,-0.42526904,2.17365575,0.29308075,-2.24234557,-1.03164542,-0.09263755,0.08050421,-0.74946511,-0.64589006,-1.13416314,-0.64989561,0.16502371,-0.33831969,0.22832428,-0.08389475,-0.28009200,1.34536922,-0.19075738,0.36238208,0.83690089,0.26144615,0.04457319,-2.55585861,-0.01807522,1.68334866,-0.05795629,-0.21315987,-1.84039557,0.06512877,-1.77318645,-0.27637982,0.20439345,0.67558700,-0.77179354,-0.17902173,0.70381826,-0.40395790,-0.96492916,0.84138173,2.43879008,-0.32297835,-1.74370265,-0.10330839,-1.07465363,1.85030377,-0.59153467,0.99667048,-0.56753993,0.57383025,-1.90630126,1.24299097,0.22797665,0.30468231,-0.07360230,1.64654350,0.57195550,0.03227921,1.11005175,0.00088721,1.19266295,0.61323351,0.13754399,0.59900171,-0.75831634,1.11500823,0.99747783,-1.36923385,1.26563418,0.01253266,0.35483193,1.95143735,-2.02703261,-1.38265920,-0.02404256,2.02788448,-0.75144875,-0.58445263,0.26129767,0.60691077,-1.84661067,0.65872228,-0.58298993,0.33067298,-0.09431327,0.43333948,-1.52616286,-0.25961858,-1.65459549,-0.72950101,-0.89906919,-0.80081612,-1.32189929,-1.36574399,-0.35809481,0.36385000,0.31480747,-0.35797358,-1.04066050,0.07971872,-0.21176252,-0.76559299,-0.10352154,0.29248312,-1.75030553,0.68219930,0.56189102,-1.11212170,0.06501702,-0.07131009,1.23410738,0.29311740
,-1.02052307,1.40220940,-1.00995779,0.57955760,0.22640309,0.74853230,-0.02586563,-0.33427954,1.70311153,-0.53405988,0.90975094,-0.46450076,0.19904344,0.28559047,0.23167793,-0.69065529,-0.17176504,-0.29301846,-0.85477978,-0.00267053,-0.28529504,-0.64201307,1.03479636,1.03805065,0.83270210,-0.09405448,2.50615931,0.62019676,0.31354564,-1.51599669,0.42848015,0.66263914,0.74651009,-1.13042867,-0.58933645,-0.35146511,0.06223279,0.28065836,0.66506970,0.16942430,-0.23316263,-0.87481076,1.21992230,1.48536301,-0.79667616,-0.75519305,1.40999961,-0.42802793,-0.20252463,0.30573779,-0.23319976,1.77525878,-1.80704832,2.71519923,-0.67500192,0.12268137,-0.13014549,-0.07479453,-1.51065743,1.04198146,0.96205556,-2.00525570,-0.37911776,0.89329720,-0.39495832,-0.03683375,-0.90928614,-1.56263304,0.45038295,-2.62184358,-0.45686841,-0.52536523,1.05351484,0.89982438,-0.63724512,3.21004057,-0.08608918,1.55209303,0.62688643,-0.59702635,1.85774517,0.38172096,-1.25640929,-2.59278178,0.85050315,-1.10080361,-1.26422560,-1.80045366,-0.34494889,0.68448657,1.25671864,-1.26594126,0.32244179,-0.51956522,-0.56212711,-0.95574015,0.71973872,0.46736258,-0.11772985,-1.52736545,0.19571695,0.73147154,0.87724912,-0.26265728,-2.60267401,0.19263546,0.18320183,0.11485019,-0.82999659,0.13582672,-0.08040185,0.28152901,-0.51421624,-2.32467175,0.19923948,0.64616692,0.29718629,0.32785949,-0.62266952,-0.98174316,1.23276305,0.58563638,1.28528512,-2.13718534,0.28842899,0.12676710,-1.72105229,0.15053287,2.19496536,1.28683448,-0.96318281,0.17043279,-0.05245409,-0.38710704,-0.30441490,-0.08249986,0.28423953,0.72963721,-1.49658203,0.99077344,-0.78913772,-1.12661564,-1.26294816,0.16517465,0.10124251,-0.77198768,-0.16342169,0.08615876,0.49711797,-0.66083062,0.76648003,1.04756033,1.46122825,-0.42798752,-2.29203916,0.30444992,0.58697921,1.22166932,0.09022947,-0.03920181,0.10444995,0.10361757,1.18224072,-0.76641631,0.90802073,1.41639423,1.55682337,1.28101575,-0.35396016,1.11443567,1.18218529,-0.06048089,0.85024464,-1.01789165,-0
.69154263,0.06663221,0.68429029,0.12560424,0.37915874,-0.66829866,-0.64524972,-0.05568011,0.12230454,-0.35041061,0.62027830,-0.16739209,-0.72145337,0.46263054,-1.67837834,0.69413221,-0.57243419,0.37638462,-0.21446526,-0.89821470,0.60078722,-1.06706369,-1.26132309,0.35714921,2.39221811,-0.09376130,0.30760849,0.59180892,0.55815399,-0.32628775,1.28890121,-2.53237987,-0.98241091,1.10520673,-1.74751687,-0.90837651,-0.25220659,-0.56625104,-0.30691949,0.16058689,0.44309673,-1.09874964,-0.76747823,-0.33679363,-0.02535496,0.00990100,1.35318136,-0.70140815,0.50937581,0.55386209,-1.21721983,0.71376961,-0.18079315,-0.11077732,0.09292522,-0.57235324,0.62748206,0.42587611,0.64860481,-1.10635614,1.66414368,0.47505483,1.48602211,-0.59611166,-0.41932896,-0.96542233,-0.41756630,-1.02963889,-0.70070386,1.65803933,0.20138647,0.05895034,-1.46152759,-0.37278318,1.05535650,0.34437978,-1.13257408,0.17635690,0.09386671,0.37079874,1.47695887,-1.58420062,-0.26100200,0.44847637,0.88847303,-0.13877590,-0.64620668,-0.38019657,1.01608157,0.13357787,0.05137976,0.93498152,-0.62226880,0.80461699,-0.71682596,-0.88756353,0.40933055,-1.52167451,0.79756850,-0.17307425,0.62368619,-0.22466940,-1.72802913,0.59047443,-0.58020931,0.09096476,-0.07317388,0.44522321,-0.64880705,0.15684015,0.08708375,-0.41556796,1.11579072,-0.81733495,0.11643656,-0.73995101,0.93685871,1.57971406,0.67606360,0.70509088,-0.25283816,-0.00010609,-0.61884147,-0.86409342,0.95383751,-0.05895388,-1.45261180,0.45166013,-1.01434863,0.18496066,1.06517637,1.81127059,0.89470667,-0.13232610,0.46958798,0.13884509,0.57117194,0.29575035,-0.97884250,0.83291447,-0.59255791,-0.04354135,-0.19431923,0.30071029,-0.95421529,0.76359886,-0.47799742,0.68254346,1.19368529,-0.48935115,0.30357337,-0.50225669,-0.23370270,1.96702433,1.46558523,2.68482018,0.41622332,0.73697484,1.43430734,0.15387188,0.20875402,-2.49335337,-1.39674246,-0.22125854,-0.00424605,0.91416460,0.33384630,0.44703746,0.25610185,0.38966551,-0.01784045,1.66148460,0.36005461,0.95716912,-0.1824
6566,-0.15480693,0.38775176,-0.56969136,-0.29644895,-1.04565966,-1.00455630,0.30897698,-1.46885884,0.03657720,-0.49302089,1.34134722,0.01673754,1.22725964,0.55256772,0.63803208,-0.29041430,1.11455286,0.76329172,0.27073982,0.77173829,-1.79884446,-0.11889492,-1.92040312,-0.46382675,0.20078070,-0.98889589,1.46711135,-1.68280172,-0.52852470,0.66245162,0.29575166,1.34826505,-0.22362417,-0.14345661,-2.34815073,1.26572001,0.66505629,1.01141500,1.08030057,0.17036134,0.00168786,-0.37282917,0.69206375,1.07367527,-0.49708191,1.49504781,0.58224988,0.96593714,-1.07661915,0.25202179,0.25531644,0.42357162,-0.31236249,0.48383278,-0.06361829,0.24131298,-0.95695931,-0.12589653,0.36134180,3.20266032,-0.40879184,-0.66985190,1.51674330,0.34072638,1.15076303,-0.40199137,0.46223637,-0.48608047,0.99119538,-0.22506073,0.30968750,0.64210880,0.54640514,0.18607031,1.26293361,-0.77960914,0.79572529,1.01936150,2.27160740,-1.48034489,0.74466604,0.14863680,0.31102443,-1.15673816,-0.38609681,-2.65026069,-0.45524642,-0.74022961,2.74991131,0.00103815,-3.03303242,-0.41556966,-0.87103498,0.78306234,-0.88195556,-0.77297026,1.21203196,-1.09754920,-0.03556008,-0.31546223,0.72954375,0.25251788,0.11378583,0.50921023,0.30301905,-1.60631680,0.27152416,1.17342317,-0.70891970,-0.08392961,0.92137378,-0.10568139,-0.31653777,-0.28878728,1.22166574,1.12693942,-0.21325994,0.94010323,1.21796405,-0.68866694,2.30724216,0.28141466,0.83481526,-0.04885862,0.01675143,1.04355800,-0.81050140,1.51300573,0.53429186,-0.56439877,0.38572624,-0.05620475,0.67644542,0.72528905,0.05937041,-1.06315899,-0.51393986,0.46937627,-0.34699562,-0.64765716,-1.45512629,0.47739139,-0.88228017,-2.00791359,1.29929042,0.05482405,-0.66725296,-0.54735124,0.09972951,0.76675093,0.98748523,0.08900899,-0.78854066,1.47970486,-0.61667502,0.45625573,-0.21766303,-0.46250847,-0.07130960,0.64414692,0.12784545,0.26393634,1.07720757,-1.23938286,0.62483376,-0.55001754,-0.05358591,0.07322436,1.12003291,-1.00830650,-0.20486419,0.76664752,0.28850746,-0.04464776,-0.4
0146068,0.73262817,-1.12827921,-0.19989438,-1.15999687,1.37973154,0.78881019,-0.34762639,1.22088552,-1.64088547,0.63218033,0.45736769,0.05502866,2.22683382,-1.78935897,-1.49635041,0.83450896,1.67770112,1.33909333,1.51158953,0.28595078,-0.08593627,0.45812801,-0.15193029,1.14770603,-0.88920450,-1.96352005,-1.49894583,0.49629962,1.59872091,0.00903497,2.15563583,2.25149560,-2.01200557,2.56229877,-1.38850498,0.73552012,-0.39378855,0.52616280,-0.03685786,0.87403935,0.12163408,0.74297994,-0.30697080,0.38139752,0.49113834,-0.95485127,-0.99908817,0.71716321,0.04000283,-2.09645271,1.38789880,1.37198520,0.82493287,0.17114936,0.53696346,-0.19516060,-0.50377476,-0.91730285,-0.70113552,-0.02406530,0.84943396,-0.17428185,-1.09140801,-0.68156958,1.70756388,-1.00399911,0.03023832,-0.39023280,-1.89737976,1.14469039,-0.58337289,-0.60037899,-1.17490256,-1.56342828,0.48714057,0.62266618,-0.15967095,1.32789338,-1.25700688,-0.55633998,-0.83128709,-0.49346271,1.59561753,-0.24675299,0.38012561,0.91796309,-0.38522810,-0.65509188,0.94100451,-0.57324487,2.19070768,1.24058700,-0.75978851,-0.40460554,0.79189235,0.70192885,1.93569362,-0.03070199,0.77010989,0.58794290,0.51087004,0.22892070,0.35007235,1.56023848,-0.67453802,-0.18485607,0.64349502,-0.31489357,-1.95834625,0.06560058,2.30394220,1.18194163,-0.88034087,-1.05000436,-1.05471325,-0.98481798,0.49904808,0.16438948,-1.10297823,-1.39736509,0.01306054,-1.85160267,-0.87292641,-0.15418227,0.43412164,1.16518164,0.06273691,0.24659210,-0.08267246,1.28885782,0.73575675,-0.01019809,-0.08753663,-0.61827368,-0.40863234,2.12599611,-0.53620332,0.53789747,-0.66386080,-1.70461988,0.86608189,-1.11151052,0.14120635,1.18858743,-0.31760478,-0.73533046,0.20978074,-0.84074509,0.16523147,-1.03362834,0.59721231,0.21318658,0.23671274,1.75115061,0.25363782,-1.32541454,1.13056135,0.24652456,0.60381413,0.21478581,0.75044096,-0.63125616,-1.69889998,-0.02116571,1.46165359,1.03068244,0.63693464,0.67795700,1.20033514,-1.39205134,-0.61743122,0.56549704,0.65182322,-0.7425050
7,-1.61939359,1.14054918,-0.45725963,1.74519682,-0.66251940,-0.94811529,-1.60865819,-0.59968346,0.86309159,-1.91936195,-1.02646923,-1.50352538,0.58292735,0.05320299,1.53582895,0.01069612,0.15226212,-0.71840125,-1.36896348,2.14600968,0.96626586,-0.52014917,0.41001406,0.59478027,0.15282436,0.27790198,0.76614654,-0.38971323,-0.01839927,-1.57882118,0.61391610,-0.62133092,-0.03968323,-0.88467252,-1.24041140,2.07306671,-0.41776338,0.14537935,-0.91069067,1.67362070,4.72630215,-0.07395106,0.46280116,-0.40843824,0.70683080,-0.27510864,-0.63465804,-0.83630908,-0.44419941,0.60405648,-0.65039170,-1.02413189,1.05983019,1.73366308,0.73343736,-0.00895882,-1.00826013,0.17323074,0.73995626,0.24128854,0.94510227,0.25557515,0.02244723,-0.95197725,-0.16297856,-0.38497585,1.17993331,1.20282137,-1.31491220,0.44229278,-0.24349044,-0.01230415,1.37944865,0.48554277,-0.54510897,-0.10793537,0.41121426,-0.12889031,0.26434359,1.27966082,0.64518744,-0.15577169,-0.99864733,-0.61746484,2.01614976,1.56254935,1.86473298,-0.54662132,-0.22047071,-0.06118120,0.84799510,0.17009684,-1.30523121,0.64000309,0.36299205,-0.59620583,1.36372304,-0.05389515,-0.93849313,0.98043185,-0.39373067,-0.84898937,1.32077873,1.05988657,-1.35339200,0.23259017,0.63816410,-0.80297333,0.60017115,1.25715804,1.18894124,-0.62473553,1.05611980,0.02335166,1.07509828,0.25873449,-1.68341100,0.54547334,0.79288185,-0.93678916,0.19202201,-1.48575914,1.08649087,0.50851744,-0.45758674,-0.39734635,0.35637981,-1.63079453,-0.75910008,0.92640859,-0.55599529,-0.40276715,0.31307653,0.39907026,-1.18830419,0.71051043,0.14157933,-0.39581308,-1.64361024,-0.06161860,-0.25312796,1.10018682,0.56500763,0.80385065,0.35395023,0.81813669,0.27644628,0.65563256,1.73197234,0.68178749,0.76769936,0.44597456,0.67761195,0.67635447,-0.32315412,0.19330767,-0.25557944,1.91693723,0.38335562,0.07107610,-0.57384586,0.79184365,1.87835479,0.60902315,-0.94220877,0.79479855,-0.25656971,0.08739131,0.53384244,1.22159266,-0.39152125,-1.46373534,-0.02458516,1.62825716,-1.2611
2676,0.19967082,-0.71114451,0.27929229,0.65001321,-0.11868202,-0.55587751,0.78069001,0.57969242,-0.60274386,0.31650013,0.90339553,0.09453616,-0.37119162,-1.00320566,0.33299938,-0.48636708,0.26342997,-0.91914523,0.28682709,-1.24780893,-1.59254742,0.97176319,0.14744301,-0.53056234,-1.73221612,-0.67645556,0.98705006,0.79895812,-2.04333115,-0.60132772,-0.91653955,-0.28094748,0.47943443,0.38157779,-0.67648011,1.09093642,1.66012859,-0.29358891,-1.26773024,0.36747769,-1.10141146,0.82383633,-0.89772314,-0.47145563,0.63939518,-0.64430422,-0.48889321,-0.37680882,-1.06962025,-1.28689516,1.28365147,0.61859220,-0.84676331,1.38404000,1.21053445,-0.14871351,1.06349385,1.45878971,-0.47362664,1.40707004,1.25224137,0.87364739,0.92858213,0.00157326,1.45661485,-0.27318576,0.15482858,-1.07058907,-0.06903186,-0.74147576,-1.64111829,-0.67226541,-1.13458407,1.28511488,-0.41041154,2.09085560,0.45243183,-0.67437285,0.84960121,-1.49300814,-0.42961186,-2.35021853,0.57255560,-0.73903763,1.37607956,-2.44575167,1.25105727,1.38575912,-1.16299784,-0.13719854,-1.11507034,0.35796806,-0.64511567,-0.87903833,0.32833642,-0.87696886,0.02714214,0.30224666,-0.69118696,-1.23500824,0.76678628,-3.20508122,-0.24704689,0.49019828,-1.20862615,-0.03778638,-0.07273687,-0.11517122,-1.75857520,-1.64188445,1.21574795,0.57325113,1.14370298,-1.07824504,1.70653832,-0.03700557,-0.47645858,0.11065386,-1.03143036,-2.18094873,-0.94403434,-0.09335683,-0.44817665,1.39707148,-1.21947956,0.56575936,-0.69612634,-1.12361753,-0.17105591,1.15422392,0.02840637,0.09469353,-0.52859986,-2.08487725,1.28789508,-0.03740775,0.61196613,1.23405397,1.56595814,-0.65800631,2.02985072,-0.69446486,-0.88443804,-0.23448054,-0.43628734,-0.45888957,-0.21943338,1.78258693,1.75214970,0.71804136,0.49782532,0.37886053,-1.59176385,-1.74758542,-0.02820176,0.75398153,1.00119829,0.80881971,-0.53365272,-0.22720885,0.37476870,0.01005529,-1.23421800,-0.13431595,-1.01843679,1.87386346,-1.68539488,-1.04942071,-0.77322137,0.53964764,0.29278332,-0.58299130,-1.56022
692,-0.79441273,0.49289709,0.44112054,1.07305002,0.54899335,1.13781393,0.77809113,0.81795985,0.16576190,0.32552773,-0.20250474,1.46543837,0.12731771,0.21013761,-1.34241438,0.44267517,0.93246883,0.08808212,0.92653406,-1.21083558,0.17247954,-0.70557106,0.04630012,0.48834828,0.89634645,0.46683592,-0.29553145,0.46363977,-0.48971879,-0.88603491,-0.12333342,0.37073737,0.92061806,0.54675460,-0.14716248,0.75578392,-0.98173791,-1.15983224,-0.58713156,0.07950903,-0.59016788,0.41622928,-0.32474482,0.42086437,0.23061797,0.62596649,-0.22615278,-2.14721417,1.01685894,-0.25976995,0.00739352,-1.31597066,0.39005190,-1.09549701,1.68375242,0.43331525,-0.37124026,0.22255214,0.59654880,-0.73840386,-1.20048976,0.12226126,0.12997478,1.04826224,0.03894836,-0.36289826,1.14466560,-1.18198848,-0.03713558,0.67677927,-0.42329931,-0.89409167,-0.77874780,0.58438253,-0.35176343,-1.53329861,-0.02995299,-0.40145162,-1.51052392,0.09194464,-1.13275242,-0.61983156,-0.40004560,-0.19893464,0.22134103,-0.03903082,1.14894116,-0.03476744,0.22520730,-0.55851930,0.76650429,-0.57863152,-1.34161711,-0.31498179,-1.19411755,1.70044947,-0.17428267,-0.35983825,-0.42613637,0.58165723,-0.77866900,-1.59727287,-0.61723864,1.51078022,0.32971445,-0.86441469,0.60552609,0.00208178,-0.47096625,-1.10479307,-1.21652532,-0.08211990,-1.43739200,-1.31684434,0.43312529,-0.76822090,1.88128507,-0.02179282,1.04971325,-1.55004108,1.25337446,0.11203052,-1.16048300,1.59467411,-1.29469275,1.14019871,1.20021439,1.84098923,0.05004879,0.73529941,2.05272865,-0.13080600,-0.08436690,-1.17919350,-0.66256678,-0.36727047,0.73840511,1.22293818,-0.00206342,-0.29839504,-0.00618613,1.04213119,1.21176076,-0.62886089,-0.02589060,0.96009409,-0.64478731,-1.16516542,0.57528079,1.04294407,-0.09774588,0.45935291,1.03263175,1.00633478,-1.82209253,-0.18035053,-0.28302726,-0.83813244,0.57593471,-0.03807700,1.60498738,0.16530658,-1.43083501,2.10824299,0.30279446,-0.03961089,-0.38900724,1.31272805,-0.56575215,0.57970244,-0.48305038,1.34114623,0.21859215,0.66399
640,-1.52087069,-1.30717897,0.14394683,0.97648209,-0.71372712,-1.22574198,-0.27702177,0.04041927,0.02442212,2.19617033,-0.48566443,0.81463927,0.20383844,1.17562282,-0.33829874,-0.42141283,-0.96415234,-2.39141965,-1.04285860,-0.23004992,0.41186509,0.03811268,0.36818987,-0.71099734,-0.56749570,0.18486284,-0.44530040,2.14008284,-0.27467576,1.70690107,-1.40462613,0.24697532,-1.31629777,-2.20674944,-0.67868507,-1.15767133,-0.64391804,-1.79037917,0.58749497,-1.58303332,-0.69021022,1.64376318,-0.95393223,1.98415601,-0.10991055,0.02474386,0.23683345,-0.63420391,-0.57991928,0.83028817,-0.40033704,0.19212338,0.74640590,1.10264432,-1.65286255,0.92683482,-1.42252541,-0.74605089,2.14535880,0.12971123,-0.47971717,1.67546797,0.42268261,0.22648531,-0.42369929,0.77403021,-1.31818616,-0.67143595,-0.04311426,1.64128351,0.34776631,-0.39353722,-0.42765084,0.16170517,-0.54488391,-0.38428506,0.42097485,-0.55982012,-1.74543798,1.53704774,0.43562424,-0.30395737,0.31846946,0.39205357,0.57386035,-1.11912560,-1.39164317,-1.04337609,0.31629622,1.51927638,0.88745505,-0.40445471,0.25783861,1.88646257,0.36509129,-1.13266826,-0.45394278,-0.48400903,-1.22332740,0.38626808,-1.10049105,0.84138852,1.27863181,0.53942156,-0.67743856,-0.03896645,1.70393491,0.60997570,0.43368068,-0.13338457,-0.18920666,-0.29583672,-1.40738738,1.03876019,1.71253765,2.12821221,-0.96092403,0.93841934,-0.79030478,1.36427641,-1.39196694,0.08514920,0.16223004,0.71259701,0.20150672,0.25068361,-0.99952722,1.80129099,-1.28586197,-0.64957166,-0.94813949,-0.40161121,0.31977695,0.54932386,-0.67757767,1.88086259,0.92337233,-1.64887333,0.44333732,-0.19468001,0.12977587,0.21171951,0.27679422,0.49134475,-1.44429457,1.25617445,0.39978400,0.99869555,-1.61617446,1.61177349,0.70243025,-0.95748568,-0.61795151,-0.77302909,0.72967088,0.81964350,-0.71813750,0.90140164,-1.45950246,-0.79972702,0.40875742,0.00152073,-1.74491429,1.53776145,0.75769204,-0.22075878,-0.58385569,2.18884754,0.33597681,-1.66265559,1.03805876,-1.55245185,-0.03582226,-1.94542
754,-0.76081425,-0.50471377,1.35763168,-0.39631784,-0.17134467,-0.82220149,-0.41021580,-0.00940776,-0.80176353,-0.19816744,1.22061026,-0.14486519,-0.71727395,-0.65721530,0.47020102,-0.70403302,-0.94795334,1.79884899,0.07779162,-1.50615680,0.04140327,-0.22001404,0.63735324,0.79237640,-2.25412822,-0.52519119,-0.87280381,-0.07100742,-0.94734806,-0.12286110,-0.13623615,-0.42595413,0.17547913,-0.81707209,0.36855817,-1.68186557,0.19312963,-0.66249490,-0.98283452,-0.33314428,0.40918943,0.88268638,-0.05390308,-0.22440539,-0.15879378,-0.34859571,-0.01013108,-0.30005428,-1.19408464,0.21789688,-1.07769871,0.81475031,-0.69555300,2.35201311,-0.40362412,0.93497628,1.13343573,0.92343372,0.26987928,0.46123627,0.22577702,1.26289701,-0.45956740,0.55994868,-0.58410591,0.13304594,-0.25806463,0.49044946,-0.82065403,-3.06672239,-0.27774641,0.68504512,-0.21386372,1.11427057,-0.73201770,0.51655543,1.77261138,0.72081727,0.11116749,0.16637769,-0.74987584,0.66579849,-0.75808716,0.20678560,-0.67698354,-0.82141948,0.61008269,0.66520184,0.44894725,0.73015076,-1.52517414,0.11714164,1.90452611,-1.30355322,0.12144456,1.18547559,-0.07349755,-2.28061509,0.83522540,0.78438890,2.19334102,0.90305614,-0.59345531,0.77925014,1.32338643,0.14068902,1.19032264,0.20666829,-0.76595837,0.74967057,2.86965609,0.55690205,-1.72530472,-0.83317834,-0.85842621,-0.29678273,1.80955839,-0.70496303,1.19106734,-0.92985237,-1.00617313,-0.56049556,-0.29382578,-2.04022193,-1.95356870,-0.42553005,-0.33369407,1.02115977,-1.45769477,-0.67720300,0.53819913,1.57643425,-0.47015440,-1.47861958,-0.00545934,-0.97836047,0.42680529,1.56110144,-1.49487829,-0.65198445,0.22720462,1.83036661,-0.47099793,-0.09915133,0.14923312,-1.16313052,0.67798084,-1.63665557,-0.38220280,0.01719763,0.30041245,0.43148938,-0.44021657,-1.25734651,0.02465564,-1.00845659,-0.28574651,0.01367745,0.77253437,-0.99399441,0.61445391,0.18343423,-0.50997210,0.41359940,0.77279282,0.83511519,0.27929801,0.70800692,-0.20278299,1.57884383,0.22650529,0.43347472,0.74003208,-0.
71401161,-0.69829476,-1.56766701,-0.99254119,1.27301061,2.73726511,0.66089469,-1.95778012,-1.24642098,-0.63579029,-1.63168180,-0.66980726,0.81933254,0.61866677,1.40594471,0.05158535,0.00196500,-0.24592508,-0.50780547,-0.83905292,-0.10748957,0.04490763,0.27769178,-0.23227681,0.82108080,0.03562285,0.95483875,-1.49897683,0.67809856,0.35497451,-0.44021592,-1.67361462,-0.88895375,1.44293678,-0.85046643,-0.46437624,-1.87252641,0.26775804,-0.24535774,0.73365933,0.52253938,0.27947086,-0.58796054,0.59045380,1.93476331,-0.46775359,0.25238225,-1.26601815,-0.13324316,-0.71454948,-0.21610366,-1.49586582,1.04903507,0.22208478,0.25512528,-0.46157327,-0.41319233,-0.63846964,-0.25100923,0.81277549,-0.26959971,0.88737756,1.24578953,-0.91121447,-1.05756927,0.44390878,0.16672316,-1.22941923,0.89547867,-1.50212002,-1.69620168,0.53339505,-0.23656729,-1.69879091,0.01510374,0.08315694,-0.73196459,-1.60263407,-1.07601058,-0.76389569,-1.65307498,-0.61484390,-0.43546933,0.71318507,-0.16273083,0.64122051,-0.15406294,1.17673671,-0.91240519,0.71091145,2.40497613,1.26343656,0.71469337,0.20705548,0.81776261,0.36253929,-1.92106628,-0.09300470,-0.36648872,1.27732766,-0.39180157,-0.61186749,-1.03455031,-0.25079829,-0.61479062,-1.07094336,0.82218504,0.89934880,0.41308978,-0.59968555,0.37682834,-1.77388155,0.00294951,-0.66145372,-0.50789726,-0.85123241,-0.89909405,-1.89454281,-0.56692821,1.52272677,-0.11961794,0.27843913,-0.60582250,1.01871169,-0.36098275,-0.12242325,-0.67375034,-0.11204147,-2.62773919,-0.95901299,0.14040214,1.32364666,-1.35099924,-0.11077739,-0.79319423,0.75949597,-0.25485823,-0.90959758,-0.42373934,-1.29850340,0.85699379,-1.11882365,0.63470817,0.49696380,-0.07983235,-0.23903450,-0.22618714,-0.12117998,-0.09442677,1.55589819,-0.11996678,-1.72700179,0.54683149,-0.40804827,-0.50099218,0.34596699,-1.81841791,0.06385052,0.84428120,0.69901514,1.94559097,0.43251973,0.16794942,1.82829034,1.70959795,0.36130908,-0.94608402,-0.53498030,0.47781768,-0.24203247,1.25065851,0.51788396,-2.09381890,0.
72973937,0.03281829,0.58632666,1.85737121,-0.49569523,0.45921183,1.87173629,0.22803484,1.66433418,-1.05872321,-1.13663685,0.12397861,-0.65112090,0.98152941,0.83739656,-0.18783289,1.84249437,-0.90706986,-0.80824369,-1.23854923,-0.86488134,-1.02627063,0.10976455,-0.61403006,1.27554715,0.14653525,-0.03953953,-0.08512071,-1.30043304,-0.02566035,0.12054887,0.00282162,0.48921332,-1.74398839,1.44554436,-1.35854721,0.69256759,0.34101671,2.50045252,0.49121150,-0.27115449,0.93974596,0.26258010,0.27151433,-0.87214381,-0.92580765,-1.03269923,0.20615758,-0.37822601,0.58983004,0.16426525,0.68218285,1.98158526,0.47492698,0.54224718,1.28722692,-1.76915324,-1.11240053,0.77428484,0.27184650,2.22473478,-0.05574624,0.39976570,-0.43911108,0.52805597,0.17340177,1.36057591,-0.35004014,1.72787797,0.68357420,1.25532615,-0.56752264,0.51840127,-0.21237844,-0.58821255,-0.85278064,1.90179110,-0.67447448,-0.36831430,-0.22930753,0.98231596,-0.07011599,-0.08560387,0.05998110,-0.02481356,-0.57335132,-0.44288307,-0.24468307,0.53321087,1.19609559,0.10664973,0.24379487,0.93687552,0.93615580,1.74319768,-0.68310338,1.32163060,0.61918712,-0.76501870,-0.54549301,1.74077415,-0.69977754,-0.66880983,-1.15981388,0.81571609,0.53788543,0.47898352,-0.02484704,-1.64646924,-0.69822907,0.27020717,0.05027051,1.75149667,0.01548872,0.32615909,2.55151844,-1.29172051,-0.36133784,0.98637396,0.14009331,-0.50038946,-0.92230296,0.17307127,1.05361068,-1.46784890,2.38960409,1.19413340,-1.33349669,1.59141159,-0.71811068,1.22429430,1.26947939,1.08177102,-1.18138707,-0.72775704,0.17282635,-0.40554270,-0.40341887,0.46564049,-1.02069795,-0.07653128,-0.13979210,-0.31195050,-1.72042310,1.37131393,0.63849634,0.75561279,1.81152904,0.26686314,1.32796574,0.56100166,0.70058894,-0.88962644,-0.04360984,-0.88249093,0.24311203,0.50410056,-2.22567797,0.94520348,-2.12467694,0.47282359,-0.71379906,-0.09857135,0.62374717,1.37182784,0.73380554,0.59745449,2.80427694,0.67253572,1.65335357,1.69891667,1.34585941,-0.79989213,1.44980943,-0.52013642,-0.
46971673,-1.50070012,-0.25687039,-0.56916732,0.71065760,-1.31996286,0.96031237,0.13929774,1.49679291,-0.05966444,-0.58674580,-0.08278833,-0.93390942,0.42415768,-1.77889526,0.75336021,-0.72699982,-0.82880586,0.63955617,0.42771208,-0.42366457,-0.91581815,0.94750947,0.43123913,-0.99053741,0.70470595,-1.16662264,1.14847183,-0.83885664,0.46714026,-2.27748466,-1.23656678,0.14695056,-0.33159894,-0.52553117,-0.04391259,-0.29630372,0.25949728,0.96991086,-0.37714824,-0.28251833,0.16106486,1.38844633,-0.18713553,-1.30708838,0.48490265,0.29553881,-0.45505449,0.83341682,0.87346369,-0.63516861,0.66063565,0.93892503,-2.73996735,-0.81515318,-0.91458052,0.00978268,0.43472794,-0.08090764,1.37249672,0.76722521,-1.19154143,0.22046764,0.34916410,0.51383299,-0.56379753,-2.49949312,-0.74207872,-0.68400806,-0.09663232,-0.07199454,-1.05562651,-0.75028551,-0.87253797,0.69039482,0.45923674,-1.27515161,-0.04555376,-1.41501272,-0.83773375,-0.74807298,1.36646152,0.06317432,-1.32559633,1.89092779,1.24883330,-1.03608561,1.08677161,-0.99629849,-0.69947034,-0.85716367,-0.07947286,-0.25485426,-0.19732477,1.64581251,1.04618108,1.87186897,-0.18198362,-0.83807969,0.70462501,-3.18930101,0.74610996,-0.60935193,-0.49383929,-2.88986492,0.51707613,1.04620326,1.09837818,-1.19840038,-0.10391295,-0.20789115,-1.51052022,-0.31087330,0.22411564,-1.30506921,-1.52000105,-1.51593041,1.04321992,0.97611690,0.90424490,1.83324766,-0.08682299,0.47035542,1.70865905,-0.31108001,0.04115159,-1.36352801,-0.90797836,0.32128647,0.66191489,0.08681208,0.14993365,0.47110486,-0.31522670,-0.38906571,-0.08876022,-0.13106902,2.25685239,-0.62211353,-1.68553007,-0.23707703,0.69236159,-0.46686995,-0.27520603,0.26619941,1.48525345,1.61278927,0.49452963,1.20846486,-1.11853909,-0.30010033,-0.75471467,-1.69959772,-0.52042168,-0.43881389,-1.45240712,1.02122891,1.73639011,-0.03813924,-0.22239220,0.15797073,-0.64418089,-0.60228932,-0.83248150,-0.02042520,0.38137484,0.86056453,0.06410559,-0.62785137,-0.49916875,-2.53796315,-0.79168582,-0.69197005
,-0.77175534,-0.28669405,-0.79764080,0.97218460,-0.10351621,-0.52759898,1.02840185,1.16363287,0.08351815,-0.61088538,0.59944046,1.54409397,-1.39842033,0.27917057,-0.27146137,1.46310735,0.03626106,0.15038440,-0.07894899,-1.42527366,1.69641745,1.48384345,-0.43328866,-0.54252565,-0.94416499,1.54436302,-0.81367069,-1.67925239,-0.17525831,0.27891046,-0.69066733,0.89911050,0.11606655,0.67450327,0.41538724,0.90886223,1.19786549,0.85810721,1.32862210,-0.83469814,-1.09682298,0.88092703,-0.97478902,-0.11664717,-0.07929394,-0.69581884,-0.16928329,-0.70731819,-0.40485084,-0.28954300,0.52882415,0.38769314,-1.38704026,1.15099049,-0.43566978,0.34459323,0.49520254,1.11130333,0.28783718,-0.53783375,-1.63577271,1.02222812,0.86302060,0.48346213,0.46627176,-1.30133855,-1.48477137,0.31219670,-1.21498191,0.89838904,0.87186617,-0.39968935,0.34930915,-0.32909471,-1.39364409,2.13006306,0.33270469,0.00215986,0.97776711,0.24908836,1.56164885,0.45157790,-1.55970144,0.27677536,0.07662498,-0.08262251,-0.17658773,0.65820259,2.01052690,-1.71946216,0.84686053,-1.23594892,1.40792072,-1.47772563,-0.36132276,-0.50405115,0.09009213,0.81659186,1.85574234,-0.64974433,0.63352364,1.01766217,-1.54804432,-0.42570522,-0.24763709,0.72822112,-0.93733686,0.68087620,-1.40644944,0.48672482,0.09725539,-0.64416331,-0.95747960,0.36771363,0.39155054,-0.71790671,-2.17222738,-0.08655047,-0.97842115,-0.22991380,0.52029115,-1.42072022,0.29576331,0.32391560,-1.00823236,1.67909145,1.16841447,-0.32307062,0.15756166,-0.97590631,-0.39429301,-0.03583352,0.17554663,0.57961231,-0.46873134,-0.23343173,-0.85060924,1.71745574,-0.04658702,0.63088381,-0.67581934,-1.53171062,-1.58800113,-1.17987096,-1.16737640,-0.87544650,-1.17138922,0.38979119,-2.39369726,-1.34747124,0.58450359,0.87791806,-0.04459394,0.97995293,-0.10354915,0.65324986,-0.17833626,-0.85849386,-0.42063358,0.19708554,0.10255250,-0.59539181,0.86194044,1.68610668,0.55275291,-0.43127069,-0.04218780,-0.08466262,0.31236625,-0.92824298,-0.09879152,0.32358822,1.04045570,0.356175
45,0.09059231,1.19069445,1.96978688,0.63561743,0.15030998,-0.29879019,0.22774190,-1.01608860,1.03605175,0.47804731,-0.30450734,-0.61382371,0.45390254,-1.93547988,2.01267338,0.52447683,0.18379784,1.11913633,-1.24273467,0.15803322,1.72184098,-0.79349059,0.10258614,-1.53445125,0.02630571,0.81649125,0.91089755,-1.12968338,1.04016411,0.28999722,0.74863863,-0.61388236,0.01665530,1.43592548,0.68138391,0.11963340,-1.26123953,1.36340797,0.25696915,-0.58877039,1.42209792,0.55563360,-1.33329606,1.84695840,0.88433737,1.04359078,0.18906727,-0.03448994,1.17944050,0.86783957,0.44934425,-0.77892244,-1.76232874,-1.01689589,0.78943914,0.92141974,-1.00187087,-0.13809921,-0.90222073,1.10094714,-0.13657950,-0.44349849,-1.61441302,1.05724919,1.50337231,-0.05785890,-0.76958144,-0.51498759,0.69227600,-0.37975949,1.31949317,0.82049531,0.32868597,-0.31557772,-0.75534385,1.27303052,0.43453619,0.11296938,1.18182182,2.23387384,-0.86412978,-0.01599468,-0.70869064,-0.09221385,-1.23729551,0.79490280,0.03522846,-0.95069039,-1.73461652,0.72329187,1.40385795,-0.11585230,-0.78033113,0.07491048,-1.12873089,0.18476245,0.57568848,-0.28792691,1.35411644,-0.76956165,0.29571572,1.03178787,-0.38780826,0.31680650,0.69368076,-1.23856580,-0.49848995,0.14766994,1.02625990,3.03858209,-0.51030380,0.96796870,1.35078156,-1.07729447,0.84322494,0.54886484,1.31453705,-0.45792100,0.31196272,-0.15701357,0.83586836,-0.74952888,-1.17432022,-0.31002575,-1.02149463,-0.36117774,-1.22079086,0.03532525,0.00555908,-0.45891216,0.29636297,-0.68272704,0.41257843,0.37988129,0.01747893,0.82739186,1.52292180,-0.79456621,2.20275712,2.13212132,-0.81393015,-1.15712392,0.22488308,0.62776327,-0.85444915,0.44017896,0.05863331,-0.83198178,0.93063420,-0.16121253,0.12382501,-0.37826315,0.93118382,0.19507533,-0.58595538,1.46994352,0.13170272,-0.70031989,-0.12820166,0.30487457,0.84148771,-0.68807501,0.21187615,-0.67030680,-1.79136002,0.70810199,-1.20959783,-0.08468831,-0.06317700,1.35527098,-0.47018668,-0.91693246,0.14818805,-0.05405350,1.168756
37,-0.17363262,-1.61833882,-0.32934523,-0.38346377,-0.62702698,0.34135151,0.48015586,-0.65263331,-0.04689486,0.01156854,0.37580970,-0.16174591,0.59627324,0.24351901,-0.87983090,1.57049024,1.25836349,-0.41464049,-0.62279183,0.09693756,-0.23850618,-0.49007827,0.22298151,0.10914832,-0.35192192,-1.27221346,1.10203624,-0.86399704,-0.47319838,-0.77105570,-1.68624854,0.81198281,0.82534081,0.75654501,1.47631240,-0.61000234,-0.58933264,0.54822850,-1.22829592,0.11107657,0.56449169,1.50693524,-0.59280968,-0.64286685,-0.20120731,0.27184448,1.55500400,-0.48919386,1.04044867,-0.87048137,-0.40569979,0.21908638,-0.51829034,-1.48748124,0.02990401,1.83462536,0.29885170,1.32370698,-1.30129600,2.43271399,0.22967771,-1.13014007,0.95529765,-0.83325785,0.43633386,0.85774118,0.78160155,0.58583075,1.18906367,-1.54354560,-0.68320692,0.01900371,-0.79777133,0.12851712,1.10176420,0.79418170,-1.41154039,0.36929929,1.12176800,1.23849642,-0.89377707,1.01390159,-0.50889206,-1.12554002,0.17932732,0.48949540,-0.54235244,-0.28146735,-1.39125514,0.13309635,-1.12864995,-1.29901242,-0.04266220,-1.98028529,-1.34869373,0.00038156,-0.92473024,1.48010647,-0.02754467,-0.26030368,0.93083733,0.27946711,0.64052200,-0.04220961,1.25002527,-1.07923257,0.19048618,0.08900311,-0.40813437,-0.73068553,0.52122378,0.68990833,-0.38749605,-1.09269309,-1.63480806,1.01789618,-0.61596102,0.81049860,1.30838764,-1.49213874,-0.77916288,-0.72660202,-0.92013240,-1.61726642,-0.11527207,0.35143322,-1.11646879,-1.45525432,-0.82892823,0.15512508,1.01891017,1.40162635,1.02494884,0.33882582,-0.78747398,-0.26009330,-0.38519114,0.79247451,0.02065756,-0.48030257,1.01167107,-1.74057114,-0.84549171,-0.15337363,-1.92544484,1.01270044,0.00762185,-0.16405612,1.61778915,0.93316060,-0.68960994,-1.13214970,-0.94695878,-0.28418848,0.17102109,-0.08787476,-1.83799696,-0.13761258,-0.18652774,1.46456254,0.34169790,-0.40697145,1.49663997,-0.99555492,-0.67775637,-0.51951116,1.35157657,-0.27099034,-0.46987835,2.28101230,0.59104478,0.75010139,1.01472175,0.2
5741309,-0.56074983,1.12267506,0.35336846,0.61733276,-1.63976014,-0.17700450,-0.25093642,-0.75599891,2.10956192,0.95155340,0.72049862,0.50492924,0.62067389,2.08688402,-0.73604703,0.63383341,-0.53528428,-2.11538506,-0.98173052,0.59560484,-0.26205051,-0.91948050,0.00593397,-0.11734286,-1.41261208,-0.83611172,-0.27682739,-0.20619918,-0.36557615,0.77194935,1.67695415,-1.39265156,0.04892010,-0.37773246,0.16124558,-0.18348448,-1.38248885,0.58459854,0.65064198,1.11349559,0.36708066,-0.15471332,0.14208725,-2.06860566,0.29629150,0.93084633,-0.47215626,0.60208917,0.95415461,1.03390312,-0.03639749,-0.23988228,1.27037442,0.95133096,0.33187470,-0.34527761,0.22134073,1.01799667,-0.81475645,-1.18869019,0.23314142,0.25180560,-1.23762786,1.25283313,0.16980635,0.40740708,0.59256923,0.16274920,-0.69713289,-0.16444311,-2.41602516,0.37952334,-0.05604568,-0.23772651,0.20581599,-0.54303211,1.71877348,0.83602583,-0.32586128,0.73609394,-1.73640239,0.07249248,0.31248692,1.77627432,0.97660398,-0.42095289,-0.18750280,-0.84246057,0.29762223,1.87054563,-1.46980762,-0.45306337,1.52366042,1.39061129,-0.04980387,-0.55382830,-0.96987218,-0.06910808,-0.41276473,-0.83891344,-0.92597574,0.60252470,0.21938549,-0.04451685,-1.00330937,-0.36955237,-1.52876902,0.27296364,-1.96721256,0.05291027,-0.91540521,0.48990685,-1.99560380,-0.68551093,-0.14532298,-1.56881595,-0.08319287,0.31003201,-1.42829597,-0.61810297,-0.03581250,0.77747720,1.25297558,-1.36239243,-1.13274276,-0.35045877,-2.34157228,0.04515179,-0.83044821,1.81353962,-1.36855912,0.39704823,0.16665934,-0.16654585,1.17806077,1.00086153,-1.25474250,-1.46876431,1.18021631,-0.32257929,2.12062597,0.86819613,-1.18048275,-1.69747460,-0.74092305,0.05086798,1.15339577,1.32972670,0.27247882,0.98499072,2.35597157,0.30179837,-0.66633248,0.13794266,-0.22753908,-0.22868259,-1.81792033,0.50151759,-0.79408127,-1.05343878,0.45727381,0.84800923,-1.73605800,-0.02032863,1.82778001,1.41025102,-0.81715560,0.25888795,-0.25075480,0.66256499,0.11993053,1.81336939,-0.06345166,-
1.49658346,0.07531686,0.96972889,0.87405980,0.75830793,-0.13497087,-2.45855975,-0.65984958,0.93919373,-0.97305542,0.73477978,1.04337513,-1.22712576,-0.46385625,-1.20876372,-0.82760453,0.01455977,-1.05089867,-0.02801843,0.60899758,-0.82052249,-1.48932517,-0.98073828,-0.19311285,-0.25602359,0.50351876,-1.24557400,-0.82138073,-1.45966852,0.44991320,-0.75550151,-0.98550314,-1.21418869,-1.15771639,-1.72192061,-0.39616469,-0.55566746,-1.31880891,-0.08843257,1.00422776,0.35846478,0.46060917,0.77326930,1.60129988,-1.85124147,-0.30582917,1.30227256,1.81890345,-0.44084981,0.25315762,0.70259613,-0.94882858,1.97040296,0.71473581,-0.68193883,-0.36290962,1.16348684,0.15418798,1.07806778,0.40554729,0.10280909,-1.06474805,0.64398485,-0.63568884,-0.06108581,-1.03290677,1.02834034,1.15284693,0.14046004,1.86630619,0.46804786,-0.68397558,1.60733378,-1.64890087,-1.03819239,-1.19212389,-0.78382361,0.03925850,1.52259934,0.09540676,-0.21220762,0.55955195,-0.39845437,-2.14541650,0.49337825,-0.68574250,0.74040270,0.50783634,-1.60461199,-1.26806450,-0.12652303,-0.83992827,-0.15524681,0.40098447,0.23392735,-0.23262636,0.06525709,-0.35994548,-1.08432877,-0.21395946,-0.78357452,-0.57157278,0.71407390,0.86596155,-1.13723528,0.13460183,-1.20881450,0.71018457,0.68943661,-0.70428050,0.64600736,0.01990297,-0.10575775,-0.80263519,0.10618331,0.08865548,1.51651669,0.60851854,1.15161908,1.04919207,1.18359745,-0.04352076,-0.83643389,-0.07922365,0.10597949,-1.34984851,-1.91319740,0.71585363,-2.10845160,0.64385056,-0.54551518,-1.02039802,-1.62510490,1.65401149,-0.42711899,0.07970079,-0.21404363,0.30498922,1.07942021,0.63995659,-1.82114816,0.56396323,1.07084870,-2.00350380,0.53339815,0.18500003,1.15034151,-0.21436051,-0.99986565,-0.58812016,-0.07247020,0.78910017,0.48839527,0.98795873,0.10357288,-0.05604928,0.38977858,0.73745090,1.40838420,0.25967824,0.23588051,-0.03451392,1.04897523,-1.77121758,2.35625434,-0.67086869,-0.84005541,-0.85940343,-1.04449213,-0.65917015,-0.78713167,-0.95910054,0.38597879,-0.31879
017,-0.86260867,-1.08593106,0.02802678,0.99484950,-0.55113328,2.60936737,-0.03388772,-0.47583574,-0.14021793,0.99019170,-1.22431207,0.78734446,-1.77037835,0.15018673,0.36423206,1.36447549,-1.61007094,0.51875496,-1.60788095,-1.73557448,-0.41414359,-0.93710536,0.38715765,0.04243837,-1.59682858,-1.10728157,1.88292623,-1.01428258,0.01074958,-1.88169158,-0.31616244,0.45334938,1.12449574,-1.16699445,-1.59505820,0.04126552,-0.89016622,0.45838884,0.71463561,0.14563711,0.30694655,0.67193079,0.61429602,1.00201404,-0.49295208,0.05997690,0.99491668,-0.73801446,-1.17185295,0.94778723,0.36106884,-0.43561545,0.04102699,0.52626407,0.08442099,-1.57626402,1.56855237,-1.65396678,1.74014664,-0.38219589,0.39305371,-0.31705827,-1.15742850,0.11669596,0.54043210,-0.52270615,-0.13375773,0.68094701,-1.84134769,-1.49383473,0.14632171,-0.54607725,-1.20867658,-1.28439069,-1.81734920,1.54257309,0.78347659,-0.24049839,1.69973648,0.99825776,0.99971974,-0.26055810,0.34143049,-0.44862366,0.11253342,-0.60932243,0.70383030,-1.87318194,0.21953633,0.82791799,1.64545465,-0.42693698,-0.64897031,-0.97996652,-1.06616282,0.52939081,-0.12541170,-0.57480675,0.73600835,0.35711968,-0.03528263,0.79997194,0.55742902,-0.28909785,0.64331138,-1.79893720,1.01572442,0.27111965,-0.51778597,0.12906317,0.76148927,1.51315522,0.41101140,0.38008851,0.66759896,-0.13804778,0.64854795,1.73474562,0.75999504,-0.73411214,-0.05406699,1.35664344,-0.25298578,-0.12696666,-0.42628938,0.61129904,1.55259824,-0.05820796,-0.38598019,-0.87325627,-0.55066222,-1.24557889,-0.26509118,-0.32103062,1.14031804,-0.75985742,0.70659167,-1.15016067,1.24906838,0.90396994,-0.16241251,0.43682271,-1.42695689,0.47134697,-1.66143429,0.08698819,-1.00775325,-2.24129725,-1.04226267,-0.98537570,-0.89938259,-1.80710697,-1.22866321,0.78125423,1.55150509,0.46235040,0.18444096,0.19313288,-2.20686269,-0.40341458,0.50321484,0.47339424,-0.81383848,-0.21972439,0.66612029,0.60239881,1.20443010,0.70015103,0.30632916,0.01489905,0.68129027,-0.89645082,-2.68969011,-0.966849
15,1.66421318,0.74333072,-0.78321886,1.60063362,-1.27524030,-1.95856726,0.47504124,0.15398432,-0.20796098,-0.13449343,0.93458968,1.60390890,0.21798505,-0.27035928,-1.23248971,-1.25361061,1.34666133,1.07233441,0.88799530,-1.23687923,-0.40781614,-0.11916534,-0.88050151,-0.66422415,-2.61471510,0.78276747,2.42323995,-1.70715427,0.71550035,-0.60298312,0.70491880,0.46175584,0.80827898,-0.45108104,-0.98219043,-1.72823501,1.73190725,0.53906441,-1.50445580,-0.59250867,-0.07239901,0.44743437,-0.13740127,1.69935930,-1.00480616,-0.58191377,0.39853972,-0.60960841,-0.45473522,-0.76396072,-0.31872150,1.74509728,-0.59950751,0.89810580,-0.81400329,1.14280319,1.11165059,-1.31295311,-1.60784578,-0.87506992,-1.13461006,-2.09486437,-0.16449419,-0.37728927,0.47595578,-0.55342919,-0.17574213,2.21499181,1.14331865,-0.14938518,0.18935619,-0.33802557,0.52538890,0.82673949,1.16562462,1.24713838,0.98890215,-0.64991701,1.49886703,1.97769642,0.08059916,-1.60925281,-1.23822486,-1.40829837,0.51331180,-0.29928651,-1.04348791,-0.39911583,0.69380492,1.54516888,1.22791195,2.25008130,1.33348894,-0.21775827,-0.71937007,0.54982573,1.70691478,0.32459491,-0.57187974,-0.21614684,1.08274269,0.41384646,0.24497485,-1.43703413,0.89616930,0.82032162,-0.24598582,0.84271127,-0.81894702,-0.01828136,1.70397091,0.39505738,-0.51221430,-0.87979966,0.10795479,0.45194778,-0.76008922,1.23394477,-0.56798172,1.06459570,-0.44333413,-2.40399075,-0.37267187,1.42946172,0.95734519,1.86127949,-0.15217264,1.68742633,1.97638428,-0.44211119,-0.98393327,-0.54173928,-1.72017395,0.74697793,-1.77827263,-1.92299354,-0.17189410,-0.48633271,-2.21230388,-0.45906609,-0.53493047,0.37253976,-0.56951141,0.07728028,0.03530006,-1.18123293,1.94158125,-1.55930352,0.69334733,-1.95163214,-0.95800400,-0.01804711,-0.56747472,-0.99099451,-1.52853060,-0.98279524,-1.67307866,0.96121490,0.35654056,1.74034202,-1.44633865,-0.27781928,1.79457986,-0.41029963,-0.76871634,0.36555341,-0.77664107,0.19535238,-0.76185411,-0.19828433,-0.88820636,0.63885397,0.11346363
,-2.50265074,0.16319332,-1.01288569,1.86605489,0.89761645,1.11795115,-0.00714116,-0.89034635,-0.76447034,-0.18822117,-0.48340848,-0.99788517,1.02172959,-0.39395007,0.72566581,-0.81438208,-0.71715081,0.96243578,-1.36424279,-1.13870537,1.17602491,0.16320205,0.71959788,1.66669416,0.55690295,-0.28912008,-1.19219172,0.23308393,-0.37963116,0.45347008,-0.42606446,1.30938649,1.25128853,0.57649273,0.34440875,-0.23893952,-1.06604803,0.31336102,0.75727910,0.46772480,-0.37650385,-0.06036821,1.03686309,0.46158856,-1.81028461,1.43393028,0.85494965,-2.34685564,-0.17571987,-0.45592231,-1.31190526,1.73194158,-0.11856517,0.07041293,0.25689471,-0.56000596,2.06649089,0.38954756,1.36627376,0.13905638,0.77370811,0.43944249,-0.08798827,0.07245751,-1.30234015,0.29710820,0.74389762,0.11971968,-0.07381748,1.32652700,1.34079397}); + auto input1 = NDArrayFactory::create('c', {7, 7, 16, 5}, {1.05293429f, -0.89349967f, 0.31027254f, 1.22991478f, -0.62926656f, 0.56918693f, +-1.60992694f, 1.10167944f, -0.80843484f, 0.07521993f, -1.15994942f, 0.76016301f, -0.40056285f, -1.16872537f, -0.91384381f, -0.36700436f, 1.82389200f, -1.18200207f, 0.51612782f, -0.92479187f, -0.09307563f, -0.55122334f, 1.23532486f, -1.11124146f, -0.05812126f, 0.68159896f, 0.69125599f, -0.77127314f, -0.10874277f, 0.86469102f, +-1.31614351f, 0.33354419f, -1.71750402f, 0.17197680f, -1.03965557f, 1.10570908f, -1.19115615f, 1.05115080f, 0.18277600f, 1.08820546f, -0.72191417f, -0.10999311f, 1.56521320f, -0.35433730f, -1.11799145f, 0.34499285f, 0.64998639f, -1.64371550f, 0.92592359f, -0.47659501f, 0.49101439f, -0.15613313f, 1.47486567f, 0.43576995f, +2.19538260f, -0.83567709f, -1.21846950f, 0.80400819f, 1.14637423f, -1.01503456f, -0.61992753f, -0.47378838f, 0.86503726f, 0.27147385f, 0.37073180f, -0.19951358f, 0.79167330f, -0.33982825f, 0.18631981f, -1.54715073f, 0.39967480f, 0.95067030f, 1.12508667f, -0.86676019f, -1.10341156f, 2.33141375f, 1.10972047f, 0.71407092f, +1.70640314f, 1.80666339f, 0.59465605f, -0.39653218f, -2.61163163f, 
-1.15013492f, -1.19908321f, 0.41783467f, -0.22730024f, 0.31425011f, -0.58562893f, -0.10131568f, -0.85047537f, -2.59974790f, 1.22072542f, -2.08812046f, -0.19363593f, -1.27664304f, -0.02703438f, 1.08477545f, -0.65506506f, 0.46040919f, -0.13715318f, +-0.74945593f, -0.69006950f, -1.29617655f, -0.15865716f, 1.38956285f, 0.90216327f, -1.31185400f, -0.15067385f, -0.63093358f, -0.05895613f, 0.26545224f, 0.29332840f, 0.42852548f, 0.72409540f, 0.12879130f, 1.43038857f, 0.68647617f, 2.19654775f, 0.51878077f, -0.03769343f, 0.52877223f, -0.21733910f, 1.13710785f, -0.59003806f, +1.54624867f, -0.64997369f, -1.03239334f, 0.19708300f, 0.68658423f, 0.71048903f, -1.55250466f, -1.38636279f, 0.32385820f, 0.81226677f, 0.19209047f, -0.23002781f, -0.63631231f, 1.02101684f, 0.65428704f, -0.17206922f, 1.09488952f, 1.03022420f, -0.95567745f, -0.07595373f, -1.48606372f, 2.57174873f, -1.75366247f, 1.12913883f, +0.97053039f, -0.28552356f, 0.56511772f, -0.79568213f, 0.07561764f, -1.02085686f, 1.05770981f, -1.25715709f, 0.42046708f, -2.57390857f, 0.96947151f, 1.05215812f, 0.65624017f, -1.29019403f, 0.64157075f, -0.40509227f, -0.65354455f, 0.42348680f, -1.34107757f, 0.05931387f, -0.54337227f, 0.95460182f, 1.59319806f, -0.44433126f, +-0.33717924f, 0.79566282f, 0.50112695f, -0.22244534f, 1.76904583f, -0.89817202f, 1.82985342f, 0.17671813f, 0.80720717f, 1.32469308f, 0.39417782f, -0.23720963f, 0.96796370f, -1.02348757f, -0.86615551f, -1.58120525f, -0.37634999f, 0.00905940f, 0.01880967f, 1.75771821f, -0.64372772f, 0.36687651f, 0.15854552f, -0.67599791f, +0.53726906f, -1.20158446f, -1.78549063f, 0.96476388f, -0.66158366f, -0.41681561f, -0.97541636f, 2.35928202f, 0.32130197f, 1.06886065f, 1.38736427f, -0.73718959f, 0.11215294f, 2.12865782f, -0.37927702f, 0.55621815f, -1.10108411f, -0.02032263f, 0.29595461f, 1.58737493f, 1.24001300f, -0.66748160f, 0.80729002f, -0.10575818f, +-1.03175950f, 1.80755460f, 0.10825710f, 2.20666361f, 1.33633149f, 1.39290452f, 0.45211342f, -0.07837920f, 2.08304930f, -0.28387162f, 
-0.70775616f, 0.43626297f, 0.53556961f, 0.06201901f, -0.59255266f, -0.11854446f, 2.10024118f, 0.37638292f, -0.56178707f, -0.25220188f, -1.23731256f, -1.30002999f, 0.34283713f, 0.30502397f, +-1.09233856f, 1.12430644f, 0.52273953f, -0.68507338f, -0.69913578f, 0.88440478f, -0.76959240f, 1.07093310f, -0.34802195f, 0.35683727f, -0.76079178f, -1.92807376f, 0.84499562f, 1.39131641f, 0.44825050f, 0.34567752f, 0.44607711f, -1.00986362f, -0.50038189f, -0.09060892f, -2.55645394f, 0.56416476f, -0.83058155f, -0.65931624f, +-0.73649710f, 0.59814465f, -0.86736494f, -0.32200798f, -1.28087902f, -0.76818323f, 0.86848933f, -0.98678392f, -1.30813944f, -0.20255326f, 0.26557815f, -0.31090519f, -1.46331608f, -0.62782109f, 0.59034890f, 1.63147473f, -0.17727259f, -0.37636510f, 1.27368402f, 0.19096918f, -0.29936951f, -1.99038267f, 0.54831523f, 0.48849005f, -2.55680346f, -0.63126534f, 1.21715927f, 1.22841084f, -0.67416084f, 0.02927168f, -0.36693662f, 0.63204330f, 0.13721083f, 0.28742912f, 0.19470036f, 0.74873924f, -1.47602463f, 0.86264688f, -0.23730527f, -0.99978864f, -1.17048764f, -0.34996086f, 1.43019187f, 0.26224539f, 0.60689932f, -0.75002515f, -0.79823422f, -1.37300086f, -0.19951135f, -0.12150808f, -0.75272322f, 0.23755015f, 0.31270382f, 1.66539109f, -1.04104745f, 0.79540199f, -0.54042423f, -0.54150617f, 0.43871084f, 0.24163951f, -0.24517761f, -0.66178995f, -1.13064528f, -0.84426326f, 0.56437236f, 0.09088907f, -0.82823074f, 0.81753862f, -1.74096012f, -1.80599844f, -0.60943592f, 1.36094582f, -1.47762752f, 0.15931177f, 1.05569172f, 0.36751524f, 0.06497604f, 0.13536447f, -1.57156146f, 0.22783801f, -0.96910107f, -1.24294984f, -1.47147155f, -1.04790676f, 0.64629447f, -0.32266054f, -0.55675793f, -0.95612079f, -0.23005411f, -0.75229394f, 0.03050950f, -1.72484553f, -2.06055546f, 0.19892083f, -0.13597751f, 0.65180075f, 0.27096850f, 0.08977254f, 0.57564765f, -0.43227410f, 0.09541437f, -0.00358280f, 0.65680492f, 0.04006556f, 0.57160908f, 0.43821687f, 1.96118212f, 0.42602235f, -0.36731303f, 
0.67200917f, -0.56667900f, 0.44014785f, 0.06970236f, -1.34415269f, -1.13301528f, -0.08848868f, 0.35615012f, -0.06426942f, -0.81406075f, 0.94097465f, -0.54560357f, -0.65877116f, -1.29646838f, -1.13109028f, -1.64186084f, -2.12723470f, 1.86027610f, 1.22621441f, 0.26098135f, -0.05608099f, 0.21143445f, -0.87244326f, 0.79408187f, 1.24279130f, 0.14458629f, 0.25532281f, -1.24023473f, 2.42278886f, 0.00405578f, -1.00119174f, 1.19856644f, -1.37395728f, -0.16656208f, 0.46858498f, -0.00678801f, -0.34960639f, 0.16614936f, 2.41560221f, -0.53880709f, 0.91618651f, -1.77009308f, 0.32911557f, 0.30216452f, 0.02881077f, 0.77705866f, 0.27061903f, -0.07440855f, -1.14010465f, 1.25383139f, -1.58615100f, 1.04185510f, 0.15140508f, -0.88059032f, -0.33872122f, -0.42526904f, 2.17365575f, 0.29308075f, -2.24234557f, -1.03164542f, -0.09263755f, 0.08050421f, -0.74946511f, -0.64589006f, -1.13416314f, -0.64989561f, 0.16502371f, -0.33831969f, 0.22832428f, -0.08389475f, -0.28009200f, 1.34536922f, -0.19075738f, 0.36238208f, 0.83690089f, 0.26144615f, 0.04457319f, -2.55585861f, -0.01807522f, 1.68334866f, -0.05795629f, -0.21315987f, -1.84039557f, 0.06512877f, -1.77318645f, -0.27637982f, 0.20439345f, 0.67558700f, -0.77179354f, -0.17902173f, 0.70381826f, -0.40395790f, -0.96492916f, 0.84138173f, 2.43879008f, -0.32297835f, -1.74370265f, -0.10330839f, -1.07465363f, 1.85030377f, -0.59153467f, 0.99667048f, -0.56753993f, 0.57383025f, -1.90630126f, 1.24299097f, 0.22797665f, 0.30468231f, -0.07360230f, 1.64654350f, 0.57195550f, 0.03227921f, 1.11005175f, 0.00088721f, 1.19266295f, 0.61323351f, 0.13754399f, 0.59900171f, -0.75831634f, 1.11500823f, 0.99747783f, -1.36923385f, 1.26563418f, 0.01253266f, 0.35483193f, 1.95143735f, -2.02703261f, -1.38265920f, -0.02404256f, 2.02788448f, -0.75144875f, -0.58445263f, 0.26129767f, 0.60691077f, -1.84661067f, 0.65872228f, -0.58298993f, 0.33067298f, -0.09431327f, 0.43333948f, -1.52616286f, -0.25961858f, -1.65459549f, -0.72950101f, -0.89906919f, -0.80081612f, -1.32189929f, -1.36574399f, 
-0.35809481f, 0.36385000f, 0.31480747f, -0.35797358f, -1.04066050f, 0.07971872f, -0.21176252f, -0.76559299f, -0.10352154f, 0.29248312f, -1.75030553f, 0.68219930f, 0.56189102f, -1.11212170f, 0.06501702f, -0.07131009f, 1.23410738f, 0.29311740f, -1.02052307f, 1.40220940f, -1.00995779f, 0.57955760f, 0.22640309f, 0.74853230f, -0.02586563f, -0.33427954f, 1.70311153f, -0.53405988f, 0.90975094f, -0.46450076f, 0.19904344f, 0.28559047f, 0.23167793f, -0.69065529f, -0.17176504f, -0.29301846f, -0.85477978f, -0.00267053f, -0.28529504f, -0.64201307f, 1.03479636f, 1.03805065f, 0.83270210f, -0.09405448f, 2.50615931f, 0.62019676f, 0.31354564f, -1.51599669f, 0.42848015f, 0.66263914f, 0.74651009f, -1.13042867f, -0.58933645f, -0.35146511f, 0.06223279f, 0.28065836f, 0.66506970f, 0.16942430f, -0.23316263f, -0.87481076f, 1.21992230f, 1.48536301f, -0.79667616f, -0.75519305f, 1.40999961f, -0.42802793f, -0.20252463f, 0.30573779f, -0.23319976f, 1.77525878f, -1.80704832f, 2.71519923f, -0.67500192f, 0.12268137f, -0.13014549f, -0.07479453f, -1.51065743f, 1.04198146f, 0.96205556f, -2.00525570f, -0.37911776f, 0.89329720f, -0.39495832f, -0.03683375f, -0.90928614f, -1.56263304f, 0.45038295f, -2.62184358f, -0.45686841f, -0.52536523f, 1.05351484f, 0.89982438f, -0.63724512f, 3.21004057f, -0.08608918f, 1.55209303f, 0.62688643f, -0.59702635f, 1.85774517f, 0.38172096f, -1.25640929f, -2.59278178f, 0.85050315f, -1.10080361f, -1.26422560f, -1.80045366f, -0.34494889f, 0.68448657f, 1.25671864f, -1.26594126f, 0.32244179f, -0.51956522f, -0.56212711f, -0.95574015f, 0.71973872f, 0.46736258f, -0.11772985f, -1.52736545f, 0.19571695f, 0.73147154f, 0.87724912f, -0.26265728f, -2.60267401f, 0.19263546f, 0.18320183f, 0.11485019f, -0.82999659f, 0.13582672f, -0.08040185f, 0.28152901f, -0.51421624f, -2.32467175f, 0.19923948f, 0.64616692f, 0.29718629f, 0.32785949f, -0.62266952f, -0.98174316f, 1.23276305f, 0.58563638f, 1.28528512f, -2.13718534f, 0.28842899f, 0.12676710f, -1.72105229f, 0.15053287f, 2.19496536f, 1.28683448f, 
-0.96318281f, 0.17043279f, -0.05245409f, -0.38710704f, -0.30441490f, -0.08249986f, 0.28423953f, 0.72963721f, -1.49658203f, 0.99077344f, -0.78913772f, -1.12661564f, -1.26294816f, 0.16517465f, 0.10124251f, -0.77198768f, -0.16342169f, 0.08615876f, 0.49711797f, -0.66083062f, 0.76648003f, 1.04756033f, 1.46122825f, -0.42798752f, -2.29203916f, 0.30444992f, 0.58697921f, 1.22166932f, 0.09022947f, -0.03920181f, 0.10444995f, 0.10361757f, 1.18224072f, -0.76641631f, 0.90802073f, 1.41639423f, 1.55682337f, 1.28101575f, -0.35396016f, 1.11443567f, 1.18218529f, -0.06048089f, 0.85024464f, -1.01789165f, -0.69154263f, 0.06663221f, 0.68429029f, 0.12560424f, 0.37915874f, -0.66829866f, -0.64524972f, -0.05568011f, 0.12230454f, -0.35041061f, 0.62027830f, -0.16739209f, -0.72145337f, 0.46263054f, -1.67837834f, 0.69413221f, -0.57243419f, 0.37638462f, -0.21446526f, -0.89821470f, 0.60078722f, -1.06706369f, -1.26132309f, 0.35714921f, 2.39221811f, -0.09376130f, 0.30760849f, 0.59180892f, 0.55815399f, -0.32628775f, 1.28890121f, -2.53237987f, -0.98241091f, 1.10520673f, -1.74751687f, -0.90837651f, -0.25220659f, -0.56625104f, -0.30691949f, 0.16058689f, 0.44309673f, -1.09874964f, -0.76747823f, -0.33679363f, -0.02535496f, 0.00990100f, 1.35318136f, -0.70140815f, 0.50937581f, 0.55386209f, -1.21721983f, 0.71376961f, -0.18079315f, -0.11077732f, 0.09292522f, -0.57235324f, 0.62748206f, 0.42587611f, 0.64860481f, -1.10635614f, 1.66414368f, 0.47505483f, 1.48602211f, -0.59611166f, -0.41932896f, -0.96542233f, -0.41756630f, -1.02963889f, -0.70070386f, 1.65803933f, 0.20138647f, 0.05895034f, -1.46152759f, -0.37278318f, 1.05535650f, 0.34437978f, -1.13257408f, 0.17635690f, 0.09386671f, 0.37079874f, 1.47695887f, -1.58420062f, -0.26100200f, 0.44847637f, 0.88847303f, -0.13877590f, -0.64620668f, -0.38019657f, 1.01608157f, 0.13357787f, 0.05137976f, 0.93498152f, -0.62226880f, 0.80461699f, -0.71682596f, -0.88756353f, 0.40933055f, -1.52167451f, 0.79756850f, -0.17307425f, 0.62368619f, -0.22466940f, -1.72802913f, 0.59047443f, 
-0.58020931f, 0.09096476f, -0.07317388f, 0.44522321f, -0.64880705f, 0.15684015f, 0.08708375f, -0.41556796f, 1.11579072f, -0.81733495f, 0.11643656f, -0.73995101f, 0.93685871f, 1.57971406f, 0.67606360f, 0.70509088f, -0.25283816f, -0.00010609f, -0.61884147f, -0.86409342f, 0.95383751f, -0.05895388f, -1.45261180f, 0.45166013f, -1.01434863f, 0.18496066f, 1.06517637f, 1.81127059f, 0.89470667f, -0.13232610f, 0.46958798f, 0.13884509f, 0.57117194f, 0.29575035f, -0.97884250f, 0.83291447f, -0.59255791f, -0.04354135f, -0.19431923f, 0.30071029f, -0.95421529f, 0.76359886f, -0.47799742f, 0.68254346f, 1.19368529f, -0.48935115f, 0.30357337f, -0.50225669f, -0.23370270f, 1.96702433f, 1.46558523f, 2.68482018f, 0.41622332f, 0.73697484f, 1.43430734f, 0.15387188f, 0.20875402f, -2.49335337f, -1.39674246f, -0.22125854f, -0.00424605f, 0.91416460f, 0.33384630f, 0.44703746f, 0.25610185f, 0.38966551f, -0.01784045f, 1.66148460f, 0.36005461f, 0.95716912f, -0.18246566f, -0.15480693f, 0.38775176f, -0.56969136f, -0.29644895f, -1.04565966f, -1.00455630f, 0.30897698f, -1.46885884f, 0.03657720f, -0.49302089f, 1.34134722f, 0.01673754f, 1.22725964f, 0.55256772f, 0.63803208f, -0.29041430f, 1.11455286f, 0.76329172f, 0.27073982f, 0.77173829f, -1.79884446f, -0.11889492f, -1.92040312f, -0.46382675f, 0.20078070f, -0.98889589f, 1.46711135f, -1.68280172f, -0.52852470f, 0.66245162f, 0.29575166f, 1.34826505f, -0.22362417f, -0.14345661f, -2.34815073f, 1.26572001f, 0.66505629f, 1.01141500f, 1.08030057f, 0.17036134f, 0.00168786f, -0.37282917f, 0.69206375f, 1.07367527f, -0.49708191f, 1.49504781f, 0.58224988f, 0.96593714f, -1.07661915f, 0.25202179f, 0.25531644f, 0.42357162f, -0.31236249f, 0.48383278f, -0.06361829f, 0.24131298f, -0.95695931f, -0.12589653f, 0.36134180f, 3.20266032f, -0.40879184f, -0.66985190f, 1.51674330f, 0.34072638f, 1.15076303f, -0.40199137f, 0.46223637f, -0.48608047f, 0.99119538f, -0.22506073f, 0.30968750f, 0.64210880f, 0.54640514f, 0.18607031f, 1.26293361f, -0.77960914f, 0.79572529f, 1.01936150f, 
2.27160740f, -1.48034489f, 0.74466604f, 0.14863680f, 0.31102443f, -1.15673816f, -0.38609681f, -2.65026069f, -0.45524642f, -0.74022961f, 2.74991131f, 0.00103815f, -3.03303242f, -0.41556966f, -0.87103498f, 0.78306234f, -0.88195556f, -0.77297026f, 1.21203196f, -1.09754920f, -0.03556008f, -0.31546223f, 0.72954375f, 0.25251788f, 0.11378583f, 0.50921023f, 0.30301905f, -1.60631680f, 0.27152416f, 1.17342317f, -0.70891970f, -0.08392961f, 0.92137378f, -0.10568139f, -0.31653777f, -0.28878728f, 1.22166574f, 1.12693942f, -0.21325994f, 0.94010323f, 1.21796405f, -0.68866694f, 2.30724216f, 0.28141466f, 0.83481526f, -0.04885862f, 0.01675143f, 1.04355800f, -0.81050140f, 1.51300573f, 0.53429186f, -0.56439877f, 0.38572624f, -0.05620475f, 0.67644542f, 0.72528905f, 0.05937041f, -1.06315899f, -0.51393986f, 0.46937627f, -0.34699562f, -0.64765716f, -1.45512629f, 0.47739139f, -0.88228017f, -2.00791359f, 1.29929042f, 0.05482405f, -0.66725296f, -0.54735124f, 0.09972951f, 0.76675093f, 0.98748523f, 0.08900899f, -0.78854066f, 1.47970486f, -0.61667502f, 0.45625573f, -0.21766303f, -0.46250847f, -0.07130960f, 0.64414692f, 0.12784545f, 0.26393634f, 1.07720757f, -1.23938286f, 0.62483376f, -0.55001754f, -0.05358591f, 0.07322436f, 1.12003291f, -1.00830650f, -0.20486419f, 0.76664752f, 0.28850746f, -0.04464776f, -0.40146068f, 0.73262817f, -1.12827921f, -0.19989438f, -1.15999687f, 1.37973154f, 0.78881019f, -0.34762639f, 1.22088552f, -1.64088547f, 0.63218033f, 0.45736769f, 0.05502866f, 2.22683382f, -1.78935897f, -1.49635041f, 0.83450896f, 1.67770112f, 1.33909333f, 1.51158953f, 0.28595078f, -0.08593627f, 0.45812801f, -0.15193029f, 1.14770603f, -0.88920450f, -1.96352005f, -1.49894583f, 0.49629962f, 1.59872091f, 0.00903497f, 2.15563583f, 2.25149560f, -2.01200557f, 2.56229877f, -1.38850498f, 0.73552012f, -0.39378855f, 0.52616280f, -0.03685786f, 0.87403935f, 0.12163408f, 0.74297994f, -0.30697080f, 0.38139752f, 0.49113834f, -0.95485127f, -0.99908817f, 0.71716321f, 0.04000283f, -2.09645271f, 1.38789880f, 
1.37198520f, 0.82493287f, 0.17114936f, 0.53696346f, -0.19516060f, -0.50377476f, -0.91730285f, -0.70113552f, -0.02406530f, 0.84943396f, -0.17428185f, -1.09140801f, -0.68156958f, 1.70756388f, -1.00399911f, 0.03023832f, -0.39023280f, -1.89737976f, 1.14469039f, -0.58337289f, -0.60037899f, -1.17490256f, -1.56342828f, 0.48714057f, 0.62266618f, -0.15967095f, 1.32789338f, -1.25700688f, -0.55633998f, -0.83128709f, -0.49346271f, 1.59561753f, -0.24675299f, 0.38012561f, 0.91796309f, -0.38522810f, -0.65509188f, 0.94100451f, -0.57324487f, 2.19070768f, 1.24058700f, -0.75978851f, -0.40460554f, 0.79189235f, 0.70192885f, 1.93569362f, -0.03070199f, 0.77010989f, 0.58794290f, 0.51087004f, 0.22892070f, 0.35007235f, 1.56023848f, -0.67453802f, -0.18485607f, 0.64349502f, -0.31489357f, -1.95834625f, 0.06560058f, 2.30394220f, 1.18194163f, -0.88034087f, -1.05000436f, -1.05471325f, -0.98481798f, 0.49904808f, 0.16438948f, -1.10297823f, -1.39736509f, 0.01306054f, -1.85160267f, -0.87292641f, -0.15418227f, 0.43412164f, 1.16518164f, 0.06273691f, 0.24659210f, -0.08267246f, 1.28885782f, 0.73575675f, -0.01019809f, -0.08753663f, -0.61827368f, -0.40863234f, 2.12599611f, -0.53620332f, 0.53789747f, -0.66386080f, -1.70461988f, 0.86608189f, -1.11151052f, 0.14120635f, 1.18858743f, -0.31760478f, -0.73533046f, 0.20978074f, -0.84074509f, 0.16523147f, -1.03362834f, 0.59721231f, 0.21318658f, 0.23671274f, 1.75115061f, 0.25363782f, -1.32541454f, 1.13056135f, 0.24652456f, 0.60381413f, 0.21478581f, 0.75044096f, -0.63125616f, -1.69889998f, -0.02116571f, 1.46165359f, 1.03068244f, 0.63693464f, 0.67795700f, 1.20033514f, -1.39205134f, -0.61743122f, 0.56549704f, 0.65182322f, -0.74250507f, -1.61939359f, 1.14054918f, -0.45725963f, 1.74519682f, -0.66251940f, -0.94811529f, -1.60865819f, -0.59968346f, 0.86309159f, -1.91936195f, -1.02646923f, -1.50352538f, 0.58292735f, 0.05320299f, 1.53582895f, 0.01069612f, 0.15226212f, -0.71840125f, -1.36896348f, 2.14600968f, 0.96626586f, -0.52014917f, 0.41001406f, 0.59478027f, 0.15282436f, 
0.27790198f, 0.76614654f, -0.38971323f, -0.01839927f, -1.57882118f, 0.61391610f, -0.62133092f, -0.03968323f, -0.88467252f, -1.24041140f, 2.07306671f, -0.41776338f, 0.14537935f, -0.91069067f, 1.67362070f, 4.72630215f, -0.07395106f, 0.46280116f, -0.40843824f, 0.70683080f, -0.27510864f, -0.63465804f, -0.83630908f, -0.44419941f, 0.60405648f, -0.65039170f, -1.02413189f, 1.05983019f, 1.73366308f, 0.73343736f, -0.00895882f, -1.00826013f, 0.17323074f, 0.73995626f, 0.24128854f, 0.94510227f, 0.25557515f, 0.02244723f, -0.95197725f, -0.16297856f, -0.38497585f, 1.17993331f, 1.20282137f, -1.31491220f, 0.44229278f, -0.24349044f, -0.01230415f, 1.37944865f, 0.48554277f, -0.54510897f, -0.10793537f, 0.41121426f, -0.12889031f, 0.26434359f, 1.27966082f, 0.64518744f, -0.15577169f, -0.99864733f, -0.61746484f, 2.01614976f, 1.56254935f, 1.86473298f, -0.54662132f, -0.22047071f, -0.06118120f, 0.84799510f, 0.17009684f, -1.30523121f, 0.64000309f, 0.36299205f, -0.59620583f, 1.36372304f, -0.05389515f, -0.93849313f, 0.98043185f, -0.39373067f, -0.84898937f, 1.32077873f, 1.05988657f, -1.35339200f, 0.23259017f, 0.63816410f, -0.80297333f, 0.60017115f, 1.25715804f, 1.18894124f, -0.62473553f, 1.05611980f, 0.02335166f, 1.07509828f, 0.25873449f, -1.68341100f, 0.54547334f, 0.79288185f, -0.93678916f, 0.19202201f, -1.48575914f, 1.08649087f, 0.50851744f, -0.45758674f, -0.39734635f, 0.35637981f, -1.63079453f, -0.75910008f, 0.92640859f, -0.55599529f, -0.40276715f, 0.31307653f, 0.39907026f, -1.18830419f, 0.71051043f, 0.14157933f, -0.39581308f, -1.64361024f, -0.06161860f, -0.25312796f, 1.10018682f, 0.56500763f, 0.80385065f, 0.35395023f, 0.81813669f, 0.27644628f, 0.65563256f, 1.73197234f, 0.68178749f, 0.76769936f, 0.44597456f, 0.67761195f, 0.67635447f, -0.32315412f, 0.19330767f, -0.25557944f, 1.91693723f, 0.38335562f, 0.07107610f, -0.57384586f, 0.79184365f, 1.87835479f, 0.60902315f, -0.94220877f, 0.79479855f, -0.25656971f, 0.08739131f, 0.53384244f, 1.22159266f, -0.39152125f, -1.46373534f, -0.02458516f, 
1.62825716f, -1.26112676f, 0.19967082f, -0.71114451f, 0.27929229f, 0.65001321f, -0.11868202f, -0.55587751f, 0.78069001f, 0.57969242f, -0.60274386f, 0.31650013f, 0.90339553f, 0.09453616f, -0.37119162f, -1.00320566f, 0.33299938f, -0.48636708f, 0.26342997f, -0.91914523f, 0.28682709f, -1.24780893f, -1.59254742f, 0.97176319f, 0.14744301f, -0.53056234f, -1.73221612f, -0.67645556f, 0.98705006f, 0.79895812f, -2.04333115f, -0.60132772f, -0.91653955f, -0.28094748f, 0.47943443f, 0.38157779f, -0.67648011f, 1.09093642f, 1.66012859f, -0.29358891f, -1.26773024f, 0.36747769f, -1.10141146f, 0.82383633f, -0.89772314f, -0.47145563f, 0.63939518f, -0.64430422f, -0.48889321f, -0.37680882f, -1.06962025f, -1.28689516f, 1.28365147f, 0.61859220f, -0.84676331f, 1.38404000f, 1.21053445f, -0.14871351f, 1.06349385f, 1.45878971f, -0.47362664f, 1.40707004f, 1.25224137f, 0.87364739f, 0.92858213f, 0.00157326f, 1.45661485f, -0.27318576f, 0.15482858f, -1.07058907f, -0.06903186f, -0.74147576f, -1.64111829f, -0.67226541f, -1.13458407f, 1.28511488f, -0.41041154f, 2.09085560f, 0.45243183f, -0.67437285f, 0.84960121f, -1.49300814f, -0.42961186f, -2.35021853f, 0.57255560f, -0.73903763f, 1.37607956f, -2.44575167f, 1.25105727f, 1.38575912f, -1.16299784f, -0.13719854f, -1.11507034f, 0.35796806f, -0.64511567f, -0.87903833f, 0.32833642f, -0.87696886f, 0.02714214f, 0.30224666f, -0.69118696f, -1.23500824f, 0.76678628f, -3.20508122f, -0.24704689f, 0.49019828f, -1.20862615f, -0.03778638f, -0.07273687f, -0.11517122f, -1.75857520f, -1.64188445f, 1.21574795f, 0.57325113f, 1.14370298f, -1.07824504f, 1.70653832f, -0.03700557f, -0.47645858f, 0.11065386f, -1.03143036f, -2.18094873f, -0.94403434f, -0.09335683f, -0.44817665f, 1.39707148f, -1.21947956f, 0.56575936f, -0.69612634f, -1.12361753f, -0.17105591f, 1.15422392f, 0.02840637f, 0.09469353f, -0.52859986f, -2.08487725f, 1.28789508f, -0.03740775f, 0.61196613f, 1.23405397f, 1.56595814f, -0.65800631f, 2.02985072f, -0.69446486f, -0.88443804f, -0.23448054f, -0.43628734f, 
-0.45888957f, -0.21943338f, 1.78258693f, 1.75214970f, 0.71804136f, 0.49782532f, 0.37886053f, -1.59176385f, -1.74758542f, -0.02820176f, 0.75398153f, 1.00119829f, 0.80881971f, -0.53365272f, -0.22720885f, 0.37476870f, 0.01005529f, -1.23421800f, -0.13431595f, -1.01843679f, 1.87386346f, -1.68539488f, -1.04942071f, -0.77322137f, 0.53964764f, 0.29278332f, -0.58299130f, -1.56022692f, -0.79441273f, 0.49289709f, 0.44112054f, 1.07305002f, 0.54899335f, 1.13781393f, 0.77809113f, 0.81795985f, 0.16576190f, 0.32552773f, -0.20250474f, 1.46543837f, 0.12731771f, 0.21013761f, -1.34241438f, 0.44267517f, 0.93246883f, 0.08808212f, 0.92653406f, -1.21083558f, 0.17247954f, -0.70557106f, 0.04630012f, 0.48834828f, 0.89634645f, 0.46683592f, -0.29553145f, 0.46363977f, -0.48971879f, -0.88603491f, -0.12333342f, 0.37073737f, 0.92061806f, 0.54675460f, -0.14716248f, 0.75578392f, -0.98173791f, -1.15983224f, -0.58713156f, 0.07950903f, -0.59016788f, 0.41622928f, -0.32474482f, 0.42086437f, 0.23061797f, 0.62596649f, -0.22615278f, -2.14721417f, 1.01685894f, -0.25976995f, 0.00739352f, -1.31597066f, 0.39005190f, -1.09549701f, 1.68375242f, 0.43331525f, -0.37124026f, 0.22255214f, 0.59654880f, -0.73840386f, -1.20048976f, 0.12226126f, 0.12997478f, 1.04826224f, 0.03894836f, -0.36289826f, 1.14466560f, -1.18198848f, -0.03713558f, 0.67677927f, -0.42329931f, -0.89409167f, -0.77874780f, 0.58438253f, -0.35176343f, -1.53329861f, -0.02995299f, -0.40145162f, -1.51052392f, 0.09194464f, -1.13275242f, -0.61983156f, -0.40004560f, -0.19893464f, 0.22134103f, -0.03903082f, 1.14894116f, -0.03476744f, 0.22520730f, -0.55851930f, 0.76650429f, -0.57863152f, -1.34161711f, -0.31498179f, -1.19411755f, 1.70044947f, -0.17428267f, -0.35983825f, -0.42613637f, 0.58165723f, -0.77866900f, -1.59727287f, -0.61723864f, 1.51078022f, 0.32971445f, -0.86441469f, 0.60552609f, 0.00208178f, -0.47096625f, -1.10479307f, -1.21652532f, -0.08211990f, -1.43739200f, -1.31684434f, 0.43312529f, -0.76822090f, 1.88128507f, -0.02179282f, 1.04971325f, -1.55004108f, 
1.25337446f, 0.11203052f, -1.16048300f, 1.59467411f, -1.29469275f, 1.14019871f, 1.20021439f, 1.84098923f, 0.05004879f, 0.73529941f, 2.05272865f, -0.13080600f, -0.08436690f, -1.17919350f, -0.66256678f, -0.36727047f, 0.73840511f, 1.22293818f, -0.00206342f, -0.29839504f, -0.00618613f, 1.04213119f, 1.21176076f, -0.62886089f, -0.02589060f, 0.96009409f, -0.64478731f, -1.16516542f, 0.57528079f, 1.04294407f, -0.09774588f, 0.45935291f, 1.03263175f, 1.00633478f, -1.82209253f, -0.18035053f, -0.28302726f, -0.83813244f, 0.57593471f, -0.03807700f, 1.60498738f, 0.16530658f, -1.43083501f, 2.10824299f, 0.30279446f, -0.03961089f, -0.38900724f, 1.31272805f, -0.56575215f, 0.57970244f, -0.48305038f, 1.34114623f, 0.21859215f, 0.66399640f, -1.52087069f, -1.30717897f, 0.14394683f, 0.97648209f, -0.71372712f, -1.22574198f, -0.27702177f, 0.04041927f, 0.02442212f, 2.19617033f, -0.48566443f, 0.81463927f, 0.20383844f, 1.17562282f, -0.33829874f, -0.42141283f, -0.96415234f, -2.39141965f, -1.04285860f, -0.23004992f, 0.41186509f, 0.03811268f, 0.36818987f, -0.71099734f, -0.56749570f, 0.18486284f, -0.44530040f, 2.14008284f, -0.27467576f, 1.70690107f, -1.40462613f, 0.24697532f, -1.31629777f, -2.20674944f, -0.67868507f, -1.15767133f, -0.64391804f, -1.79037917f, 0.58749497f, -1.58303332f, -0.69021022f, 1.64376318f, -0.95393223f, 1.98415601f, -0.10991055f, 0.02474386f, 0.23683345f, -0.63420391f, -0.57991928f, 0.83028817f, -0.40033704f, 0.19212338f, 0.74640590f, 1.10264432f, -1.65286255f, 0.92683482f, -1.42252541f, -0.74605089f, 2.14535880f, 0.12971123f, -0.47971717f, 1.67546797f, 0.42268261f, 0.22648531f, -0.42369929f, 0.77403021f, -1.31818616f, -0.67143595f, -0.04311426f, 1.64128351f, 0.34776631f, -0.39353722f, -0.42765084f, 0.16170517f, -0.54488391f, -0.38428506f, 0.42097485f, -0.55982012f, -1.74543798f, 1.53704774f, 0.43562424f, -0.30395737f, 0.31846946f, 0.39205357f, 0.57386035f, -1.11912560f, -1.39164317f, -1.04337609f, 0.31629622f, 1.51927638f, 0.88745505f, -0.40445471f, 0.25783861f, 1.88646257f, 
0.36509129f, -1.13266826f, -0.45394278f, -0.48400903f, -1.22332740f, 0.38626808f, -1.10049105f, 0.84138852f, 1.27863181f, 0.53942156f, -0.67743856f, -0.03896645f, 1.70393491f, 0.60997570f, 0.43368068f, -0.13338457f, -0.18920666f, -0.29583672f, -1.40738738f, 1.03876019f, 1.71253765f, 2.12821221f, -0.96092403f, 0.93841934f, -0.79030478f, 1.36427641f, -1.39196694f, 0.08514920f, 0.16223004f, 0.71259701f, 0.20150672f, 0.25068361f, -0.99952722f, 1.80129099f, -1.28586197f, -0.64957166f, -0.94813949f, -0.40161121f, 0.31977695f, 0.54932386f, -0.67757767f, 1.88086259f, 0.92337233f, -1.64887333f, 0.44333732f, -0.19468001f, 0.12977587f, 0.21171951f, 0.27679422f, 0.49134475f, -1.44429457f, 1.25617445f, 0.39978400f, 0.99869555f, -1.61617446f, 1.61177349f, 0.70243025f, -0.95748568f, -0.61795151f, -0.77302909f, 0.72967088f, 0.81964350f, -0.71813750f, 0.90140164f, -1.45950246f, -0.79972702f, 0.40875742f, 0.00152073f, -1.74491429f, 1.53776145f, 0.75769204f, -0.22075878f, -0.58385569f, 2.18884754f, 0.33597681f, -1.66265559f, 1.03805876f, -1.55245185f, -0.03582226f, -1.94542754f, -0.76081425f, -0.50471377f, 1.35763168f, -0.39631784f, -0.17134467f, -0.82220149f, -0.41021580f, -0.00940776f, -0.80176353f, -0.19816744f, 1.22061026f, -0.14486519f, -0.71727395f, -0.65721530f, 0.47020102f, -0.70403302f, -0.94795334f, 1.79884899f, 0.07779162f, -1.50615680f, 0.04140327f, -0.22001404f, 0.63735324f, 0.79237640f, -2.25412822f, -0.52519119f, -0.87280381f, -0.07100742f, -0.94734806f, -0.12286110f, -0.13623615f, -0.42595413f, 0.17547913f, -0.81707209f, 0.36855817f, -1.68186557f, 0.19312963f, -0.66249490f, -0.98283452f, -0.33314428f, 0.40918943f, 0.88268638f, -0.05390308f, -0.22440539f, -0.15879378f, -0.34859571f, -0.01013108f, -0.30005428f, -1.19408464f, 0.21789688f, -1.07769871f, 0.81475031f, -0.69555300f, 2.35201311f, -0.40362412f, 0.93497628f, 1.13343573f, 0.92343372f, 0.26987928f, 0.46123627f, 0.22577702f, 1.26289701f, -0.45956740f, 0.55994868f, -0.58410591f, 0.13304594f, -0.25806463f, 
0.49044946f, -0.82065403f, -3.06672239f, -0.27774641f, 0.68504512f, -0.21386372f, 1.11427057f, -0.73201770f, 0.51655543f, 1.77261138f, 0.72081727f, 0.11116749f, 0.16637769f, -0.74987584f, 0.66579849f, -0.75808716f, 0.20678560f, -0.67698354f, -0.82141948f, 0.61008269f, 0.66520184f, 0.44894725f, 0.73015076f, -1.52517414f, 0.11714164f, 1.90452611f, -1.30355322f, 0.12144456f, 1.18547559f, -0.07349755f, -2.28061509f, 0.83522540f, 0.78438890f, 2.19334102f, 0.90305614f, -0.59345531f, 0.77925014f, 1.32338643f, 0.14068902f, 1.19032264f, 0.20666829f, -0.76595837f, 0.74967057f, 2.86965609f, 0.55690205f, -1.72530472f, -0.83317834f, -0.85842621f, -0.29678273f, 1.80955839f, -0.70496303f, 1.19106734f, -0.92985237f, -1.00617313f, -0.56049556f, -0.29382578f, -2.04022193f, -1.95356870f, -0.42553005f, -0.33369407f, 1.02115977f, -1.45769477f, -0.67720300f, 0.53819913f, 1.57643425f, -0.47015440f, -1.47861958f, -0.00545934f, -0.97836047f, 0.42680529f, 1.56110144f, -1.49487829f, -0.65198445f, 0.22720462f, 1.83036661f, -0.47099793f, -0.09915133f, 0.14923312f, -1.16313052f, 0.67798084f, -1.63665557f, -0.38220280f, 0.01719763f, 0.30041245f, 0.43148938f, -0.44021657f, -1.25734651f, 0.02465564f, -1.00845659f, -0.28574651f, 0.01367745f, 0.77253437f, -0.99399441f, 0.61445391f, 0.18343423f, -0.50997210f, 0.41359940f, 0.77279282f, 0.83511519f, 0.27929801f, 0.70800692f, -0.20278299f, 1.57884383f, 0.22650529f, 0.43347472f, 0.74003208f, -0.71401161f, -0.69829476f, -1.56766701f, -0.99254119f, 1.27301061f, 2.73726511f, 0.66089469f, -1.95778012f, -1.24642098f, -0.63579029f, -1.63168180f, -0.66980726f, 0.81933254f, 0.61866677f, 1.40594471f, 0.05158535f, 0.00196500f, -0.24592508f, -0.50780547f, -0.83905292f, -0.10748957f, 0.04490763f, 0.27769178f, -0.23227681f, 0.82108080f, 0.03562285f, 0.95483875f, -1.49897683f, 0.67809856f, 0.35497451f, -0.44021592f, -1.67361462f, -0.88895375f, 1.44293678f, -0.85046643f, -0.46437624f, -1.87252641f, 0.26775804f, -0.24535774f, 0.73365933f, 0.52253938f, 0.27947086f, 
-0.58796054f, 0.59045380f, 1.93476331f, -0.46775359f, 0.25238225f, -1.26601815f, -0.13324316f, -0.71454948f, -0.21610366f, -1.49586582f, 1.04903507f, 0.22208478f, 0.25512528f, -0.46157327f, -0.41319233f, -0.63846964f, -0.25100923f, 0.81277549f, -0.26959971f, 0.88737756f, 1.24578953f, -0.91121447f, -1.05756927f, 0.44390878f, 0.16672316f, -1.22941923f, 0.89547867f, -1.50212002f, -1.69620168f, 0.53339505f, -0.23656729f, -1.69879091f, 0.01510374f, 0.08315694f, -0.73196459f, -1.60263407f, -1.07601058f, -0.76389569f, -1.65307498f, -0.61484390f, -0.43546933f, 0.71318507f, -0.16273083f, 0.64122051f, -0.15406294f, 1.17673671f, -0.91240519f, 0.71091145f, 2.40497613f, 1.26343656f, 0.71469337f, 0.20705548f, 0.81776261f, 0.36253929f, -1.92106628f, -0.09300470f, -0.36648872f, 1.27732766f, -0.39180157f, -0.61186749f, -1.03455031f, -0.25079829f, -0.61479062f, -1.07094336f, 0.82218504f, 0.89934880f, 0.41308978f, -0.59968555f, 0.37682834f, -1.77388155f, 0.00294951f, -0.66145372f, -0.50789726f, -0.85123241f, -0.89909405f, -1.89454281f, -0.56692821f, 1.52272677f, -0.11961794f, 0.27843913f, -0.60582250f, 1.01871169f, -0.36098275f, -0.12242325f, -0.67375034f, -0.11204147f, -2.62773919f, -0.95901299f, 0.14040214f, 1.32364666f, -1.35099924f, -0.11077739f, -0.79319423f, 0.75949597f, -0.25485823f, -0.90959758f, -0.42373934f, -1.29850340f, 0.85699379f, -1.11882365f, 0.63470817f, 0.49696380f, -0.07983235f, -0.23903450f, -0.22618714f, -0.12117998f, -0.09442677f, 1.55589819f, -0.11996678f, -1.72700179f, 0.54683149f, -0.40804827f, -0.50099218f, 0.34596699f, -1.81841791f, 0.06385052f, 0.84428120f, 0.69901514f, 1.94559097f, 0.43251973f, 0.16794942f, 1.82829034f, 1.70959795f, 0.36130908f, -0.94608402f, -0.53498030f, 0.47781768f, -0.24203247f, 1.25065851f, 0.51788396f, -2.09381890f, 0.72973937f, 0.03281829f, 0.58632666f, 1.85737121f, -0.49569523f, 0.45921183f, 1.87173629f, 0.22803484f, 1.66433418f, -1.05872321f, -1.13663685f, 0.12397861f, -0.65112090f, 0.98152941f, 0.83739656f, -0.18783289f, 
1.84249437f, -0.90706986f, -0.80824369f, -1.23854923f, -0.86488134f, -1.02627063f, 0.10976455f, -0.61403006f, 1.27554715f, 0.14653525f, -0.03953953f, -0.08512071f, -1.30043304f, -0.02566035f, 0.12054887f, 0.00282162f, 0.48921332f, -1.74398839f, 1.44554436f, -1.35854721f, 0.69256759f, 0.34101671f, 2.50045252f, 0.49121150f, -0.27115449f, 0.93974596f, 0.26258010f, 0.27151433f, -0.87214381f, -0.92580765f, -1.03269923f, 0.20615758f, -0.37822601f, 0.58983004f, 0.16426525f, 0.68218285f, 1.98158526f, 0.47492698f, 0.54224718f, 1.28722692f, -1.76915324f, -1.11240053f, 0.77428484f, 0.27184650f, 2.22473478f, -0.05574624f, 0.39976570f, -0.43911108f, 0.52805597f, 0.17340177f, 1.36057591f, -0.35004014f, 1.72787797f, 0.68357420f, 1.25532615f, -0.56752264f, 0.51840127f, -0.21237844f, -0.58821255f, -0.85278064f, 1.90179110f, -0.67447448f, -0.36831430f, -0.22930753f, 0.98231596f, -0.07011599f, -0.08560387f, 0.05998110f, -0.02481356f, -0.57335132f, -0.44288307f, -0.24468307f, 0.53321087f, 1.19609559f, 0.10664973f, 0.24379487f, 0.93687552f, 0.93615580f, 1.74319768f, -0.68310338f, 1.32163060f, 0.61918712f, -0.76501870f, -0.54549301f, 1.74077415f, -0.69977754f, -0.66880983f, -1.15981388f, 0.81571609f, 0.53788543f, 0.47898352f, -0.02484704f, -1.64646924f, -0.69822907f, 0.27020717f, 0.05027051f, 1.75149667f, 0.01548872f, 0.32615909f, 2.55151844f, -1.29172051f, -0.36133784f, 0.98637396f, 0.14009331f, -0.50038946f, -0.92230296f, 0.17307127f, 1.05361068f, -1.46784890f, 2.38960409f, 1.19413340f, -1.33349669f, 1.59141159f, -0.71811068f, 1.22429430f, 1.26947939f, 1.08177102f, -1.18138707f, -0.72775704f, 0.17282635f, -0.40554270f, -0.40341887f, 0.46564049f, -1.02069795f, -0.07653128f, -0.13979210f, -0.31195050f, -1.72042310f, 1.37131393f, 0.63849634f, 0.75561279f, 1.81152904f, 0.26686314f, 1.32796574f, 0.56100166f, 0.70058894f, -0.88962644f, -0.04360984f, -0.88249093f, 0.24311203f, 0.50410056f, -2.22567797f, 0.94520348f, -2.12467694f, 0.47282359f, -0.71379906f, -0.09857135f, 0.62374717f, 
1.37182784f, 0.73380554f, 0.59745449f, 2.80427694f, 0.67253572f, 1.65335357f, 1.69891667f, 1.34585941f, -0.79989213f, 1.44980943f, -0.52013642f, -0.46971673f, -1.50070012f, -0.25687039f, -0.56916732f, 0.71065760f, -1.31996286f, 0.96031237f, 0.13929774f, 1.49679291f, -0.05966444f, -0.58674580f, -0.08278833f, -0.93390942f, 0.42415768f, -1.77889526f, 0.75336021f, -0.72699982f, -0.82880586f, 0.63955617f, 0.42771208f, -0.42366457f, -0.91581815f, 0.94750947f, 0.43123913f, -0.99053741f, 0.70470595f, -1.16662264f, 1.14847183f, -0.83885664f, 0.46714026f, -2.27748466f, -1.23656678f, 0.14695056f, -0.33159894f, -0.52553117f, -0.04391259f, -0.29630372f, 0.25949728f, 0.96991086f, -0.37714824f, -0.28251833f, 0.16106486f, 1.38844633f, -0.18713553f, -1.30708838f, 0.48490265f, 0.29553881f, -0.45505449f, 0.83341682f, 0.87346369f, -0.63516861f, 0.66063565f, 0.93892503f, -2.73996735f, -0.81515318f, -0.91458052f, 0.00978268f, 0.43472794f, -0.08090764f, 1.37249672f, 0.76722521f, -1.19154143f, 0.22046764f, 0.34916410f, 0.51383299f, -0.56379753f, -2.49949312f, -0.74207872f, -0.68400806f, -0.09663232f, -0.07199454f, -1.05562651f, -0.75028551f, -0.87253797f, 0.69039482f, 0.45923674f, -1.27515161f, -0.04555376f, -1.41501272f, -0.83773375f, -0.74807298f, 1.36646152f, 0.06317432f, -1.32559633f, 1.89092779f, 1.24883330f, -1.03608561f, 1.08677161f, -0.99629849f, -0.69947034f, -0.85716367f, -0.07947286f, -0.25485426f, -0.19732477f, 1.64581251f, 1.04618108f, 1.87186897f, -0.18198362f, -0.83807969f, 0.70462501f, -3.18930101f, 0.74610996f, -0.60935193f, -0.49383929f, -2.88986492f, 0.51707613f, 1.04620326f, 1.09837818f, -1.19840038f, -0.10391295f, -0.20789115f, -1.51052022f, -0.31087330f, 0.22411564f, -1.30506921f, -1.52000105f, -1.51593041f, 1.04321992f, 0.97611690f, 0.90424490f, 1.83324766f, -0.08682299f, 0.47035542f, 1.70865905f, -0.31108001f, 0.04115159f, -1.36352801f, -0.90797836f, 0.32128647f, 0.66191489f, 0.08681208f, 0.14993365f, 0.47110486f, -0.31522670f, -0.38906571f, -0.08876022f, 
-0.13106902f, 2.25685239f, -0.62211353f, -1.68553007f, -0.23707703f, 0.69236159f, -0.46686995f, -0.27520603f, 0.26619941f, 1.48525345f, 1.61278927f, 0.49452963f, 1.20846486f, -1.11853909f, -0.30010033f, -0.75471467f, -1.69959772f, -0.52042168f, -0.43881389f, -1.45240712f, 1.02122891f, 1.73639011f, -0.03813924f, -0.22239220f, 0.15797073f, -0.64418089f, -0.60228932f, -0.83248150f, -0.02042520f, 0.38137484f, 0.86056453f, 0.06410559f, -0.62785137f, -0.49916875f, -2.53796315f, -0.79168582f, -0.69197005f, -0.77175534f, -0.28669405f, -0.79764080f, 0.97218460f, -0.10351621f, -0.52759898f, 1.02840185f, 1.16363287f, 0.08351815f, -0.61088538f, 0.59944046f, 1.54409397f, -1.39842033f, 0.27917057f, -0.27146137f, 1.46310735f, 0.03626106f, 0.15038440f, -0.07894899f, -1.42527366f, 1.69641745f, 1.48384345f, -0.43328866f, -0.54252565f, -0.94416499f, 1.54436302f, -0.81367069f, -1.67925239f, -0.17525831f, 0.27891046f, -0.69066733f, 0.89911050f, 0.11606655f, 0.67450327f, 0.41538724f, 0.90886223f, 1.19786549f, 0.85810721f, 1.32862210f, -0.83469814f, -1.09682298f, 0.88092703f, -0.97478902f, -0.11664717f, -0.07929394f, -0.69581884f, -0.16928329f, -0.70731819f, -0.40485084f, -0.28954300f, 0.52882415f, 0.38769314f, -1.38704026f, 1.15099049f, -0.43566978f, 0.34459323f, 0.49520254f, 1.11130333f, 0.28783718f, -0.53783375f, -1.63577271f, 1.02222812f, 0.86302060f, 0.48346213f, 0.46627176f, -1.30133855f, -1.48477137f, 0.31219670f, -1.21498191f, 0.89838904f, 0.87186617f, -0.39968935f, 0.34930915f, -0.32909471f, -1.39364409f, 2.13006306f, 0.33270469f, 0.00215986f, 0.97776711f, 0.24908836f, 1.56164885f, 0.45157790f, -1.55970144f, 0.27677536f, 0.07662498f, -0.08262251f, -0.17658773f, 0.65820259f, 2.01052690f, -1.71946216f, 0.84686053f, -1.23594892f, 1.40792072f, -1.47772563f, -0.36132276f, -0.50405115f, 0.09009213f, 0.81659186f, 1.85574234f, -0.64974433f, 0.63352364f, 1.01766217f, -1.54804432f, -0.42570522f, -0.24763709f, 0.72822112f, -0.93733686f, 0.68087620f, -1.40644944f, 0.48672482f, 0.09725539f, 
-0.64416331f, -0.95747960f, 0.36771363f, 0.39155054f, -0.71790671f, -2.17222738f, -0.08655047f, -0.97842115f, -0.22991380f, 0.52029115f, -1.42072022f, 0.29576331f, 0.32391560f, -1.00823236f, 1.67909145f, 1.16841447f, -0.32307062f, 0.15756166f, -0.97590631f, -0.39429301f, -0.03583352f, 0.17554663f, 0.57961231f, -0.46873134f, -0.23343173f, -0.85060924f, 1.71745574f, -0.04658702f, 0.63088381f, -0.67581934f, -1.53171062f, -1.58800113f, -1.17987096f, -1.16737640f, -0.87544650f, -1.17138922f, 0.38979119f, -2.39369726f, -1.34747124f, 0.58450359f, 0.87791806f, -0.04459394f, 0.97995293f, -0.10354915f, 0.65324986f, -0.17833626f, -0.85849386f, -0.42063358f, 0.19708554f, 0.10255250f, -0.59539181f, 0.86194044f, 1.68610668f, 0.55275291f, -0.43127069f, -0.04218780f, -0.08466262f, 0.31236625f, -0.92824298f, -0.09879152f, 0.32358822f, 1.04045570f, 0.35617545f, 0.09059231f, 1.19069445f, 1.96978688f, 0.63561743f, 0.15030998f, -0.29879019f, 0.22774190f, -1.01608860f, 1.03605175f, 0.47804731f, -0.30450734f, -0.61382371f, 0.45390254f, -1.93547988f, 2.01267338f, 0.52447683f, 0.18379784f, 1.11913633f, -1.24273467f, 0.15803322f, 1.72184098f, -0.79349059f, 0.10258614f, -1.53445125f, 0.02630571f, 0.81649125f, 0.91089755f, -1.12968338f, 1.04016411f, 0.28999722f, 0.74863863f, -0.61388236f, 0.01665530f, 1.43592548f, 0.68138391f, 0.11963340f, -1.26123953f, 1.36340797f, 0.25696915f, -0.58877039f, 1.42209792f, 0.55563360f, -1.33329606f, 1.84695840f, 0.88433737f, 1.04359078f, 0.18906727f, -0.03448994f, 1.17944050f, 0.86783957f, 0.44934425f, -0.77892244f, -1.76232874f, -1.01689589f, 0.78943914f, 0.92141974f, -1.00187087f, -0.13809921f, -0.90222073f, 1.10094714f, -0.13657950f, -0.44349849f, -1.61441302f, 1.05724919f, 1.50337231f, -0.05785890f, -0.76958144f, -0.51498759f, 0.69227600f, -0.37975949f, 1.31949317f, 0.82049531f, 0.32868597f, -0.31557772f, -0.75534385f, 1.27303052f, 0.43453619f, 0.11296938f, 1.18182182f, 2.23387384f, -0.86412978f, -0.01599468f, -0.70869064f, -0.09221385f, -1.23729551f, 
0.79490280f, 0.03522846f, -0.95069039f, -1.73461652f, 0.72329187f, 1.40385795f, -0.11585230f, -0.78033113f, 0.07491048f, -1.12873089f, 0.18476245f, 0.57568848f, -0.28792691f, 1.35411644f, -0.76956165f, 0.29571572f, 1.03178787f, -0.38780826f, 0.31680650f, 0.69368076f, -1.23856580f, -0.49848995f, 0.14766994f, 1.02625990f, 3.03858209f, -0.51030380f, 0.96796870f, 1.35078156f, -1.07729447f, 0.84322494f, 0.54886484f, 1.31453705f, -0.45792100f, 0.31196272f, -0.15701357f, 0.83586836f, -0.74952888f, -1.17432022f, -0.31002575f, -1.02149463f, -0.36117774f, -1.22079086f, 0.03532525f, 0.00555908f, -0.45891216f, 0.29636297f, -0.68272704f, 0.41257843f, 0.37988129f, 0.01747893f, 0.82739186f, 1.52292180f, -0.79456621f, 2.20275712f, 2.13212132f, -0.81393015f, -1.15712392f, 0.22488308f, 0.62776327f, -0.85444915f, 0.44017896f, 0.05863331f, -0.83198178f, 0.93063420f, -0.16121253f, 0.12382501f, -0.37826315f, 0.93118382f, 0.19507533f, -0.58595538f, 1.46994352f, 0.13170272f, -0.70031989f, -0.12820166f, 0.30487457f, 0.84148771f, -0.68807501f, 0.21187615f, -0.67030680f, -1.79136002f, 0.70810199f, -1.20959783f, -0.08468831f, -0.06317700f, 1.35527098f, -0.47018668f, -0.91693246f, 0.14818805f, -0.05405350f, 1.16875637f, -0.17363262f, -1.61833882f, -0.32934523f, -0.38346377f, -0.62702698f, 0.34135151f, 0.48015586f, -0.65263331f, -0.04689486f, 0.01156854f, 0.37580970f, -0.16174591f, 0.59627324f, 0.24351901f, -0.87983090f, 1.57049024f, 1.25836349f, -0.41464049f, -0.62279183f, 0.09693756f, -0.23850618f, -0.49007827f, 0.22298151f, 0.10914832f, -0.35192192f, -1.27221346f, 1.10203624f, -0.86399704f, -0.47319838f, -0.77105570f, -1.68624854f, 0.81198281f, 0.82534081f, 0.75654501f, 1.47631240f, -0.61000234f, -0.58933264f, 0.54822850f, -1.22829592f, 0.11107657f, 0.56449169f, 1.50693524f, -0.59280968f, -0.64286685f, -0.20120731f, 0.27184448f, 1.55500400f, -0.48919386f, 1.04044867f, -0.87048137f, -0.40569979f, 0.21908638f, -0.51829034f, -1.48748124f, 0.02990401f, 1.83462536f, 0.29885170f, 1.32370698f, 
-1.30129600f, 2.43271399f, 0.22967771f, -1.13014007f, 0.95529765f, -0.83325785f, 0.43633386f, 0.85774118f, 0.78160155f, 0.58583075f, 1.18906367f, -1.54354560f, -0.68320692f, 0.01900371f, -0.79777133f, 0.12851712f, 1.10176420f, 0.79418170f, -1.41154039f, 0.36929929f, 1.12176800f, 1.23849642f, -0.89377707f, 1.01390159f, -0.50889206f, -1.12554002f, 0.17932732f, 0.48949540f, -0.54235244f, -0.28146735f, -1.39125514f, 0.13309635f, -1.12864995f, -1.29901242f, -0.04266220f, -1.98028529f, -1.34869373f, 0.00038156f, -0.92473024f, 1.48010647f, -0.02754467f, -0.26030368f, 0.93083733f, 0.27946711f, 0.64052200f, -0.04220961f, 1.25002527f, -1.07923257f, 0.19048618f, 0.08900311f, -0.40813437f, -0.73068553f, 0.52122378f, 0.68990833f, -0.38749605f, -1.09269309f, -1.63480806f, 1.01789618f, -0.61596102f, 0.81049860f, 1.30838764f, -1.49213874f, -0.77916288f, -0.72660202f, -0.92013240f, -1.61726642f, -0.11527207f, 0.35143322f, -1.11646879f, -1.45525432f, -0.82892823f, 0.15512508f, 1.01891017f, 1.40162635f, 1.02494884f, 0.33882582f, -0.78747398f, -0.26009330f, -0.38519114f, 0.79247451f, 0.02065756f, -0.48030257f, 1.01167107f, -1.74057114f, -0.84549171f, -0.15337363f, -1.92544484f, 1.01270044f, 0.00762185f, -0.16405612f, 1.61778915f, 0.93316060f, -0.68960994f, -1.13214970f, -0.94695878f, -0.28418848f, 0.17102109f, -0.08787476f, -1.83799696f, -0.13761258f, -0.18652774f, 1.46456254f, 0.34169790f, -0.40697145f, 1.49663997f, -0.99555492f, -0.67775637f, -0.51951116f, 1.35157657f, -0.27099034f, -0.46987835f, 2.28101230f, 0.59104478f, 0.75010139f, 1.01472175f, 0.25741309f, -0.56074983f, 1.12267506f, 0.35336846f, 0.61733276f, -1.63976014f, -0.17700450f, -0.25093642f, -0.75599891f, 2.10956192f, 0.95155340f, 0.72049862f, 0.50492924f, 0.62067389f, 2.08688402f, -0.73604703f, 0.63383341f, -0.53528428f, -2.11538506f, -0.98173052f, 0.59560484f, -0.26205051f, -0.91948050f, 0.00593397f, -0.11734286f, -1.41261208f, -0.83611172f, -0.27682739f, -0.20619918f, -0.36557615f, 0.77194935f, 1.67695415f, 
-1.39265156f, 0.04892010f, -0.37773246f, 0.16124558f, -0.18348448f, -1.38248885f, 0.58459854f, 0.65064198f, 1.11349559f, 0.36708066f, -0.15471332f, 0.14208725f, -2.06860566f, 0.29629150f, 0.93084633f, -0.47215626f, 0.60208917f, 0.95415461f, 1.03390312f, -0.03639749f, -0.23988228f, 1.27037442f, 0.95133096f, 0.33187470f, -0.34527761f, 0.22134073f, 1.01799667f, -0.81475645f, -1.18869019f, 0.23314142f, 0.25180560f, -1.23762786f, 1.25283313f, 0.16980635f, 0.40740708f, 0.59256923f, 0.16274920f, -0.69713289f, -0.16444311f, -2.41602516f, 0.37952334f, -0.05604568f, -0.23772651f, 0.20581599f, -0.54303211f, 1.71877348f, 0.83602583f, -0.32586128f, 0.73609394f, -1.73640239f, 0.07249248f, 0.31248692f, 1.77627432f, 0.97660398f, -0.42095289f, -0.18750280f, -0.84246057f, 0.29762223f, 1.87054563f, -1.46980762f, -0.45306337f, 1.52366042f, 1.39061129f, -0.04980387f, -0.55382830f, -0.96987218f, -0.06910808f, -0.41276473f, -0.83891344f, -0.92597574f, 0.60252470f, 0.21938549f, -0.04451685f, -1.00330937f, -0.36955237f, -1.52876902f, 0.27296364f, -1.96721256f, 0.05291027f, -0.91540521f, 0.48990685f, -1.99560380f, -0.68551093f, -0.14532298f, -1.56881595f, -0.08319287f, 0.31003201f, -1.42829597f, -0.61810297f, -0.03581250f, 0.77747720f, 1.25297558f, -1.36239243f, -1.13274276f, -0.35045877f, -2.34157228f, 0.04515179f, -0.83044821f, 1.81353962f, -1.36855912f, 0.39704823f, 0.16665934f, -0.16654585f, 1.17806077f, 1.00086153f, -1.25474250f, -1.46876431f, 1.18021631f, -0.32257929f, 2.12062597f, 0.86819613f, -1.18048275f, -1.69747460f, -0.74092305f, 0.05086798f, 1.15339577f, 1.32972670f, 0.27247882f, 0.98499072f, 2.35597157f, 0.30179837f, -0.66633248f, 0.13794266f, -0.22753908f, -0.22868259f, -1.81792033f, 0.50151759f, -0.79408127f, -1.05343878f, 0.45727381f, 0.84800923f, -1.73605800f, -0.02032863f, 1.82778001f, 1.41025102f, -0.81715560f, 0.25888795f, -0.25075480f, 0.66256499f, 0.11993053f, 1.81336939f, -0.06345166f, -1.49658346f, 0.07531686f, 0.96972889f, 0.87405980f, 0.75830793f, -0.13497087f, 
-2.45855975f, -0.65984958f, 0.93919373f, -0.97305542f, 0.73477978f, 1.04337513f, -1.22712576f, -0.46385625f, -1.20876372f, -0.82760453f, 0.01455977f, -1.05089867f, -0.02801843f, 0.60899758f, -0.82052249f, -1.48932517f, -0.98073828f, -0.19311285f, -0.25602359f, 0.50351876f, -1.24557400f, -0.82138073f, -1.45966852f, 0.44991320f, -0.75550151f, -0.98550314f, -1.21418869f, -1.15771639f, -1.72192061f, -0.39616469f, -0.55566746f, -1.31880891f, -0.08843257f, 1.00422776f, 0.35846478f, 0.46060917f, 0.77326930f, 1.60129988f, -1.85124147f, -0.30582917f, 1.30227256f, 1.81890345f, -0.44084981f, 0.25315762f, 0.70259613f, -0.94882858f, 1.97040296f, 0.71473581f, -0.68193883f, -0.36290962f, 1.16348684f, 0.15418798f, 1.07806778f, 0.40554729f, 0.10280909f, -1.06474805f, 0.64398485f, -0.63568884f, -0.06108581f, -1.03290677f, 1.02834034f, 1.15284693f, 0.14046004f, 1.86630619f, 0.46804786f, -0.68397558f, 1.60733378f, -1.64890087f, -1.03819239f, -1.19212389f, -0.78382361f, 0.03925850f, 1.52259934f, 0.09540676f, -0.21220762f, 0.55955195f, -0.39845437f, -2.14541650f, 0.49337825f, -0.68574250f, 0.74040270f, 0.50783634f, -1.60461199f, -1.26806450f, -0.12652303f, -0.83992827f, -0.15524681f, 0.40098447f, 0.23392735f, -0.23262636f, 0.06525709f, -0.35994548f, -1.08432877f, -0.21395946f, -0.78357452f, -0.57157278f, 0.71407390f, 0.86596155f, -1.13723528f, 0.13460183f, -1.20881450f, 0.71018457f, 0.68943661f, -0.70428050f, 0.64600736f, 0.01990297f, -0.10575775f, -0.80263519f, 0.10618331f, 0.08865548f, 1.51651669f, 0.60851854f, 1.15161908f, 1.04919207f, 1.18359745f, -0.04352076f, -0.83643389f, -0.07922365f, 0.10597949f, -1.34984851f, -1.91319740f, 0.71585363f, -2.10845160f, 0.64385056f, -0.54551518f, -1.02039802f, -1.62510490f, 1.65401149f, -0.42711899f, 0.07970079f, -0.21404363f, 0.30498922f, 1.07942021f, 0.63995659f, -1.82114816f, 0.56396323f, 1.07084870f, -2.00350380f, 0.53339815f, 0.18500003f, 1.15034151f, -0.21436051f, -0.99986565f, -0.58812016f, -0.07247020f, 0.78910017f, 0.48839527f, 
0.98795873f, 0.10357288f, -0.05604928f, 0.38977858f, 0.73745090f, 1.40838420f, 0.25967824f, 0.23588051f, -0.03451392f, 1.04897523f, -1.77121758f, 2.35625434f, -0.67086869f, -0.84005541f, -0.85940343f, -1.04449213f, -0.65917015f, -0.78713167f, -0.95910054f, 0.38597879f, -0.31879017f, -0.86260867f, -1.08593106f, 0.02802678f, 0.99484950f, -0.55113328f, 2.60936737f, -0.03388772f, -0.47583574f, -0.14021793f, 0.99019170f, -1.22431207f, 0.78734446f, -1.77037835f, 0.15018673f, 0.36423206f, 1.36447549f, -1.61007094f, 0.51875496f, -1.60788095f, -1.73557448f, -0.41414359f, -0.93710536f, 0.38715765f, 0.04243837f, -1.59682858f, -1.10728157f, 1.88292623f, -1.01428258f, 0.01074958f, -1.88169158f, -0.31616244f, 0.45334938f, 1.12449574f, -1.16699445f, -1.59505820f, 0.04126552f, -0.89016622f, 0.45838884f, 0.71463561f, 0.14563711f, 0.30694655f, 0.67193079f, 0.61429602f, 1.00201404f, -0.49295208f, 0.05997690f, 0.99491668f, -0.73801446f, -1.17185295f, 0.94778723f, 0.36106884f, -0.43561545f, 0.04102699f, 0.52626407f, 0.08442099f, -1.57626402f, 1.56855237f, -1.65396678f, 1.74014664f, -0.38219589f, 0.39305371f, -0.31705827f, -1.15742850f, 0.11669596f, 0.54043210f, -0.52270615f, -0.13375773f, 0.68094701f, -1.84134769f, -1.49383473f, 0.14632171f, -0.54607725f, -1.20867658f, -1.28439069f, -1.81734920f, 1.54257309f, 0.78347659f, -0.24049839f, 1.69973648f, 0.99825776f, 0.99971974f, -0.26055810f, 0.34143049f, -0.44862366f, 0.11253342f, -0.60932243f, 0.70383030f, -1.87318194f, 0.21953633f, 0.82791799f, 1.64545465f, -0.42693698f, -0.64897031f, -0.97996652f, -1.06616282f, 0.52939081f, -0.12541170f, -0.57480675f, 0.73600835f, 0.35711968f, -0.03528263f, 0.79997194f, 0.55742902f, -0.28909785f, 0.64331138f, -1.79893720f, 1.01572442f, 0.27111965f, -0.51778597f, 0.12906317f, 0.76148927f, 1.51315522f, 0.41101140f, 0.38008851f, 0.66759896f, -0.13804778f, 0.64854795f, 1.73474562f, 0.75999504f, -0.73411214f, -0.05406699f, 1.35664344f, -0.25298578f, -0.12696666f, -0.42628938f, 0.61129904f, 1.55259824f, 
-0.05820796f, -0.38598019f, -0.87325627f, -0.55066222f, -1.24557889f, -0.26509118f, -0.32103062f, 1.14031804f, -0.75985742f, 0.70659167f, -1.15016067f, 1.24906838f, 0.90396994f, -0.16241251f, 0.43682271f, -1.42695689f, 0.47134697f, -1.66143429f, 0.08698819f, -1.00775325f, -2.24129725f, -1.04226267f, -0.98537570f, -0.89938259f, -1.80710697f, -1.22866321f, 0.78125423f, 1.55150509f, 0.46235040f, 0.18444096f, 0.19313288f, -2.20686269f, -0.40341458f, 0.50321484f, 0.47339424f, -0.81383848f, -0.21972439f, 0.66612029f, 0.60239881f, 1.20443010f, 0.70015103f, 0.30632916f, 0.01489905f, 0.68129027f, -0.89645082f, -2.68969011f, -0.96684915f, 1.66421318f, 0.74333072f, -0.78321886f, 1.60063362f, -1.27524030f, -1.95856726f, 0.47504124f, 0.15398432f, -0.20796098f, -0.13449343f, 0.93458968f, 1.60390890f, 0.21798505f, -0.27035928f, -1.23248971f, -1.25361061f, 1.34666133f, 1.07233441f, 0.88799530f, -1.23687923f, -0.40781614f, -0.11916534f, -0.88050151f, -0.66422415f, -2.61471510f, 0.78276747f, 2.42323995f, -1.70715427f, 0.71550035f, -0.60298312f, 0.70491880f, 0.46175584f, 0.80827898f, -0.45108104f, -0.98219043f, -1.72823501f, 1.73190725f, 0.53906441f, -1.50445580f, -0.59250867f, -0.07239901f, 0.44743437f, -0.13740127f, 1.69935930f, -1.00480616f, -0.58191377f, 0.39853972f, -0.60960841f, -0.45473522f, -0.76396072f, -0.31872150f, 1.74509728f, -0.59950751f, 0.89810580f, -0.81400329f, 1.14280319f, 1.11165059f, -1.31295311f, -1.60784578f, -0.87506992f, -1.13461006f, -2.09486437f, -0.16449419f, -0.37728927f, 0.47595578f, -0.55342919f, -0.17574213f, 2.21499181f, 1.14331865f, -0.14938518f, 0.18935619f, -0.33802557f, 0.52538890f, 0.82673949f, 1.16562462f, 1.24713838f, 0.98890215f, -0.64991701f, 1.49886703f, 1.97769642f, 0.08059916f, -1.60925281f, -1.23822486f, -1.40829837f, 0.51331180f, -0.29928651f, -1.04348791f, -0.39911583f, 0.69380492f, 1.54516888f, 1.22791195f, 2.25008130f, 1.33348894f, -0.21775827f, -0.71937007f, 0.54982573f, 1.70691478f, 0.32459491f, -0.57187974f, -0.21614684f, 
1.08274269f, 0.41384646f, 0.24497485f, -1.43703413f, 0.89616930f, 0.82032162f, -0.24598582f, 0.84271127f, -0.81894702f, -0.01828136f, 1.70397091f, 0.39505738f, -0.51221430f, -0.87979966f, 0.10795479f, 0.45194778f, -0.76008922f, 1.23394477f, -0.56798172f, 1.06459570f, -0.44333413f, -2.40399075f, -0.37267187f, 1.42946172f, 0.95734519f, 1.86127949f, -0.15217264f, 1.68742633f, 1.97638428f, -0.44211119f, -0.98393327f, -0.54173928f, -1.72017395f, 0.74697793f, -1.77827263f, -1.92299354f, -0.17189410f, -0.48633271f, -2.21230388f, -0.45906609f, -0.53493047f, 0.37253976f, -0.56951141f, 0.07728028f, 0.03530006f, -1.18123293f, 1.94158125f, -1.55930352f, 0.69334733f, -1.95163214f, -0.95800400f, -0.01804711f, -0.56747472f, -0.99099451f, -1.52853060f, -0.98279524f, -1.67307866f, 0.96121490f, 0.35654056f, 1.74034202f, -1.44633865f, -0.27781928f, 1.79457986f, -0.41029963f, -0.76871634f, 0.36555341f, -0.77664107f, 0.19535238f, -0.76185411f, -0.19828433f, -0.88820636f, 0.63885397f, 0.11346363f, -2.50265074f, 0.16319332f, -1.01288569f, 1.86605489f, 0.89761645f, 1.11795115f, -0.00714116f, -0.89034635f, -0.76447034f, -0.18822117f, -0.48340848f, -0.99788517f, 1.02172959f, -0.39395007f, 0.72566581f, -0.81438208f, -0.71715081f, 0.96243578f, -1.36424279f, -1.13870537f, 1.17602491f, 0.16320205f, 0.71959788f, 1.66669416f, 0.55690295f, -0.28912008f, -1.19219172f, 0.23308393f, -0.37963116f, 0.45347008f, -0.42606446f, 1.30938649f, 1.25128853f, 0.57649273f, 0.34440875f, -0.23893952f, -1.06604803f, 0.31336102f, 0.75727910f, 0.46772480f, -0.37650385f, -0.06036821f, 1.03686309f, 0.46158856f, -1.81028461f, 1.43393028f, 0.85494965f, -2.34685564f, -0.17571987f, -0.45592231f, -1.31190526f, 1.73194158f, -0.11856517f, 0.07041293f, 0.25689471f, -0.56000596f, 2.06649089f, 0.38954756f, 1.36627376f, 0.13905638f, 0.77370811f, 0.43944249f, -0.08798827f, 0.07245751f, -1.30234015f, 0.29710820f, 0.74389762f, 0.11971968f, -0.07381748f, 1.32652700f, 1.34079397f}); auto input2 = NDArrayFactory::create('c', {3, 4, 4, 
5}, {0.98114507,0.96400015,0.58669623,0.60073098,0.75425418,0.44258752,0.76373084,0.96593234,0.34067846,0.57962620,0.77517051,0.97472977,0.79237527,0.68690428,0.21719366,0.79959206,0.84814187,0.22496814,0.08646965,0.31110474,0.79813162,0.19661444,0.57760099,0.72138960,0.15244268,0.87687051,0.11130344,0.01087698,0.34817841,0.54992017,0.23443850,0.31725614,0.59755220,0.20364695,0.00531392,0.23403114,0.07442912,0.83707647,0.89291743,0.09044587,0.69041462,0.29904183,0.61904680,0.85306847,0.34467042,0.95839152,0.54517124,0.29640937,0.94855959,0.95970016,0.94045145,0.95510301,0.34666505,0.34717010,0.69245678,0.71669175,0.59043738,0.64924132,0.06033522,0.60185199,0.04690073,0.59241154,0.40229547,0.23002481,0.45161195,0.73743778,0.93209113,0.37294358,0.50177744,0.15072501,0.26146917,0.05252146,0.04758931,0.76448288,0.85149045,0.08840467,0.07692576,0.33180160,0.27241259,0.74834620,0.56453640,0.23057286,0.68429752,0.11961551,0.39045977,0.44356094,0.77018807,0.07984410,0.47926806,0.26165759,0.18606064,0.89972877,0.17962874,0.47273120,0.64641705,0.61890443,0.58730015,0.25937832,0.35231561,0.10243882,0.17459193,0.95906995,0.09227025,0.30003223,0.41601210,0.38269713,0.84799751,0.59295173,0.76277990,0.68910424,0.37672606,0.40675461,0.94346058,0.91438505,0.84728183,0.64367667,0.74899979,0.60570691,0.16417363,0.68852426,0.85486889,0.22585792,0.86953176,0.07465519,0.93096301,0.38008822,0.38752587,0.44004038,0.13170612,0.94541045,0.89349973,0.69245307,0.94978877,0.98776658,0.79445884,0.30607409,0.58264961,0.37980538,0.41810784,0.48903038,0.51615888,0.57682794,0.82481897,0.78341080,0.48446465,0.17447931,0.71125424,0.30263851,0.70675352,0.03215584,0.92381065,0.22343694,0.08851149,0.91402490,0.70074717,0.30912192,0.37723206,0.97579397,0.23554587,0.95939133,0.41565709,0.01741416,0.58362787,0.22106662,0.89065537,0.31900249,0.41280911,0.67947610,0.04545590,0.15352812,0.85412524,0.84933222,0.80000225,0.93147073,0.70094105,0.69269875,0.95282194,0.65913582,0.79186874,0.59855248,0.39707430,0.95
126239,0.15618217,0.33446689,0.98123758,0.84770758,0.98081012,0.54427413,0.18728519,0.89792955,0.53360126,0.72812986,0.13307744,0.51217443,0.66708084,0.29416915,0.31298995,0.39155037,0.29288291,0.87063305,0.61759154,0.73723332,0.37167635,0.82122716,0.22937430,0.76570536,0.47911792,0.02826214,0.94277323,0.59945469,0.19042060,0.68173155,0.82771295,0.95649538,0.40833101,0.90838542,0.55245881,0.49011012,0.36773444,0.34513527,0.42050683,0.16113964,0.30969388,0.27174174,0.12117655,0.35270175,0.81967867,0.63723136,0.84309389,0.71822576,0.84883484,0.32306117,0.08176457,0.56175486,0.34892198,0.09306929,0.85437582,0.13925577,0.48629188,0.29923539}); auto exp = NDArrayFactory::create('c', {3, 8, 8, 16}, {5.98743296,-2.83037376,-0.87943113,1.41339970,1.32433391,-1.20299149,-0.02893090,2.05326009,1.19417048,5.58212376,3.28139353,1.19237995,-1.09431255,-2.55264497,3.11014652,6.81296825,-2.09029293,-4.32068443,-0.52808392,-1.97968531,-0.18673831,0.84605980,4.55825520,2.71503139,0.15210046,0.85310984,-3.82062817,2.76470995,3.69004202,-1.45017099,-2.59361267,-1.35094655,7.24145126,-5.25432396,0.19920218,-4.30596399,1.35318923,-3.88142037,3.67493343,2.25931478,2.87630725,1.66349852,6.21347952,0.94105923,-1.61742055,-2.35699606,0.12850338,1.79141688,-2.09535933,-6.35418081,-0.06303531,-4.38615131,0.48237842,0.26528549,3.38231516,3.76315165,-0.40254810,-0.23716694,-6.13381910,-0.41950428,-0.89680839,-1.46491277,-1.98541689,-0.99357355,5.58237648,-2.38937521,-0.00872564,-2.37138414,4.91117287,-4.51916361,0.97943687,2.91052818,-2.50362611,1.70252812,5.04137802,3.57108784,-1.87532270,-3.66677809,-2.38861251,5.55765152,-7.27571774,-1.68887305,-0.72266489,-4.42809057,-0.92118186,1.02381468,4.44284725,5.17150497,-0.42438728,2.02693963,-1.36484981,-1.47912180,0.26649538,-0.02091765,-2.86906910,-3.03046989,1.35122132,-3.21707630,2.21112418,0.24121630,3.96940088,-7.66105747,2.76352382,-0.99061489,-2.16720009,-1.63170409,1.12701774,-1.02415371,-0.90435314,-1.51372027,-0.76884907,0.39066136,-0.89
562428,-2.03204703,1.28074932,-2.14551091,-2.36843777,0.46580017,0.75451565,-0.00336730,-1.06597757,3.27195978,-0.41307712,-0.10376054,-1.34102952,-2.22901654,2.31929803,1.40851438,-2.23774385,0.20417206,-1.12153268,-0.13188094,-3.96649432,2.10269976,0.49845099,6.18937683,-0.51783508,-0.48048639,-1.92970264,3.16670656,1.13355756,-0.07890664,1.31536257,-0.43924797,-0.04562932,-0.87974954,0.75411212,-2.39745235,-3.97132111,0.37202546,-2.40399146,-1.50796390,-3.08302689,0.23075986,-0.94316757,1.34948587,0.58591264,2.18529797,7.97652435,2.32798409,-4.09404373,0.89634895,0.77697754,-0.65091681,-7.05506849,5.86194515,2.51394033,4.69959354,0.20835471,3.18049693,-1.29682434,3.70832396,-0.48123091,-1.67904007,-1.35418940,1.58435583,-1.13851106,-1.19225955,0.59713769,-5.80462933,-7.45143986,-1.08658695,1.03244078,-1.75307107,-7.07100582,3.85825157,1.62127817,2.32572675,0.56171900,-0.80591971,3.98835945,0.15742642,-2.97832179,0.13821673,-0.72556758,-0.84936106,-7.28444147,3.94134307,0.80779338,7.47784615,8.23335075,4.80595016,-4.89574575,4.03362942,-6.67522192,-4.55204487,2.12511182,-2.70781207,-1.57226098,-3.08408356,-0.30812448,-5.32870674,-5.13238287,0.49605465,-0.55042171,0.46324944,-3.83545256,-0.12562510,-0.20978995,-0.13068712,-1.92144060,-1.68787408,5.45581436,-0.79583496,-2.38866687,-3.90546346,-0.47028148,-0.14319679,-3.37016582,2.00905991,-1.21345615,1.81376505,7.73004007,0.74310112,-4.64536428,3.78111577,-9.05182457,-0.10674095,1.53476238,0.63345337,-0.40907967,-1.44729769,-1.87145400,-2.46623540,1.07472968,0.77390999,-3.93438888,4.49174690,-0.96686655,1.92278123,0.30049133,-0.02388665,-1.99777114,-3.23885751,5.87784004,2.13776040,3.56758308,-3.37774134,-3.67526293,1.63700044,-1.69959962,-0.99112594,6.03103638,1.67399430,-1.28699589,7.16759014,12.63490295,3.62937450,-4.75982571,2.17861104,-2.03065681,4.30207729,-0.46797156,-2.96022511,-6.02702332,3.09229851,-1.39771092,-0.03471333,3.22175527,5.63565636,1.78195477,-0.63545251,-3.99497652,1.46043062,4.60050488,-2.966
51959,-2.03159475,-1.52386189,-0.15129802,-3.90390921,-0.63852370,0.79210538,2.35288715,-5.55609035,5.36427498,-0.60248077,-0.26181316,5.04884720,8.53192806,5.05080223,-6.56371737,1.52260923,-7.13623667,6.49414349,2.33445597,-4.11490965,-6.44347477,-0.47079402,-0.63467920,2.60399365,1.05958164,3.66901422,-1.05657935,1.88611507,-6.37475634,2.01480770,3.36020517,-5.11001921,-0.46132171,2.16525555,4.21938848,-2.08346295,2.86168146,1.26987600,6.76066971,-7.84916353,4.11700916,0.47985530,-4.60113716,7.42062473,6.37472820,4.37820530,-7.12197018,0.01357239,-7.90392113,8.32131577,-0.87593079,-0.16994858,-5.86345863,-0.20697471,-1.37845206,1.63819647,1.59720242,-0.74357712,-1.88725603,-1.98357940,-8.57950306,-4.10104513,3.57231879,-2.89855957,-0.11263305,2.78033924,1.53078973,-2.93089223,0.73189604,3.20563078,3.92601013,-5.21916151,0.89163935,-0.42978728,-6.70888853,4.56477976,1.20105875,3.83393812,-6.27205181,4.05993128,-7.35513067,1.60660768,-1.21052051,1.58191252,-1.37899971,-1.20117283,2.93301678,1.06302834,1.38993621,-1.66884089,-3.34452581,1.04498529,-4.10412455,-4.03310585,1.61513603,-1.09388447,2.11451387,-0.94192362,-0.23287666,5.88265705,-0.83010495,-2.15317154,-0.60276151,-1.49265075,3.93397975,5.45194483,1.45161700,-2.57401872,-5.59288931,4.29170895,1.87151814,0.08362055,-0.28767288,1.17675185,0.85266006,1.30549634,-5.60830832,0.19398519,-0.83982587,1.75940764,-5.46077394,1.64495635,0.17102760,-0.54459631,-2.21975255,-0.37443402,-2.08474159,1.85959935,11.19680309,-0.18611598,-2.59765387,3.06330776,-1.52183700,-4.88415241,-0.75097847,2.58201051,7.40885210,3.58994508,1.62457407,3.12514591,-4.36833286,1.39830995,3.61003447,-0.63837433,-3.62661815,3.78898096,2.92802262,5.87374496,-4.38554621,-2.53411579,-2.87311554,-1.31391978,-4.26736879,3.45099425,1.58769250,1.73341393,-1.08842182,2.27120280,-1.78938174,-2.29940319,7.07046986,0.51426595,-6.22928905,5.28968811,2.31827855,-4.20915890,-1.27249205,5.92120600,3.19458675,7.09252501,3.96577907,6.41484213,-4.66009521,10.00
181389,0.51108456,-4.62243366,-5.18351841,2.12961674,5.10694027,7.29412317,0.15912467,-3.38902974,-4.01918602,-2.17383957,0.13118666,0.27872476,-0.92317247,3.51440644,1.84171486,1.03378081,1.30569839,-2.09583759,9.03952980,-0.55187917,-2.04549074,1.08294606,-2.65263700,-2.93977118,1.88909876,0.96043622,1.76579499,3.14314699,5.86394691,7.36944389,-7.04524136,6.68673229,-5.52591467,-2.19745898,-4.32036924,0.52971321,2.26268244,6.91575766,-0.94590527,-3.98923349,-0.12266219,0.24294075,-1.07783222,1.87989080,-3.57109427,1.61553633,0.42486978,0.75852054,-6.19481468,-3.80570698,2.39946675,-1.93851781,-5.42234039,-6.34092760,-2.52374983,-1.85044456,3.92693520,0.40042299,4.69742584,5.40483189,-1.02398944,8.89605045,0.64680403,0.89943957,0.76993859,-1.88244629,1.90714884,3.10836840,-0.17064989,0.84892416,-6.94988108,1.92141032,-1.36458397,6.39284658,0.45201308,2.58823442,6.33375788,-4.76916075,-8.45738983,-0.48962492,2.40652561,4.56602001,-3.34420681,1.86862195,-7.01420689,-6.94657421,-2.47419310,-4.61693668,-0.18822384,-0.36949772,2.01374269,4.11018658,-5.11564064,8.04294395,2.88567662,-2.87645102,-1.23238611,-5.91409397,-0.62205851,1.38689423,-0.01120412,5.25955677,-1.98474956,-3.72012186,3.00445986,4.99141550,2.97457719,2.70827627,6.04544449,-0.20756161,-10.87035751,0.80454814,0.33568168,-2.48132324,-2.84452009,2.63126230,-3.99351716,-7.39294338,3.62798953,-8.65815926,2.65992808,-6.98126554,3.09881067,0.67735767,-1.15946686,5.63180256,-0.17694545,-8.59651184,3.75297594,-2.35913754,-0.20330384,5.49958467,1.00861740,1.42849684,0.00062013,-0.11073381,2.15207863,4.07368469,1.14344299,-1.27953362,6.64699316,-0.73672432,-8.55606937,-0.19439441,-4.14319754,-4.69964647,-5.86446047,2.87106085,-3.42714882,-5.00668287,6.22464132,-7.72335291,4.05667686,-5.72637177,6.35073948,-1.29593158,0.00813985,3.63368607,-1.05764008,-7.88486052,3.73919106,1.41835213,-1.04935634,0.65119827,0.03547254,1.88996327,1.58701086,-0.56215239,-0.80187100,4.55604362,-0.67249978,1.41084409,7.86281586,-2.3830
1182,-8.50535774,-3.82098866,-2.40856767,-5.33439016,-3.34747362,2.69389009,-1.64118791,4.52447939,0.04468334,-1.48768258,-0.69848812,-0.71123981,3.66259432,6.10314512,1.37305343,-0.62758982,-2.99383426,4.20510864,1.48497128,-0.08954811,2.43872309,-0.59880185,0.37431365,2.45458341,-3.28401661,-1.94629693,-1.93975246,-0.26385683,-0.45814323,-0.18108580,-3.74811840,-0.29739976,-2.24116230,-0.28150487,-2.24421668,3.46930790,8.35415077,0.05562943,-2.81079793,1.10388446,-2.82245207,-2.98102283,-1.08132946,1.19089699,8.00183105,6.35385323,3.72591257,4.59467506,-5.74890900,4.42238331,-3.36533451,0.18350232,3.05606651,1.18788099,2.87450886,0.27472210,-2.80111074,-0.66314960,-1.96376896,0.75167024,-4.72056293,1.10629988,-5.00775242,1.48246133,-3.91681528,-1.86573625,-6.17714882,-0.67820001,5.69730282,1.04399037,-4.93794823,3.09619617,2.18692017,-5.54232264,-3.10046840,-0.68972743,2.81824327,3.04334164,6.13203907,4.14081764,1.02573645,5.71970081,-6.01574707,-2.07346702,0.99554527,1.69641590,0.66776669,-0.80132431,-2.03513098,-3.42513680,-0.06704485,-1.87195873,-5.42428589,-0.20748445,-1.52408111,0.97084987,-0.48799962,-0.45379883,-0.26652339,-1.20720732,3.94169855,-3.18480229,-1.87440264,-1.18028760,0.52011997,-2.13437462,-4.52583313,1.69722807,-0.89371562,3.37972403,6.38838720,6.98663378,-4.05421400,6.89512825,-5.09085655,-2.16257906,-3.33272719,-3.01246452,0.37613097,1.80455804,-0.36456174,-5.32273912,-1.29978943,-0.53685790,-2.12896323,2.55506587,-2.57999182,3.40891910,1.36033249,0.83864629,-2.88629293,-7.36048365,5.61314154,1.32668555,-2.58041072,-3.71943092,1.60647738,-2.74816346,2.47269106,0.85507953,8.39183426,3.42624784,-0.01519036,5.68412066,2.51771593,1.03045523,-2.08733034,-2.44337177,0.81668580,1.30275154,2.99679208,-2.91957355,-1.71337795,3.34979844,1.51825011,5.20375061,2.27888370,1.38787699,4.23474550,-4.05878592,-4.85074377,-0.22794735,4.64402294,1.24391258,-2.04935098,1.26285601,-7.51862240,0.62138438,-1.95792389,-0.96587181,0.85141110,0.79354531,7.93766356,6
.07677746,2.05947518,6.55480623,1.44032848,-0.70615625,-0.07896036,-5.08359432,-0.01047915,-1.89632201,2.57555676,3.83779287,0.42850614,1.80754125,-0.06942326,6.35997963,6.06101418,-0.97032297,5.71477222,-6.06671238,-3.46607208,-4.98306370,2.84659123,-2.11025190,-0.04609144,5.26831341,-9.56940651,-3.67193556,-1.71143103,-1.35221267,-4.26226807,-6.89146233,8.21761799,5.69823503,2.28137946,1.88911343,-1.44562483,-1.60295713,-0.52568185,-3.31892347,-2.81997776,0.35287106,2.98202395,-1.39432132,-2.70001364,-4.14169264,3.50194883,4.12610435,5.52755260,2.65859175,3.61353087,-0.83027136,-5.10652542,-4.48625374,2.06585884,-2.76383352,-0.64300913,8.19686604,0.96106279,2.45952058,2.47275925,-1.03288829,-0.64897656,-3.77937531,4.27940083,2.58320260,-0.57665241,1.87247813,-3.81604433,-0.24543774,-1.62118483,-0.73075479,-0.48533297,2.05016756,0.45561486,0.03316188,0.77791005,-1.56283605,2.36616826,5.58082104,-1.30925488,-1.06329608,2.17189479,-3.43008828,-4.71520567,-2.56184673,0.17508316,-3.25817418,-0.41749167,0.18119079,-0.73181152,3.99792433,-3.08002281,-0.99143314,-1.83520067,1.18565679,2.98040128,5.67814350,2.35128760,1.41600966,4.02718067,-0.08193968,0.64636409,1.35931289,2.37125754,1.75978124,3.90977740,1.50662971,-2.84089065,1.29824126,-3.38730979,-1.61005294,0.58292413,-0.03019404,-1.57986510,-0.56102908,-3.03128719,0.51644313,-2.01147819,0.98400700,3.00028515,0.74579155,-3.37098312,0.93339360,-1.29018497,-2.14695001,1.30411184,0.71501279,7.47793055,4.06516457,3.50772929,3.52762985,0.55643129,0.32272506,-4.30955982,2.49414706,2.07820845,-0.34377906,4.39805031,2.77561307,-3.91292810,2.43981409,0.18861845,-2.76658440,-4.97148752,3.25273705,-0.08929539,0.19818619,-5.83767605,-0.97381884,-5.68745661,-5.42433214,3.98769903,-0.40394354,-1.83387578,-0.80109525,1.47454357,-3.14899540,0.80130816,-2.26348829,4.06121159,6.13077354,5.31226397,2.94966197,-3.65217376,-1.08136678,-7.14119816,-0.85269439,-0.70365787,-0.81598872,3.62807679,3.08123684,-7.82739496,4.07951784,-0.14204243,
-0.66969109,-5.07225513,2.88492823,0.47202343,0.72683257,-6.84280777,0.41807127,-5.09785986,-3.74514675,2.03936672,-1.06096244,-1.52409148,-0.97046643,2.27491093,-1.55597985,-1.29215479,-0.79737484,-0.01979581,7.65407991,5.54527044,4.04147148,-2.64274883,-1.89246953,-3.89547634,-1.06029689,-2.85982800,-1.41247237,1.55836034,3.38194537,-2.97655582,0.87510300,1.26282072,-1.77029657,-3.57144690,-4.19456863,0.53179169,-1.42221975,-3.09144497,-0.84294832,-5.02758694,-2.68011904,0.89156240,-0.34783912,4.64484835,-2.34453487,-1.28573155,0.09990287,0.01828218,-1.79960847,-1.06579173,1.08763921,0.43687880,3.24747229,3.83097172,1.07253766,-1.33810723,0.76530832,1.58660865,5.60743904,-3.54124737,-0.89264417,-3.83942485,-1.03707337,-1.61659896,1.65349591,1.72698796,4.96013832,0.78927267,-0.35563886,-3.48121166,3.79677629,2.59023166,2.74940348,-2.17589283,-5.91757107,2.43766379,-4.15906048,-1.74731481,-2.49113035,-0.57349741,-4.04455185,-1.46939647,2.21418452,0.09153593,2.23016739,7.91880608,4.04464149,0.07706618,-2.41892862,-2.19280314,7.61760712,-5.89153862,0.33551922,-1.70855618,-0.30561331,-0.14341974,-2.48878574,1.31269515,3.45388412,-0.02453184,-0.12132037,-4.27916241,1.25179088,4.09455204,-1.83801770,-1.86743176,-4.02864933,3.44515228,-4.39244986,-0.56988084,-1.69426417,2.18254852,-4.78135824,1.73193693,-2.27968478,-1.49523509,2.51696730,4.03677559,-2.03679037,1.32167840,-2.22570705,-2.74843621,6.29655170,-3.67230225,-1.86765468,-0.14842367,-1.21552539,-0.92038238,-0.51692355,1.08433771,-0.01929832,0.15660909,2.31432915,-3.86507082,-0.69797570,0.13505173,-1.50951028,-0.69980979,-1.51297045,3.63725281,0.13388813,2.73131752,-0.96528149,4.92000961,-5.92699385,1.69444644,-1.17121375,-2.33710480,1.35302818,1.39608085,1.68293881,0.94960749,1.89011908,-4.08865070,0.13722643,-1.62849212,-0.19044125,1.37906075,-3.92504406,-1.45033538,-0.42085981,3.38237071,-3.06508875,-1.39420545,1.13067436,0.92206454,0.49917889,-2.74508023,-2.19221997,1.77914095,0.10854459,-2.62178278,2.35042715,
-0.15322030,-0.67014873,-1.75627899,2.64074945,2.76339936,2.67275214,-0.62736398,0.58251178,-4.64895678,5.50419283,2.53566456,-2.44196153,-0.07845879,-2.80389643,-0.64810950,-0.05813205,1.67155504,-2.69673729,-1.72486305,-0.53888649,1.86805439,-1.37128329,-5.37923479,-2.08133769,0.58187997,-1.39498150,0.21874082,4.33726025,6.29673958,0.72312093,-3.32683516,1.73482585,-0.00766110,-2.63785434,-0.13511759,4.07195950,0.94139838,3.15717316,1.53720927,1.87664819,-2.33655119,6.18176556,-2.73912525,-2.45279956,2.20392370,-0.56854641,0.98915887,-2.64472580,2.40633702,-4.93327999,-1.28942823,0.98247659,1.31774998,0.07669818,-5.91169453,-0.43135011,1.27404964,-0.59787154,-0.22716975,0.74409103,10.27316475,-2.29192710,-2.19403267,3.78925133,3.19553399,-4.42490482,-0.80781460,2.16568565,-2.54165983,2.54885101,4.18779039,1.73079813,-1.48891807,11.60153770,-0.98686743,-2.88813901,2.32898521,-0.36101711,2.34522438,0.29057693,1.39800644,-4.31848240,-3.21217132,0.11740226,-1.21613467,0.57248503,-4.44853830,1.54665899,3.14459944,1.76809108,0.26693153,0.86913753,9.47121620,-2.07677889,2.08578467,1.30181742,1.58683562,-3.52757788,-1.32763624,0.79821301,-2.19358301,1.17707348,6.01983643,4.11209440,-2.04209709,7.00413418,-1.84904683,-1.32542288,-0.01298118,0.70377320,0.27815005,2.07879829,-0.71606725,-4.94399881,-2.11898828,-0.39051518,-2.21034360,3.05337906,-1.56889665,1.97065282,2.61320901,-0.34063196,-0.57001418,-2.13183641,3.48879004,-0.12067288,0.48568326,-1.81424558,2.28868723,1.44802380,1.25918829,-1.76415455,5.35742331,3.50682044,4.71371317,5.89110756,8.51241302,4.07391453,-0.05887252,-0.18202400,2.27119660,6.78274727,-2.87470293,-5.14336634,0.76443815,2.04625130,-0.43199503,-1.01353514,2.42951298,2.35641170,0.32345510,-4.04195738,-4.77967072,0.26564783,6.11455107,-2.53868008,-3.11839914,-1.04203856,5.17195654,-4.15338612,-3.84149241,0.48130888,3.09706950,-4.18423653,5.26233864,3.55831861,3.75122595,8.14969349,6.80038738,4.68907356,-1.40135396,-3.19287133,-3.15895939,8.77363205,-4
.48793411,-3.80537176,-2.40145254,-2.74341679,-2.02862644,5.33402443,9.25365734,2.50246119,0.32847846,-1.50564361,-4.26163197,-1.40994716,2.50708485,0.44500345,-0.62516934,4.09846306,5.29355669,-4.02224922,0.73442125,0.46648952,0.67028689,-6.30715466,6.56297970,3.80854273,-5.19078207,4.98839283,7.59161472,0.46010983,-2.10227895,0.29324162,-2.67019558,4.57838106,-3.02338457,-3.08647728,-2.00112700,-3.81710315,-0.08346784,1.69288683,5.68807268,3.29351830,0.54618967,1.83540761,-5.38810253,0.51326782,4.40081882,-4.03805828,0.49482727,-1.36024392,2.91845679,-2.00959015,2.47489738,-1.43354976,1.92024410,-6.55897284,1.79488957,-0.89570928,-6.13094234,-0.45504010,2.35239482,1.29039919,-4.78849840,-1.52545333,-6.50420475,2.99257326,-0.55620033,0.26807702,-2.52090979,-4.59419632,0.57965040,2.19423151,2.04760551,-0.57048106,-2.20812702,-0.04777686,1.38053393,-2.71448946,-1.06219673,-3.62008905,1.85719645,1.28355026,-2.76315832,1.65295160,-4.01645803,-3.10454416,-0.65713316,1.22384977,-0.70416176,4.45064926,1.31602776,2.06907344,2.48872757,4.25775290,3.50504255,-0.68262041,1.29799378,-1.01969171,2.98593879,0.12607655,0.37219539,-0.84196299,-3.80019331,-1.82315290,-0.38489276,-1.45200360,-4.00882292,0.61042011,-0.16738498,1.33787775,-2.26938057,1.03656030,8.89089870,-1.60370600,-5.38691807,5.72182989,2.72854710,-6.18535757,-3.13408709,2.79175353,5.18425512,9.46434212,2.40110517,1.11330092,-3.57366538,4.80967665,0.40691876,-3.65484858,0.92398167,2.53852940,3.17747331,2.14199781,-1.69107199,-1.91864693,-3.18452644,-2.42408276,-2.14332366,-1.35526609,-4.50732136,0.58234072,-1.81547785,0.57311213,1.10584176,-0.97226644,11.73174381,-2.00559855,-1.81175601,2.33131361,0.49264961,-0.42245382,-1.37528467,1.55768061,0.21152198,13.08896351,10.33674145,5.77929306,-6.19886398,5.67007637,-6.61288071,-2.58029866,-4.05192375,1.77221894,0.29821560,5.23508501,-5.09560966,-0.97536200,-5.17957878,1.02876794,-4.52072096,2.22126532,-4.81708670,0.44538212,-2.30738068,3.15900373,-4.99227905,0.82632786,
9.65415478,-0.63819492,-3.25479436,-0.13276935,0.21337092,-2.22116399,-3.04922724,0.65568435,-0.10706246,4.58047390,7.80782652,5.49080181,-3.97114491,6.43327618,-6.54772758,-2.10962629,-0.79831678,-0.08316499,2.48658133,4.14070511,-0.59806836,-4.58636141,-0.31166920,0.31757897,-3.92562199,0.65357721,0.55871534,1.71843934,1.62395024,0.00695819,-4.56716251,-3.76420808,4.24979544,-0.86128616,0.23126510,-6.32968998,1.83346081,3.81335950,2.98407745,-1.80454743,6.61764765,-1.39372075,-0.86780751,7.24317265,2.24205112,1.05702817,0.55431479,-1.54557061,3.36389136,4.70898724,1.11327887,-3.78462076,-3.63381767,2.86510396,0.74203897,0.81488025,3.54250598,3.24824381,3.19000244,-0.58995843,-7.05670738,3.18306041,3.95191574,0.81820154,-1.91068232,-2.05426741,-1.05589008,-3.18377590,-1.86278260,-8.80374908,0.93416154,-4.60517359,8.38999462,5.26356745,-8.89992714,8.95298958,4.22590351,1.00351548,-6.90151119,-8.07641125,-4.82450199,8.02293015,4.11661243,0.95457208,-7.07843113,-4.30524826,5.02697992,5.21011686,0.80132771,3.23420191,3.82452774,-2.13171721,-7.88879967,1.31062031,1.90848613,-3.51572514,-3.75684500,3.62577081,-5.76075602,-2.79389215,0.32598805,-4.28981733,4.21048594,-3.84532523,3.19815183,-0.40756655,-2.19974327,6.25655174,3.42396951,-1.88986623,-1.92803884,-2.97344875,-0.09756154,5.24342251,-0.72513700,1.06113195,-1.30720282,4.69107103,0.58984971,2.33985567,1.46385121,3.16576266,6.77769995,-5.92685127,-12.61141014,-2.83663774,4.90253258,-6.32688522,-3.00096869,2.38634992,-7.21459866,-5.89208746,2.84085894,-1.21792030,6.70161343,-4.00450230,5.29881001,-1.45574808,0.77542424,1.38336325,-0.21572059,-3.38088870,2.33249640,0.68824625,-3.68440270,0.33481622,-0.39239681,0.14560902,1.61039007,-3.11967754,2.49372435,2.68783092,-1.17559779,0.95257235,4.35451412,-0.56818569,-7.32110357,-7.58534050,-2.10573673,-3.34446383,-0.32183546,-0.78525496,-1.76974547,5.19060802,-2.11319876,-3.41755080,-0.36864156,1.32680905,0.45004874,6.17223930,-1.60707474,0.46096295,-3.88852644,1.84729624,
-0.03412050,0.99224162,-2.05553341,3.47793245,-0.06305170,0.51314175,-2.91650558,-1.78121483,-2.85465693,0.24649808,-2.70376635,0.42334458,-1.13862336,-0.98409218,-0.96593523,2.22128963,0.53402066,3.33979344,8.57430458,2.34217858,-2.40062976,5.81624222,1.13290989,-5.06850052,-4.72865725,1.82859278,6.78569555,8.56885242,2.76462936,0.33891773,-2.81092787,0.79498398,-2.27208567,1.55182552,2.17166376,6.12517643,3.56859684,0.27685475,-1.38408327,-1.03533340,-3.46618199,0.79240030,-3.89390516,-0.55852515,-1.16367757,-0.07008934,-2.20105195,3.81210446,-0.66834474,0.43603873,10.92334938,2.48571420,-6.34997845,4.23135757,0.45045292,-4.13489866,-3.92324209,1.88537407,2.57159734,9.90973091,4.37453461,7.34546280,-2.51120615,11.12575245,-3.23452854,-2.49947500,1.39819741,-3.78950691,2.40617585,5.10036278,-3.55743456,-6.42888737,-2.51929998,-1.90880990,-1.81618094,1.60946512,-4.09737110,1.96408439,-1.90115595,2.44444203,-2.31254292,-4.01332951,8.65541840,-0.58626485,-4.02226830,0.43893200,-3.78272748,-5.46277428,0.01306701,0.61185312,0.24469066,1.30214953,5.87789631,8.75197792,-5.31634712,3.43556309,-5.90755081,0.54375106,-2.48162293,-3.51843548,2.55853295,5.06387186,-2.09662485,-3.00377345,-3.21781397,-0.14537808,-4.65453672,1.92747557,0.41553855,4.09379959,0.83387995,1.50868511,-6.54959488,-8.38881016,5.50689125,-2.88616610,-1.21597648,-0.23817590,1.50816703,-2.26873541,2.29862142,-1.61143053,5.97371244,4.71440220,-0.20635787,8.85926723,0.56064367,-1.04103339,-4.47060108,-2.63824081,3.06782055,-2.07702565,3.38269401,-1.59988797,-3.80122590,2.35341501,2.69095278,3.87612104,1.89984226,0.95496917,3.14841127,-5.84543085,-7.24945450,-2.65708590,2.87417006,0.97556210,-3.75203967,1.55287778,-7.43401051,-1.29005826,-3.40252638,-4.01049423,2.82721639,-1.21479535,8.54563904,7.39749908,-0.61361837,7.60177565,1.65812778,-0.83008504,-3.60961151,-7.69062138,-1.26275063,-4.17071676,5.28448200,4.04685593,-1.18231702,1.15276611,1.58620787,6.75060844,3.29332161,-0.67640316,5.78984785,-3.14913464
,-6.41867924,-2.58316016,-2.04366302,2.01089478,-3.81723452,3.63843751,-5.13238430,-3.79432917,4.86581373,-1.06922054,3.95978498,-0.78166616,8.35650539,5.35834265,0.35594034,9.41657066,-0.84108615,-6.54425859,-3.44328952,-6.55536795,-0.08963367,-1.53906262,0.17658240,-0.13108420,-0.44371247,-0.78411150,2.64754868,9.66306782,1.70506203,-0.31588936,4.31715870,-6.16665173,-10.43371868,-3.72962189,4.35245228,-1.75867891,-4.20046234,8.62637043,1.45946813,-3.30153608,0.85179043,-2.66643381,3.01863337,-2.52916121,8.35405540,-0.37298933,-0.89473486,6.88681793,-4.46370125,-7.50776386,3.80255938,-3.55003357,1.43528831,-2.20383263,2.34999895,2.03803205,1.94830751,-1.85976326,0.97718471,5.53710842,-0.80560827,0.23925614,5.98795223,-2.03578377,-7.77835321,-2.79955530,-1.88185954,-2.49112058,-0.76095992,2.71161270,-0.55918610,0.83789903,-1.42063200,-0.61528748,-4.18273115,1.76384258,4.21265936,5.50964785,-0.93324339,3.83215356,1.52210593,-0.91594946,1.31148386,3.20160103,1.24493563,-0.72693497,1.84716725,3.09897518,-1.34605026,-1.17511916,-1.05526352,-1.08590937,-1.41319299,-3.75052118,-2.67095542,-0.76179552,-3.32081509,-1.04692316,-1.30194843,-1.98795474,5.01223469,0.21895903,-1.85535169,3.12362719,0.16198632,-3.86784005,-2.03062248,-0.15415624,8.22020721,4.83055592,4.50315666,4.19443417,0.42727345,-4.67786789,-5.18739986,2.53988838,3.19683266,1.80313504,1.94664574,0.59795094,-4.21626759,0.50492239,-0.41232634,-0.99224532,-3.94929314,1.74060190,-0.92474866,-1.00664830,-6.17397356,-1.33146775,-3.78111315,-4.91876888,2.50303864,-0.34890354,-1.25013232,0.38168997,-1.84135628,-4.46107960,-4.05920792,-2.61709857,0.71046209,9.80566883,6.34086990,2.73394704,-2.03342366,-2.21424174,-5.56514263,-4.74755144,-2.20672894,0.09010231,1.70423889,3.19200158,-6.99027634,1.14216340,0.05824995,-0.76996505,-6.51575899,-0.41109252,0.78229940,1.36170781,-5.65170193,1.12221193,-4.60430050,-4.40174437,4.01805925,0.10774946,-2.77991009,-0.18023163,0.02151692,-1.77023101,-1.86639869,-0.69443607,4.922908
31,6.83520412,4.27372265,6.54272366,-7.59249687,-1.40776849,-3.52368808,1.01398587,-3.58802676,-0.35658866,1.14716864,3.75847244,-2.30159235,-0.72130895,-0.24564353,-1.77531350,-3.08677864,-0.73486501,-1.20357263,0.60789430,-3.46990204,-0.20668676,-5.46096087,-5.22016764,0.98259866,1.81012678,3.92534304,-2.94997001,1.65154219,2.27040243,0.99095678,0.09144652,-0.99103236,-1.11210847,0.78181303,2.38706732,2.96695375,-0.17279971,0.31143007,1.35465562,2.03586054,6.19515753,-3.14652419,-2.89027119,-3.26665854,-1.93043876,-0.46601450,1.07655203,1.74946189,4.02148342,0.69275337,0.50094581,-4.07613230,2.98369169,4.24537849,0.49480581,-2.02408123,-2.02068973,6.54505825,-5.19377470,-0.12596917,-0.70204186,-0.98308045,-3.19708824,1.63609934,1.35475993,0.16313422,4.13918924,7.69187021,3.72601676,-1.97790039,-1.16739464,-3.31835508,8.14553452,-1.78718984,1.21505618,-3.84255409,-3.21992350,0.07376552,-0.81223297,3.57002878,1.48521733,-0.45995998,0.30551746,-3.33944130,1.39538884,1.84758544,-0.21494150,-2.27316713,-4.37771225,6.48841667,-5.00251961,-0.45162797,-5.01056004,0.70199943,-4.60057783,-2.22394514,0.07777429,-1.49820781,3.47308421,6.13231564,1.18605387,-4.78924608,-3.49548388,-2.73382568,6.24617863,-2.74291611,-1.03833354,-2.20752788,-2.33219409,1.48633552,1.65796840,4.95045471,2.58479190,-0.90922785,0.71312457,-4.44465590,1.37020862,2.37683725,0.18805164,-3.28422308,-1.64939332,3.64181972,-3.75277281,3.67203593,-0.11204052,2.24140930,-3.90657187,2.56883717,-1.44016707,-2.83842611,-0.29104578,2.17757058,-0.71431804,1.36911654,0.85083604,-1.60110259,-1.97247636,-1.61163378,-0.81236130,-0.38993555,-3.03631902,-0.38213277,0.06394482,3.19348621,0.36771113,1.36763072,2.49159527,-0.39599860,-2.69996762,-0.97561121,-2.97563028,-0.49662948,-0.17564940,-2.79042959,0.72395414,2.07260203,-0.99439794,-2.20248008,-0.07389921,0.65536159,4.73054695,-0.63917702,0.58788192,-3.60156059,6.59609890,3.88419437,-3.38469863,-3.56237841,-2.03295064,0.07279694,3.71804547,0.79928309,-2.13411403,-1
.13909864,-0.34193408,-1.00338125,-1.44231665,-5.39835978,-0.45086145,1.16064668,2.58335257,2.10072684,4.64244223,7.10090065,1.01974952,-4.44687223,2.99792576,1.10303724,-1.22736573,-3.91514421,3.07458854,2.18765211,3.34481716,2.46166849,2.99648619,-0.94046807,5.55028200,0.92199719,-0.83934361,-0.72042274,0.84869325,1.46914721,0.85937387,4.77306223,-4.06436539,-2.59847593,2.44828081,0.50484699,-2.71092367,-6.39010477,0.91778028,3.25469685,1.30310678,1.35258150,3.56171441,7.82435083,-2.51527429,-4.24328852,2.36876059,1.94595242,-2.59290171,-6.62389565,3.32567835,2.13659120,4.09299326,3.48293996,2.64965177,-3.19157362,13.37204266,-0.50297594,-4.57448196,3.95582604,-0.69038916,0.10098404,1.18737555,3.65761185,-5.69623756,-2.03357077,1.02868807,-1.38448596,-0.05690211,-8.48874187,0.56755424,1.45485961,0.66273880,0.06495565,1.79539490,8.46864319,-1.22696662,-1.87585378,-0.99768794,2.72801924,-0.66980243,-2.31924677,0.33271110,0.11666083,1.86980045,5.95332909,7.38583708,-2.80956483,6.79227638,-6.78070831,1.21884382,-1.40695429,0.90236962,-1.13695288,0.50760663,1.00955284,-5.39029121,0.24987072,2.24283314,-4.02145576,2.18057394,-3.35627747,1.26061773,1.30342579,0.11311233,-1.11199212,-4.06509686,5.82649660,-1.24059582,5.51652861,-1.90937877,1.10658336,-0.47065550,-2.39167786,-1.95931304,4.12717247,1.15396059,1.26015663,7.97836876,7.33633423,2.27785325,-2.83802366,-2.74850106,0.86126029,6.18781090,-1.43707538,-6.97134876,-3.25486469,-1.95214593,0.91066706,0.89637989,1.06481194,6.25791073,0.81779671,-1.08384395,-3.21191931,2.04216075,4.76030350,-2.37217665,-1.42571259,-6.35876131,4.62536526,-5.40060568,-3.14868999,-1.00587153,1.80662942,-7.03201485,6.08373499,0.99862772,2.21717811,4.06814623,6.02428913,5.33422756,-0.87013257,-2.22477579,-2.51505303,5.82925224,-0.82854009,-4.30698347,-1.75007713,2.08352375,-2.25235629,1.17517352,5.77717733,2.27472878,2.72778273,-1.95411634,-4.52602863,1.13983536,1.16340065,-2.02740526,-3.11290503,-1.94906235,1.54855204,-4.52984142,1.97465122,
-1.79415476,4.03510094,-8.45349979,10.87430096,2.19863629,-5.39083815,5.86213875,6.25744534,6.52600002,-4.72149038,-1.75254321,-5.51459169,7.03155518,-2.01889277,-4.58441257,-3.61226106,0.42395937,-0.93263882,2.28703761,2.80611467,2.59498215,0.65989012,-1.51268566,-4.49465561,-4.70453882,5.44696808,-4.37603617,0.46670085,2.82488608,2.18854523,-2.04817152,1.19557285,1.53618634,4.44758606,-7.31593513,7.43966007,-3.55480957,-5.29834652,2.14622784,1.65194583,2.71262598,-4.86145496,0.79726243,-8.88541985,1.19627261,0.79660845,-1.98016644,1.03741014,-3.93128228,1.05535269,2.01378822,-0.46086323,-0.77754641,-1.43942690,0.49809402,-2.27861357,-3.29815221,0.38201320,-3.98481083,4.88261318,-0.44555628,-2.57224536,2.35001850,-2.65835261,-2.43422794,-2.97889376,1.07349825,1.88157082,4.74075413,0.60376728,-0.48894715,-1.15800071,4.68110943,-0.86976886,1.49192941,0.62665290,0.20652676,0.53916287,-1.45706177,0.66133004,1.34405875,-4.27689552,-0.20838106,-5.14266443,-1.29718637,-1.74506426,-0.86022055,-3.57553625,0.46880072,-1.25287139,3.28596354,11.33191013,1.23942876,-3.87616491,7.57880497,-0.22940339,-5.68512678,-1.94969654,5.85449600,3.75705457,4.24395847,1.60086083,2.62553668,-0.93964291,5.84753895,-0.79931092,0.48274064,2.07170033,3.02243996,2.63509989,-0.76043403,-1.64048159,-6.17683458,-3.09974527,-2.12773156,-0.89379883,2.82242465,-1.99981332,-0.08763933,0.01921120,-1.94142103,2.48067307,0.41083777,8.24922180,-1.84516132,-1.39224625,5.03956223,0.49562740,-5.28296328,-0.20005548,3.13672113,0.51187158,7.11563921,6.43059587,3.48430967,-5.37095928,8.03863049,-5.53923941,-2.16421175,-3.77641368,3.29633045,5.04030085,2.25945377,-3.04169011,-2.16198015,-2.49559617,-0.26252726,-6.99201345,2.87374353,-0.12568980,0.23314142,-1.32087135,4.39030552,-0.24638844,-4.37242651,14.09276772,1.23987353,-1.72249663,0.31124914,-2.13725138,-3.74915648,-1.87147236,0.47318631,1.13337576,3.00416899,8.82548523,4.80538750,-5.28486395,5.51870108,-5.15801477,0.95712411,-1.50416136,2.34657240,4.20726633
,5.56757259,-3.30645251,-3.39945269,-2.68488026,-2.53525281,-3.15145874,2.74529529,-0.96283442,2.87778258,0.22186530,1.24905694,-7.07941198,-5.45916176,3.46988297,0.92430985,-0.98330998,-2.23672342,-3.03262734,0.73941302,0.98004431,0.83219361,7.17411804,4.27849865,0.14765590,8.61269569,9.04497051,1.53991723,-2.08305025,-4.34939337,0.63786775,2.60098696,0.02432060,-1.48516297,-4.06825686,5.12420368,-0.75312757,1.96927559,4.91575956,3.41533065,3.62557888,-4.35002136,-5.91343403,0.45026422,4.93286371,3.45830250,-4.39032364,-0.51697755,-7.41543341,-3.06703568,1.01196158,2.47106576,5.54014874,-4.65312243,8.61000633,8.25905323,-1.41497111,8.69221878,0.40090930,1.11325574,-1.67089832,-4.01080132,1.07925677,2.68086481,-0.73093414,-1.35081220,-7.85765076,-5.98989439,-0.04651213,4.63693142,2.07757711,-0.22652936,3.45525455,-0.69198442,-10.39761639,-2.02106953,4.77755499,-2.67665577,-1.72481167,4.49634743,-2.55717134,-4.55044937,0.46377492,-3.08933020,3.86891365,-2.79104614,8.36974335,0.86471701,-5.39342690,12.54906940,-0.41536295,-5.29502535,-3.94430566,-5.67391300,-4.65079165,2.22505951,-0.30000746,2.27855444,-4.81604433,-1.73440599,4.68784523,5.00208044,0.18863934,-1.74989462,3.17923450,-1.59773099,-12.59962940,-1.54495025,-0.00576371,1.79913878,-2.43449807,1.49516344,-3.90507102,1.68647158,4.50177765,-5.32286358,3.47539330,-2.90529680,1.61576962,0.83679676,-5.55615807,3.78939056,-4.46644831,-5.95550919,0.37808037,0.51334500,1.74658906,-0.82085419,-0.65387219,3.67790437,0.03758264,-2.42622781,1.83335185,4.73835945,-0.83536482,-0.03993917,3.78230667,-4.81265640,-8.26869011,-1.30363441,-2.09106350,-3.96769738,-1.89037073,0.38682747,0.05434489,5.72213697,0.55685395,-3.47729349,-1.11535001,2.09416127,5.08877802,5.72183466,1.29632664,0.16822398,-2.43180108,3.49967623,2.15753818,-0.26548505,3.24446392,-0.00599277,1.08215356,-0.23225522,-2.40723038,0.18496060,-3.70608735,-0.19918591,-1.64028871,0.80792952,-0.85334057,-2.52314138,-3.12099195,0.17949918,-0.82650864,2.32224989,9.5647
6116,-0.20134282,-0.48428559,2.86784410,0.07289505,-3.92880869,-2.11887884,0.59164631,6.31267452,7.49149418,2.88749456,2.40504885,-3.57608175,-1.48019314,-0.69410253,0.90275228,-0.34111357,2.19190216,3.39090061,3.39631820,-5.19105434,2.67546582,-2.56549048,-0.59797800,-4.21802664,0.63918972,-0.69969130,0.47496963,-4.30976725,0.16531238,-3.59595251,-0.76877379,11.79971790,-0.93276632,-1.48630571,8.04754066,2.09168458,-3.77018499,-4.19337654,0.26171905,1.99359691,8.96759701,8.39609814,6.19231987,-5.36037970,4.69818354,-4.22453928,-4.61665344,-2.52073431,1.34026706,2.80182385,2.56681514,-4.04676390,-3.01466990,-4.10480118,0.38737059,-0.37146521,-2.26529670,-1.72867084,0.93472683,-2.47562981,0.89871657,-1.67618203,-0.28950238,5.30124855,-0.14731219,-0.81319761,-1.11265934,0.11356127,-2.52802444,-1.93826056,1.06187987,1.48062325,4.28070498,5.69893932,9.26904392,-4.23773003,5.78582096,-6.18445301,-2.85200453,-5.30461454,-4.16009140,-0.07239690,4.11531162,-1.12266588,-1.50265646,0.47661865,-1.90043914,-6.48978710,1.71005368,0.18256521,-0.88272136,-0.51324779,-0.78045660,-5.21036625,-4.11805344,3.99454761,-1.04999924,-6.99629354,-5.02737141,0.94748145,-2.35882139,4.13982439,-1.41835535,7.56763077,3.97024012,-4.08156776,6.90305424,0.53571963,-2.22625160,-2.09144926,-4.98530245,-0.15102190,0.59995949,3.28562784,0.77991986,-3.08389306,3.34046674,0.41394949,5.10031366,2.99692893,0.17706826,2.85998058,-6.68330860,-6.72653008,-0.04071128,3.71085787,3.17834806,-4.88019037,6.74075413,-7.41782188,-5.22026348,-1.94595623,-3.61318684,1.85610664,1.08613706,6.41580677,1.46376514,-4.11524010,9.59146214,-2.92772651,-1.70753336,-1.51594138,-4.88185692,1.47331417,-2.23893595,4.98459148,1.29359996,-2.29221845,-0.99594390,3.05759239,6.86030054,2.40487719,3.28339863,7.72739315,-3.60563445,-9.73502827,-1.51672328,-0.08473521,-2.43673515,-3.26616001,3.63767886,-11.25394535,-5.17597103,-1.27523947,-7.82669783,0.67929745,-4.50530529,5.49323797,6.78993320,-2.28033876,4.61412525,2.55109429,-12.38607
693,-0.63024014,-3.45992327,-0.84092742,-0.03252453,4.58635283,5.28213978,-1.28417206,-1.71185923,-0.26850975,8.28257561,4.47432184,2.72818279,8.42217731,-4.22216320,-8.95128918,-1.57179546,1.34253705,-5.47035217,-5.50866985,4.64156532,-6.11207914,-5.46734476,3.54298997,-2.79237103,-0.70766860,-3.62739944,3.22660995,-2.02262759,0.11224222,2.63832402,-0.91955596,-4.65958309,-0.29729855,-1.78957534,-0.40749407,0.51688713,0.83725226,0.30945438,1.20769620,-1.75219965,2.59689760,5.01501608,-1.59034789,0.58155286,3.75831509,-5.26110506,-8.65382767,-6.19066620,-0.61932850,-2.71863723,-0.87443137,3.40582991,-1.27868056,3.51236677,-2.07806540,-0.85076392,-1.14599180,1.16361260,1.86411846,5.86179352,0.69029891,-0.06060839,1.54649436,-0.60351688,1.51970077,0.04187265,1.64540339,2.75502157,2.46308279,1.69071770,-3.23827076,0.92096543,-3.09458661,-1.23823690,0.24035048,-0.74456501,-1.85476089,-0.32914662,-2.10325241,1.19795251,-2.05372071,1.02114081,2.56286955,0.42165697,-1.65826249,4.00724554,-2.18727994,-1.05848944,-0.52338278,-0.28714985,8.08780861,5.04444599,3.51866961,3.37445784,-1.96067202,-1.21509445,-3.96595931,-0.80801201,0.76944816,1.80147493,4.14419460,-0.12201095,-2.77788162,1.13284469,-2.05441403,-0.61129224,-2.69690657,1.91634214,-2.17146754,-0.22308528,-6.02561045,0.49161875,-6.74280357,-4.62689781,2.47910833,1.86534905,-3.24152899,-1.39898300,0.29427958,-2.16338181,0.90073711,1.75551236,4.42651892,8.34437466,5.50070190,5.68162251,1.65345454,-2.72315669,-5.43411493,-0.29380533,1.07508349,-1.73533511,2.56912184,3.62010550,-6.30422783,1.74158525,-1.22070909,-0.80982518,-4.14757967,4.29217434,0.70600843,-2.09282112,-5.09018898,-0.11623126,-5.99775553,-4.66743088,1.61512172,-1.30276895,-3.17103505,-0.26310229,-1.00843918,-0.77664804,-2.05240250,0.04728425,1.15720487,4.01001406,7.24615860,2.55452180,-5.76347876,0.34683830,-6.05540276,-4.70677900,-0.93182588,-4.37759733,2.93209839,1.63947964,-2.43563962,1.35213876,0.00670356,-0.02742785,-2.16460943,1.39449501,0.23929763
,2.37476778,-4.17733765,-0.81475425,-6.15027046,-5.74441719,3.53978682,0.66798484}); diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests14.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests14.cpp index 21ec3c764..861bb283a 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests14.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests14.cpp @@ -229,22 +229,22 @@ TEST_F(DeclarableOpsTests14, test_empty_fill_1) { } TEST_F(DeclarableOpsTests14, test_lstmBlockCell_1) { - auto a = NDArrayFactory::create('c', {1, 5}, {0.7787856f, 0.80119777f, 0.72437465f, 0.23089433f, 0.72714126f}); - auto b = NDArrayFactory::create('c', {1, 3}); - auto c = NDArrayFactory::create('c', {1, 3}); - auto d = NDArrayFactory::create('c', {8, 12}, {-0.15320599,-0.120416045,0.33126968,0.13921785,-0.32313538,-0.43956736,0.4756174,0.4335605,-0.5450856,-0.3943429,-0.28687626,0.068032146,-0.2793799,0.17298919,-0.36553562,-0.097853184,-0.2544747,-0.39872527,-0.14556861,-0.31479517,0.2559092,0.47166896,-0.31330687,0.47313118,0.5134543,-0.4678212,-0.12853557,0.26142156,0.43472284,-0.42842552,-0.1895876,0.538689,0.508651,-0.020272732,0.112327516,0.2704304,-0.046546757,0.32570732,-0.15148133,-0.19145513,0.18631572,-0.024152994,0.41603214,-0.3421499,0.0106860995,-0.2966229,-0.36713937,0.25841123,0.0843398,0.49082482,0.10800403,0.1874243,-0.26379472,-0.22531849,0.24924624,0.23119557,0.49940765,-0.051413506,0.20315129,-0.41888732,0.44097036,0.40453392,0.013338983,0.23434466,0.23942488,0.47894,-0.19898453,0.09253675,-0.032358468,-0.15213022,-0.3441009,-0.15600958,-0.08235118,0.12165731,-0.4481289,-0.4842423,-0.45797008,-0.4606034,0.08163166,-0.2981107,0.50207126,0.44195646,0.13850057,0.072246075,-0.34388685,0.030900061,0.35821778,0.47900867,0.5094063,0.23683065,0.18020362,-0.1369732,0.015235603,0.2786904,0.07954317,0.12543976}); - auto e = NDArrayFactory::create('c', {3}); - auto f = NDArrayFactory::create('c', {3}); - auto g = NDArrayFactory::create('c', {3}); - auto h = 
NDArrayFactory::create('c', {12}); + auto a = NDArrayFactory::create('c', {1, 5}, {0.7787856f, 0.80119777f, 0.72437465f, 0.23089433f, 0.72714126f}); + auto b = NDArrayFactory::create('c', {1, 3}); + auto c = NDArrayFactory::create('c', {1, 3}); + auto d = NDArrayFactory::create('c', {8, 12}, {-0.15320599,-0.120416045,0.33126968,0.13921785,-0.32313538,-0.43956736,0.4756174,0.4335605,-0.5450856,-0.3943429,-0.28687626,0.068032146,-0.2793799,0.17298919,-0.36553562,-0.097853184,-0.2544747,-0.39872527,-0.14556861,-0.31479517,0.2559092,0.47166896,-0.31330687,0.47313118,0.5134543,-0.4678212,-0.12853557,0.26142156,0.43472284,-0.42842552,-0.1895876,0.538689,0.508651,-0.020272732,0.112327516,0.2704304,-0.046546757,0.32570732,-0.15148133,-0.19145513,0.18631572,-0.024152994,0.41603214,-0.3421499,0.0106860995,-0.2966229,-0.36713937,0.25841123,0.0843398,0.49082482,0.10800403,0.1874243,-0.26379472,-0.22531849,0.24924624,0.23119557,0.49940765,-0.051413506,0.20315129,-0.41888732,0.44097036,0.40453392,0.013338983,0.23434466,0.23942488,0.47894,-0.19898453,0.09253675,-0.032358468,-0.15213022,-0.3441009,-0.15600958,-0.08235118,0.12165731,-0.4481289,-0.4842423,-0.45797008,-0.4606034,0.08163166,-0.2981107,0.50207126,0.44195646,0.13850057,0.072246075,-0.34388685,0.030900061,0.35821778,0.47900867,0.5094063,0.23683065,0.18020362,-0.1369732,0.015235603,0.2786904,0.07954317,0.12543976}); + auto e = NDArrayFactory::create('c', {3}); + auto f = NDArrayFactory::create('c', {3}); + auto g = NDArrayFactory::create('c', {3}); + auto h = NDArrayFactory::create('c', {12}); - auto z0 = NDArrayFactory::create('c', {1, 3}); - auto z1 = NDArrayFactory::create('c', {1, 3}); - auto z2 = NDArrayFactory::create('c', {1, 3}); - auto z3 = NDArrayFactory::create('c', {1, 3}); - auto z4 = NDArrayFactory::create('c', {1, 3}); - auto z5 = NDArrayFactory::create('c', {1, 3}); - auto z6 = NDArrayFactory::create('c', {1, 3}); + auto z0 = NDArrayFactory::create('c', {1, 3}); + auto z1 = NDArrayFactory::create('c', {1, 
3}); + auto z2 = NDArrayFactory::create('c', {1, 3}); + auto z3 = NDArrayFactory::create('c', {1, 3}); + auto z4 = NDArrayFactory::create('c', {1, 3}); + auto z5 = NDArrayFactory::create('c', {1, 3}); + auto z6 = NDArrayFactory::create('c', {1, 3}); nd4j::ops::lstmBlockCell op; auto result = op.execute({&a, &b, &c, &d, &e, &f, &g, &h}, {&z0, &z1, &z2, &z3, &z4, &z5, &z6}, {1.0, -1.0}, {0}, {}); diff --git a/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp b/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp index 552d49dd6..fe190d9bb 100644 --- a/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp @@ -1049,7 +1049,8 @@ TEST_F(NativeOpsTests, ConcatTest_1) { //y.assign(2.); x.syncToDevice(); z.syncToDevice(); - auto dimension = NDArrayFactory::create('c', {1}, {(int)0}); + int d = 0; + auto dimension = NDArrayFactory::create('c', {1}, {d}); auto dimensions = reinterpret_cast(dimension.buffer()); //auto tadPackX = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dimensions, dimension.lengthOf()); auto tadPackZ = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dimensions, dimension.lengthOf()); @@ -1087,7 +1088,8 @@ TEST_F(NativeOpsTests, ConcatTest_2) { //y.assign(2.); x.syncToDevice(); z.syncToDevice(); - auto dimension = NDArrayFactory::create('c', {1}, {(int)0}); + int d = 0; + auto dimension = NDArrayFactory::create('c', {1}, {d}); auto dimensions = reinterpret_cast(dimension.buffer()); //auto tadPackX = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dimensions, dimension.lengthOf()); auto tadPackZ = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dimensions, dimension.lengthOf());