2021-02-09 05:16:31 +01:00
|
|
|
/* ******************************************************************************
|
|
|
|
*
|
2019-11-13 15:04:59 +01:00
|
|
|
*
|
|
|
|
* This program and the accompanying materials are made available under the
|
|
|
|
* terms of the Apache License, Version 2.0 which is available at
|
|
|
|
* https://www.apache.org/licenses/LICENSE-2.0.
|
|
|
|
*
|
2021-02-09 05:16:31 +01:00
|
|
|
* See the NOTICE file distributed with this work for additional
|
|
|
|
* information regarding copyright ownership.
|
2019-11-13 15:04:59 +01:00
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
|
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
|
|
* License for the specific language governing permissions and limitations
|
|
|
|
* under the License.
|
|
|
|
*
|
|
|
|
* SPDX-License-Identifier: Apache-2.0
|
|
|
|
******************************************************************************/
|
|
|
|
|
2020-03-09 06:22:49 +01:00
|
|
|
//
|
|
|
|
// @author raver119@gmail.com
|
|
|
|
//
|
2019-11-13 15:04:59 +01:00
|
|
|
#ifndef SAMEDIFF_THREADS_H
|
|
|
|
#define SAMEDIFF_THREADS_H
|
|
|
|
|
|
|
|
#include <functional>
|
2020-03-02 10:49:41 +01:00
|
|
|
#include <system/openmp_pragmas.h>
|
|
|
|
#include <system/op_boilerplate.h>
|
|
|
|
#include <system/Environment.h>
|
|
|
|
#include <system/op_enums.h>
|
2019-11-13 15:04:59 +01:00
|
|
|
|
2020-03-09 06:22:49 +01:00
|
|
|
namespace samediff {
|
2019-12-02 19:37:21 +01:00
|
|
|
class ND4J_EXPORT ThreadsHelper {
|
2019-11-13 15:04:59 +01:00
|
|
|
public:
|
|
|
|
static int numberOfThreads(int maxThreads, uint64_t numberOfElements);
|
|
|
|
static int numberOfThreads2d(int maxThreads, uint64_t iters_x, uint64_t iters_y);
|
|
|
|
static int numberOfThreads3d(int maxThreads, uint64_t iters_x, uint64_t iters_y, uint64_t iters_z);
|
|
|
|
static int pickLoop2d(int numThreads, uint64_t iters_x, uint64_t iters_y);
|
|
|
|
static int pickLoop3d(int numThreads, uint64_t iters_x, uint64_t iters_y, uint64_t iters_z);
|
|
|
|
};
|
|
|
|
|
2019-12-02 19:37:21 +01:00
|
|
|
class ND4J_EXPORT Span {
|
2019-11-13 15:04:59 +01:00
|
|
|
private:
|
|
|
|
int64_t _startX, _stopX, _incX;
|
|
|
|
public:
|
|
|
|
Span(int64_t start_x, int64_t stop_x, int64_t inc_x);
|
|
|
|
~Span() = default;
|
|
|
|
|
|
|
|
int64_t startX() const;
|
|
|
|
int64_t stopX() const;
|
|
|
|
int64_t incX() const;
|
|
|
|
|
|
|
|
static Span build(uint64_t thread_id, uint64_t num_threads, int64_t start_x, int64_t stop_x, int64_t inc_x);
|
|
|
|
};
|
|
|
|
|
2019-12-02 19:37:21 +01:00
|
|
|
class ND4J_EXPORT Span2 {
|
2019-11-13 15:04:59 +01:00
|
|
|
private:
|
|
|
|
int64_t _startX, _stopX, _incX;
|
|
|
|
int64_t _startY, _stopY, _incY;
|
|
|
|
public:
|
|
|
|
Span2(int64_t start_x, int64_t stop_x, int64_t inc_x, int64_t start_y, int64_t stop_y, int64_t inc_y);
|
|
|
|
~Span2() = default;
|
|
|
|
|
|
|
|
int64_t startX() const;
|
|
|
|
int64_t startY() const;
|
|
|
|
|
|
|
|
int64_t stopX() const;
|
|
|
|
int64_t stopY() const;
|
|
|
|
|
|
|
|
int64_t incX() const;
|
|
|
|
int64_t incY() const;
|
|
|
|
|
|
|
|
static Span2 build(int loop, uint64_t thread_id, uint64_t num_threads, int64_t start_x, int64_t stop_x, int64_t inc_x, int64_t start_y, int64_t stop_y, int64_t inc_y);
|
|
|
|
};
|
|
|
|
|
2019-12-02 19:37:21 +01:00
|
|
|
class ND4J_EXPORT Span3 {
|
2019-11-13 15:04:59 +01:00
|
|
|
private:
|
|
|
|
int64_t _startX, _stopX, _incX;
|
|
|
|
int64_t _startY, _stopY, _incY;
|
|
|
|
int64_t _startZ, _stopZ, _incZ;
|
|
|
|
public:
|
|
|
|
Span3(int64_t start_x, int64_t stop_x, int64_t inc_x, int64_t start_y, int64_t stop_y, int64_t inc_y, int64_t start_z, int64_t stop_z, int64_t inc_z);
|
|
|
|
~Span3() = default;
|
|
|
|
|
|
|
|
int64_t startX() const;
|
|
|
|
int64_t startY() const;
|
|
|
|
int64_t startZ() const;
|
|
|
|
|
|
|
|
int64_t stopX() const;
|
|
|
|
int64_t stopY() const;
|
|
|
|
int64_t stopZ() const;
|
|
|
|
|
|
|
|
int64_t incX() const;
|
|
|
|
int64_t incY() const;
|
|
|
|
int64_t incZ() const;
|
|
|
|
|
|
|
|
static Span3 build(int loop, uint64_t thread_id, uint64_t num_threads, int64_t start_x, int64_t stop_x, int64_t inc_x, int64_t start_y, int64_t stop_y, int64_t inc_y, int64_t start_z, int64_t stop_z, int64_t inc_z);
|
|
|
|
};
|
|
|
|
|
2019-12-02 19:37:21 +01:00
|
|
|
class ND4J_EXPORT Threads {
|
2019-11-13 15:04:59 +01:00
|
|
|
public:
|
|
|
|
/**
|
|
|
|
* This function executes 1 dimensional loop for a given number of threads
|
|
|
|
* PLEASE NOTE: this function can use smaller number of threads than requested.
|
|
|
|
*
|
|
|
|
* @param function
|
|
|
|
* @param numThreads
|
|
|
|
* @param start
|
|
|
|
* @param stop
|
|
|
|
* @param increment
|
|
|
|
* @return
|
|
|
|
*/
|
2020-06-06 14:26:55 +02:00
|
|
|
static int parallel_for(FUNC_1D function, int64_t start, int64_t stop, int64_t increment = 1, uint32_t numThreads = sd::Environment::getInstance().maxMasterThreads());
|
2019-11-13 15:04:59 +01:00
|
|
|
|
2019-11-14 12:35:02 +01:00
|
|
|
/**
|
|
|
|
* This function executes 1 dimensional loop for a given number of threads
|
|
|
|
*
|
|
|
|
* @param function
|
|
|
|
* @param start
|
|
|
|
* @param stop
|
|
|
|
* @param increment
|
|
|
|
* @param numThreads
|
|
|
|
* @return
|
|
|
|
*/
|
2020-06-06 14:26:55 +02:00
|
|
|
static int parallel_tad(FUNC_1D function, int64_t start, int64_t stop, int64_t increment = 1, uint32_t numThreads = sd::Environment::getInstance().maxMasterThreads());
|
2019-11-13 15:04:59 +01:00
|
|
|
|
|
|
|
/**
|
2019-11-14 12:35:02 +01:00
|
|
|
* This method will execute function splitting 2 nested loops space with multiple threads
|
2019-11-13 15:04:59 +01:00
|
|
|
*
|
|
|
|
* @param function
|
|
|
|
* @param numThreads
|
|
|
|
* @param start_x
|
|
|
|
* @param stop_x
|
|
|
|
* @param inc_x
|
|
|
|
* @param start_y
|
|
|
|
* @param stop_y
|
|
|
|
* @param inc_y
|
|
|
|
* @return
|
|
|
|
*/
|
2020-06-06 14:26:55 +02:00
|
|
|
static int parallel_for(FUNC_2D function, int64_t start_x, int64_t stop_x, int64_t inc_x, int64_t start_y, int64_t stop_y, int64_t inc_y, uint64_t numThreads = sd::Environment::getInstance().maxMasterThreads(), bool debug = false);
|
2019-11-13 15:04:59 +01:00
|
|
|
|
|
|
|
/**
|
2019-11-14 12:35:02 +01:00
|
|
|
* This method will execute function splitting 3 nested loops space with multiple threads
|
2019-11-13 15:04:59 +01:00
|
|
|
*
|
|
|
|
* @param function
|
|
|
|
* @param numThreads
|
|
|
|
* @param start_x
|
|
|
|
* @param stop_x
|
|
|
|
* @param inc_x
|
|
|
|
* @param start_y
|
|
|
|
* @param stop_y
|
|
|
|
* @param inc_y
|
|
|
|
* @param start_z
|
|
|
|
* @param stop_z
|
|
|
|
* @param inc_z
|
|
|
|
* @return
|
|
|
|
*/
|
2020-06-06 14:26:55 +02:00
|
|
|
static int parallel_for(FUNC_3D function, int64_t start_x, int64_t stop_x, int64_t inc_x, int64_t start_y, int64_t stop_y, int64_t inc_y, int64_t start_z, int64_t stop_z, int64_t inc_z, uint64_t numThreads = sd::Environment::getInstance().maxMasterThreads());
|
2019-11-13 15:04:59 +01:00
|
|
|
|
|
|
|
/**
|
|
|
|
*
|
|
|
|
* @param function
|
|
|
|
* @param numThreads
|
|
|
|
* @return
|
|
|
|
*/
|
2020-06-06 14:26:55 +02:00
|
|
|
static int parallel_do(FUNC_DO function, uint64_t numThreads = sd::Environment::getInstance().maxMasterThreads());
|
2019-11-13 15:04:59 +01:00
|
|
|
|
2020-06-06 14:26:55 +02:00
|
|
|
static int64_t parallel_long(FUNC_RL function, FUNC_AL aggregator, int64_t start, int64_t stop, int64_t increment = 1, uint64_t numThreads = sd::Environment::getInstance().maxMasterThreads());
|
2019-11-13 15:04:59 +01:00
|
|
|
|
2020-06-06 14:26:55 +02:00
|
|
|
static double parallel_double(FUNC_RD function, FUNC_AD aggregator, int64_t start, int64_t stop, int64_t increment = 1, uint64_t numThreads = sd::Environment::getInstance().maxMasterThreads());
|
2020-02-08 13:31:30 +01:00
|
|
|
|
|
|
|
/**
|
|
|
|
* This method will execute function in parallel preserving the parts to be aligned increment size
|
|
|
|
* PLEASE NOTE: this function can use smaller number of threads than requested.
|
|
|
|
*
|
|
|
|
*/
|
2020-06-06 14:26:55 +02:00
|
|
|
static int parallel_aligned_increment(FUNC_1D function, int64_t start, int64_t stop, int64_t increment, size_t type_size = sizeof(float), uint32_t req_numThreads = sd::Environment::getInstance().maxMasterThreads());
|
2020-02-08 13:31:30 +01:00
|
|
|
|
2019-11-13 15:04:59 +01:00
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#endif //SAMEDIFF_THREADS_H
|