Minor fixes (#165)

* ios-arm excluded

Signed-off-by: raver119 <raver119@gmail.com>

* histogram single threaded

Signed-off-by: raver119 <raver119@gmail.com>
master
raver119 2020-01-04 15:27:16 +03:00 committed by GitHub
parent 29e8e09db6
commit d9ef5e2467
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 10 additions and 22 deletions

View File

@ -101,16 +101,17 @@ ELSE()
endif()
ENDIF()
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang")
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang" AND X86_BUILD)
# AppleClang on x86 builds only (ios-arm is excluded)
SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_TUNE}")
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
# using Clang
SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_TUNE}")
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
# using Intel C++
SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_TUNE} -O3 -fp-model fast")
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
# using Visual Studio C++
set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_TUNE}")
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
# using GCC

View File

@ -29,26 +29,16 @@ namespace nd4j {
auto result = reinterpret_cast<Z*>(zBuffer);
int length = shape::length(xShapeInfo);
// FIXME: why is the thread count hard-coded to 2?
int _threads = 2;
int span = (length / _threads) + 8;
X binSize = (max_val - min_val) / (numBins);
PRAGMA_OMP_PARALLEL_THREADS(_threads)
// FIXME: this op should be parallelized
{
int tid, start, end;
int *bins = new int[numBins];
std::memset(bins, 0, sizeof(int) * numBins);
tid = omp_get_thread_num();
start = span * tid;
end = span * (tid + 1);
if (end > length) end = length;
PRAGMA_OMP_SIMD
for (int x = start; x < end; x++) {
for (int x = 0; x < length; x++) {
int idx = (int) ((dx[x] - min_val) / binSize);
if (idx < 0)
idx = 0;
@ -58,15 +48,12 @@ namespace nd4j {
bins[idx]++;
}
PRAGMA_OMP_CRITICAL
{
PRAGMA_OMP_SIMD
for (int x = 0; x < numBins; x++) {
result[x] += bins[x];
}
PRAGMA_OMP_SIMD
for (int x = 0; x < numBins; x++) {
result[x] += bins[x];
}
delete[] bins;
}
}