Minor fixes (#165)
* ios-arm excluded
  Signed-off-by: raver119 <raver119@gmail.com>
* histogram single threaded
  Signed-off-by: raver119 <raver119@gmail.com>

master
parent
29e8e09db6
commit
d9ef5e2467
|
@ -101,16 +101,17 @@ ELSE()
|
||||||
endif()
|
endif()
|
||||||
ENDIF()
|
ENDIF()
|
||||||
|
|
||||||
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang")
|
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang" AND X86_BUILD)
|
||||||
|
# apple clang but not ios-arm
|
||||||
|
SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_TUNE}")
|
||||||
|
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
|
||||||
# using Clang
|
# using Clang
|
||||||
SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_TUNE}")
|
SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_TUNE}")
|
||||||
|
|
||||||
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
|
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
|
||||||
# using Intel C++
|
# using Intel C++
|
||||||
SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_TUNE} -O3 -fp-model fast")
|
SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_TUNE} -O3 -fp-model fast")
|
||||||
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
|
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
|
||||||
# using Visual Studio C++
|
# using Visual Studio C++
|
||||||
|
|
||||||
set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_TUNE}")
|
set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_TUNE}")
|
||||||
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
|
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
|
||||||
# using GCC
|
# using GCC
|
||||||
|
|
|
@ -29,26 +29,16 @@ namespace nd4j {
|
||||||
auto result = reinterpret_cast<Z*>(zBuffer);
|
auto result = reinterpret_cast<Z*>(zBuffer);
|
||||||
|
|
||||||
int length = shape::length(xShapeInfo);
|
int length = shape::length(xShapeInfo);
|
||||||
// FIXME: 2???
|
|
||||||
int _threads = 2;
|
|
||||||
|
|
||||||
int span = (length / _threads) + 8;
|
|
||||||
|
|
||||||
X binSize = (max_val - min_val) / (numBins);
|
X binSize = (max_val - min_val) / (numBins);
|
||||||
|
|
||||||
PRAGMA_OMP_PARALLEL_THREADS(_threads)
|
// FIXME: this op should be parallelized
|
||||||
{
|
{
|
||||||
int tid, start, end;
|
|
||||||
|
|
||||||
int *bins = new int[numBins];
|
int *bins = new int[numBins];
|
||||||
std::memset(bins, 0, sizeof(int) * numBins);
|
std::memset(bins, 0, sizeof(int) * numBins);
|
||||||
tid = omp_get_thread_num();
|
|
||||||
start = span * tid;
|
|
||||||
end = span * (tid + 1);
|
|
||||||
if (end > length) end = length;
|
|
||||||
|
|
||||||
PRAGMA_OMP_SIMD
|
PRAGMA_OMP_SIMD
|
||||||
for (int x = start; x < end; x++) {
|
for (int x = 0; x < length; x++) {
|
||||||
int idx = (int) ((dx[x] - min_val) / binSize);
|
int idx = (int) ((dx[x] - min_val) / binSize);
|
||||||
if (idx < 0)
|
if (idx < 0)
|
||||||
idx = 0;
|
idx = 0;
|
||||||
|
@ -58,15 +48,12 @@ namespace nd4j {
|
||||||
bins[idx]++;
|
bins[idx]++;
|
||||||
}
|
}
|
||||||
|
|
||||||
PRAGMA_OMP_CRITICAL
|
PRAGMA_OMP_SIMD
|
||||||
{
|
for (int x = 0; x < numBins; x++) {
|
||||||
PRAGMA_OMP_SIMD
|
result[x] += bins[x];
|
||||||
for (int x = 0; x < numBins; x++) {
|
|
||||||
result[x] += bins[x];
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
delete[] bins;
|
delete[] bins;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue