/******************************************************************************* * Copyright (c) 2015-2018 Skymind, Inc. * * This program and the accompanying materials are made available under the * terms of the Apache License, Version 2.0 which is available at * https://www.apache.org/licenses/LICENSE-2.0. * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations * under the License. * * SPDX-License-Identifier: Apache-2.0 ******************************************************************************/ // // @author raver119@gmail.com, created on 07.10.2017. // @author Yurii Shyrma (iuriish@yahoo.com) // #include #include #include #include #include #include #include #include #include namespace sd { template void SpecialTypeConverter::convertGeneric(Nd4jPointer * extras, void *dx, Nd4jLong N, void *dz) { auto x = reinterpret_cast(dx); auto z = reinterpret_cast(dz); auto func = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i++) { z[i] = static_cast(x[i]); } }; sd::Threads::parallel_for(func, 0, N); }; template void quickSort_parallel_internal_key(X* key, Nd4jLong *xShapeInfo, Y* values, Nd4jLong *yShapeInfo, int left, int right, int cutoff, bool descending) { int i = left, j = right; X ktmp; X pivot = key[shape::getIndexOffset((left + right) / 2, xShapeInfo)]; Y vtmp; { /* PARTITION PART */ while (i <= j) { if (descending) { while (key[shape::getIndexOffset(i, xShapeInfo)] > pivot) i++; while (key[shape::getIndexOffset(j, xShapeInfo)] < pivot) j--; if (i <= j) { ktmp = key[shape::getIndexOffset(i, xShapeInfo)]; key[shape::getIndexOffset(i, xShapeInfo)] = key[shape::getIndexOffset(j, xShapeInfo)]; key[shape::getIndexOffset(j, xShapeInfo)] = ktmp; vtmp = values[shape::getIndexOffset(i, yShapeInfo)]; values[shape::getIndexOffset(i, yShapeInfo)] = values[shape::getIndexOffset(j, yShapeInfo)]; values[shape::getIndexOffset(j, yShapeInfo)] = vtmp; i++; j--; } } else { while (key[shape::getIndexOffset(i, xShapeInfo)] < pivot) i++; while (key[shape::getIndexOffset(j, xShapeInfo)] > pivot) j--; if (i <= j) { ktmp = key[shape::getIndexOffset(i, xShapeInfo)]; key[shape::getIndexOffset(i, xShapeInfo)] = key[shape::getIndexOffset(j, xShapeInfo)]; key[shape::getIndexOffset(j, xShapeInfo)] = ktmp; vtmp = values[shape::getIndexOffset(i, yShapeInfo)]; values[shape::getIndexOffset(i, yShapeInfo)] = values[shape::getIndexOffset(j, yShapeInfo)]; values[shape::getIndexOffset(j, yShapeInfo)] = vtmp; i++; j--; } } } } // if ( ((right-left) void quickSort_parallel_internal_value(X* key, Nd4jLong *xShapeInfo, Y* value, Nd4jLong *yShapeInfo, int left, int right, int cutoff, bool descending) { int i = left, j = right; X ktmp; Y pivot = value[shape::getIndexOffset((left + right) / 2, yShapeInfo)]; Y vtmp; { /* PARTITION PART */ while (i <= j) { if (descending) { while (value[shape::getIndexOffset(i, yShapeInfo)] > pivot) i++; while (value[shape::getIndexOffset(j, yShapeInfo)] < pivot) j--; if (i <= j) { ktmp = key[shape::getIndexOffset(i, xShapeInfo)]; key[shape::getIndexOffset(i, xShapeInfo)] = key[shape::getIndexOffset(j, xShapeInfo)]; key[shape::getIndexOffset(j, xShapeInfo)] = ktmp; vtmp = value[shape::getIndexOffset(i, yShapeInfo)]; value[shape::getIndexOffset(i, yShapeInfo)] = value[shape::getIndexOffset(j, yShapeInfo)]; value[shape::getIndexOffset(j, yShapeInfo)] = vtmp; i++; j--; } } else { while (value[shape::getIndexOffset(i, yShapeInfo)] < pivot) i++; while (value[shape::getIndexOffset(j, yShapeInfo)] > pivot) j--; if (i <= j) { ktmp = key[shape::getIndexOffset(i, xShapeInfo)]; key[shape::getIndexOffset(i, xShapeInfo)] = key[shape::getIndexOffset(j, xShapeInfo)]; key[shape::getIndexOffset(j, xShapeInfo)] = ktmp; vtmp = value[shape::getIndexOffset(i, yShapeInfo)]; value[shape::getIndexOffset(i, yShapeInfo)] = value[shape::getIndexOffset(j, yShapeInfo)]; value[shape::getIndexOffset(j, yShapeInfo)] = vtmp; i++; j--; } } } } // if ( ((right-left) static void quickSort_parallel_key(void *varray, Nd4jLong *xShapeInfo, void *yarray, Nd4jLong *yShapeInfo, Nd4jLong lenArray, int numThreads, bool descending){ auto array = reinterpret_cast(varray); auto values = reinterpret_cast(yarray); int cutoff = 1000; PRAGMA_OMP_PARALLEL_THREADS(numThreads) { PRAGMA_OMP_SINGLE_ARGS(nowait) { quickSort_parallel_internal_key(array, xShapeInfo, values, yShapeInfo, 0, lenArray-1, cutoff, descending); } } } template static void quickSort_parallel_value(void *varray, Nd4jLong *xShapeInfo, void *yarray, Nd4jLong *yShapeInfo, Nd4jLong lenArray, int numThreads, bool descending){ auto array = reinterpret_cast(varray); auto values = reinterpret_cast(yarray); int cutoff = 1000; PRAGMA_OMP_PARALLEL_THREADS(numThreads) { PRAGMA_OMP_SINGLE_ARGS(nowait) { quickSort_parallel_internal_value(array, xShapeInfo, values, yShapeInfo, 0, lenArray-1, cutoff, descending); } } } template void DoubleMethods::sortByKey(void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, bool descending) { quickSort_parallel_key(vx, xShapeInfo, vy, yShapeInfo, shape::length(xShapeInfo), omp_get_max_threads(), descending); } template void DoubleMethods::sortByValue(void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, bool descending) { quickSort_parallel_value(vx, xShapeInfo, vy, yShapeInfo, shape::length(xShapeInfo), omp_get_max_threads(), descending); } template void DoubleMethods::sortTadByKey(void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, int *dimension, int dimensionLength, bool descending) { auto x = reinterpret_cast(vx); auto y = reinterpret_cast(vy); auto packX = ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength); auto packY = ConstantTadHelper::getInstance()->tadForDimensions(yShapeInfo, dimension, dimensionLength); auto xLength = shape::length(xShapeInfo); auto xTadLength = shape::length(packX.primaryShapeInfo()); auto numTads = packX.numberOfTads(); auto func = PRAGMA_THREADS_FOR { for (auto r = start; r < stop; r++) { auto dx = x + packX.primaryOffsets()[r]; auto dy = y + packY.primaryOffsets()[r]; quickSort_parallel_key(dx, packX.primaryShapeInfo(), dy, packY.primaryShapeInfo(), xTadLength, 1, descending); } }; sd::Threads::parallel_tad(func, 0, numTads); } template void DoubleMethods::sortTadByValue(void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, int *dimension, int dimensionLength, bool descending) { auto x = reinterpret_cast(vx); auto y = reinterpret_cast(vy); auto packX = ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength); auto packY = ConstantTadHelper::getInstance()->tadForDimensions(yShapeInfo, dimension, dimensionLength); auto xLength = shape::length(xShapeInfo); auto xTadLength = shape::length(packX.primaryShapeInfo()); auto numTads = packX.numberOfTads(); auto func = PRAGMA_THREADS_FOR { for (auto r = start; r < stop; r++) { auto dx = x + packX.primaryOffsets()[r]; auto dy = y + packY.primaryOffsets()[r]; quickSort_parallel_value(dx, packX.primaryShapeInfo(), dy, packY.primaryShapeInfo(), xTadLength, 1, descending); } }; sd::Threads::parallel_tad(func, 0, numTads); } }