Oleh b4575d11e9
Loops auto-vectorization problem fix ()
* libnd4j cast loop types

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j more type casting added to loops

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j sync casting types of iterated variable in loops

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j more loops reviewed for vectorization problem fix

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j fixed several typos

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j several more files reviewed to fix auto-vectorization problem in loops

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j merge master and reviewed more files to fix auto-vectorization problem in loops

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j several type casting added in broadcasting that were missed, fixed mac builds

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j double check all files and fix several more places in loops

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j fixed builds

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j revert changes for lup.cpp

Signed-off-by: Oleg <oleg.semeniv@gmail.com>
2020-02-26 21:12:19 +03:00

58 lines
2.2 KiB
C++

/*******************************************************************************
* Copyright (c) 2015-2018 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
//
// @author raver119@gmail.com
//
#include <ops/declarable/helpers/flatten.h>
namespace nd4j {
namespace ops {
namespace helpers {
/**
 * Copies the elements of every array in `inputs`, one array after another, into `output`.
 *
 * Each input is read through getIndexOffsetOrdered(i, shapeInfo, order), which maps the
 * i-th element (for i in [0, lengthOf())) to a position in that input's buffer. The
 * destination for each input is obtained from output->bufferWithOffset(), called with the
 * combined length of all inputs that precede it.
 *
 * @tparam T      element type used to read the input buffers and to write the output buffer
 * @param inputs  arrays to flatten, processed in vector order; entries are dereferenced
 *                without a null check
 * @param output  destination array.
 *                NOTE(review): assumed to have room for the combined length of all inputs
 *                and to hold elements of type T - neither is verified here
 * @param order   ordering flag forwarded unchanged to getIndexOffsetOrdered()
 */
template <typename T>
static void flatten_(std::vector<NDArray*> &inputs, NDArray *output, const char order) {
    // Running offset in output at which the next input starts. Tracking it inline replaces
    // the former precomputed offsets vector (and the size_t -> int narrowing of inputs.size()).
    Nd4jLong outputOffset = 0;

    for (NDArray *input : inputs) {
        auto z = reinterpret_cast<T *>(output->bufferWithOffset(outputOffset));

        const auto xBuffer = input->bufferAsT<T>();
        const auto xShapeInfo = input->shapeInfo();
        const auto xLength = input->lengthOf();

        // gather from the (possibly non-linear) input layout into a contiguous run of z
        for (Nd4jLong i = 0; i < xLength; i++)
            z[i] = xBuffer[getIndexOffsetOrdered(i, xShapeInfo, order)];

        outputOffset += xLength;
    }
}
/**
 * Public entry point for the flatten helper: forwards (inputs, output, order) to the
 * templated flatten_ implementation, selecting it by output->dataType().
 *
 * NOTE(review): BUILD_SINGLE_SELECTOR is defined outside this file; it presumably expands
 * to a runtime switch that calls flatten_<T> for the matching type in LIBND4J_TYPES -
 * confirm against the macro definition.
 *
 * @param context launch context; not used by this implementation
 * @param inputs  arrays to flatten, passed through to flatten_
 * @param output  destination array; its data type drives the dispatch
 * @param order   ordering flag, passed through to flatten_
 */
void flatten(nd4j::LaunchContext *context, std::vector<NDArray*> &inputs, NDArray *output, char order) {
BUILD_SINGLE_SELECTOR(output->dataType(), flatten_, (inputs, output, order), LIBND4J_TYPES);
}
}
}
}