/*******************************************************************************
 * Copyright (c) 2019 Konduit K.K.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Apache License, Version 2.0 which is available at
 * https://www.apache.org/licenses/LICENSE-2.0.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ******************************************************************************/

// Created by Abdelrauf 2020

#ifndef DEV_TESTSARMCOMPUTEUTILS_H
#define DEV_TESTSARMCOMPUTEUTILS_H

#include <legacy/NativeOps.h>
#include <array/NDArray.h>
#include <graph/Context.h>
#include <ops/declarable/PlatformHelper.h>
#include <system/platform_boilerplate.h>
#include <arm_compute/runtime/NEON/NEFunctions.h>
#include <arm_compute/core/Types.h>
#include <arm_compute/core/TensorInfo.h>
#include <arm_compute/core/TensorShape.h>
#include <arm_compute/core/Strides.h>
#include <arm_compute/core/Helpers.h>
#include <arm_compute/core/ITensor.h>
#include <arm_compute/core/Validate.h>
#include <arm_compute/core/Window.h>
#include <arm_compute/runtime/Tensor.h>
#include <arm_compute/runtime/TensorAllocator.h>
#include <iostream>

using namespace samediff;

// Debug helpers: switch the "#if 0" to "#if 1" to enable verbose printing of
// buffers and shapes while debugging the ARM Compute platform ops.
#if 0
#define internal_printf(FORMAT, ...) nd4j_printf(FORMAT, __VA_ARGS__)
//define ARM_COMPUTE_ASSERTS_ENABLED 1
#define internal_print_arm_array(a,b) print_tensor(a,b)
#define internal_print_nd_array(a,b) ((a).printIndexedBuffer(b))
#define internal_print_nd_shape(a,b) ((a).printShapeInfo(b))
#else
#define internal_printf(FORMAT, ...)
#define internal_print_arm_array(a,b)
#define internal_print_nd_array(a,b)
#define internal_print_nd_shape(a,b)
#endif

namespace sd {
namespace ops {
namespace platforms {

using Arm_DataType = arm_compute::DataType;
using Arm_Tensor = arm_compute::Tensor;
using Arm_ITensor = arm_compute::ITensor;
using Arm_TensorInfo = arm_compute::TensorInfo;
using Arm_TensorShape = arm_compute::TensorShape;
using Arm_Strides = arm_compute::Strides;
using Arm_WeightsInfo = arm_compute::WeightsInfo;
using Arm_PermutationVector = arm_compute::PermutationVector;
using Arm_DataLayout = arm_compute::DataLayout;

/**
 * Here we actually declare our platform helpers
 */
DECLARE_PLATFORM(maxpool2d, ENGINE_CPU);

DECLARE_PLATFORM(avgpool2d, ENGINE_CPU);

DECLARE_PLATFORM(conv2d, ENGINE_CPU);

DECLARE_PLATFORM(deconv2d, ENGINE_CPU);

//utils

// Maps an sd::DataType onto the corresponding ARM Compute data type.
Arm_DataType getArmType(const sd::DataType& dType);

// Builds an ARM Compute TensorInfo from a raw shape descriptor or an NDArray.
Arm_TensorInfo getArmTensorInfo(int rank, Nd4jLong* bases, sd::DataType ndArrayType, Arm_DataLayout layout = Arm_DataLayout::UNKNOWN);

Arm_TensorInfo getArmTensorInfo(const NDArray& arr, Arm_DataLayout layout = Arm_DataLayout::UNKNOWN);

// Wraps an NDArray as an ARM Compute Tensor.
Arm_Tensor getArmTensor(const NDArray& arr, Arm_DataLayout layout = Arm_DataLayout::UNKNOWN);

// Element-wise copies between ARM Compute tensors and NDArrays.
void copyFromTensor(const Arm_Tensor& inTensor, NDArray& output);
void copyToTensor(const NDArray& input, Arm_Tensor& outTensor);

void print_tensor(Arm_ITensor& tensor, const char* msg);

// Returns true when the NDArray's type and memory layout can be handled by the
// ARM Compute backed implementations.
bool isArmcomputeFriendly(const NDArray& arr);
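
/**
 * Usage sketch (illustrative only, not part of the original API surface):
 * manually staging an NDArray into an ARM Compute tensor and back, assuming
 * `arr` and `result` are sd::NDArray instances owned by the caller.
 *
 * @code
 * Arm_Tensor t;
 * t.allocator()->init(getArmTensorInfo(arr, Arm_DataLayout::NCHW));
 * t.allocator()->allocate();   // back the tensor with its own buffer
 * copyToTensor(arr, t);        // NDArray -> ARM Compute tensor
 * // ... run an arm_compute NEON function on `t` ...
 * copyFromTensor(t, result);   // ARM Compute tensor -> NDArray
 * @endcode
 */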

/**
 * Wraps a single-input/single-output ARM Compute NEON function. configure()
 * prepares the in/out tensors: padded NDArray buffers are wrapped directly,
 * contiguous (ews()==1) buffers are imported without a copy, and anything else
 * is backed by a freshly allocated tensor that run() copies into before and
 * out of after executing the wrapped function.
 */
template<typename F>
class ArmFunction {
public:

    template<typename ...Args>
    void configure(NDArray* input, NDArray* output, Arm_DataLayout layout, Args&& ...args) {
        bool inputHasPaddedBuffer = input->hasPaddedBuffer();
        bool outputHasPaddedBuffer = output->hasPaddedBuffer();
        if (inputHasPaddedBuffer) {
            in = getArmTensor(*input, layout);
            internal_printf("input is a padded buffer %d\n", 0);
        }
        else {
            auto inInfo = getArmTensorInfo(*input, layout);
            in.allocator()->init(inInfo);
        }
        if (outputHasPaddedBuffer) {
            out = getArmTensor(*output, layout);
            internal_printf("output is a padded buffer %d\n", 0);
        }
        else {
            auto outInfo = getArmTensorInfo(*output, layout);
            out.allocator()->init(outInfo);
        }
        armFunction.configure(&in, &out, std::forward<Args>(args)...);
        if (!inputHasPaddedBuffer) {
            if (in.info()->has_padding() || input->ews() != 1) {
                //allocate and copy
                in.allocator()->allocate();
                inputNd = input;
            }
            else {
                //import only for ews()==1
                in.allocator()->import_memory(input->buffer());
                internal_printf("input import %d\n", 0);
            }
        }
        if (!outputHasPaddedBuffer) {
            if (out.info()->has_padding() || output->ews() != 1) {
                //store pointer to our array to copy after run
                out.allocator()->allocate();
                outNd = output;
            }
            else {
                //import only for ews()==1
                out.allocator()->import_memory(output->buffer());
                internal_printf("output import %d\n", 0);
            }
        }
    }

    void run() {
        if (inputNd) {
            //copy the NDArray contents into the ARM Compute tensor
            copyToTensor(*inputNd, in);
            internal_printf("input copy %d\n", 0);
            internal_print_nd_array(*inputNd, "input");
            internal_print_arm_array(in, "in");
        }
        armFunction.run();
        if (outNd) {
            copyFromTensor(out, *outNd);
            internal_printf("output copy %d\n", 0);
            internal_print_arm_array(out, "out");
        }
    }

private:
    Arm_Tensor in;
    Arm_Tensor out;
    NDArray* inputNd = nullptr;
    NDArray* outNd = nullptr;
    F armFunction{};
};
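
/**
 * Usage sketch (illustrative only, not part of this header): inside a pooling
 * platform implementation one would typically pair this wrapper with
 * arm_compute::NEPoolingLayer roughly as below. `input` and `output` are
 * assumed to be NDArray pointers prepared by the platform op, and the exact
 * PoolingLayerInfo constructor depends on the ARM Compute Library version in
 * use.
 *
 * @code
 * ArmFunction<arm_compute::NEPoolingLayer> pool;
 * arm_compute::PoolingLayerInfo poolInfo(arm_compute::PoolingType::MAX, 2,
 *                                        arm_compute::PadStrideInfo(2, 2, 0, 0));
 * pool.configure(input, output, Arm_DataLayout::NCHW, poolInfo);
 * pool.run(); // copies in/out only when the buffers could not be imported
 * @endcode
 */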

/**
 * Wraps an ARM Compute NEON function that takes weights and an optional bias
 * (for example a convolution). Tensors follow the same import/allocate/copy
 * strategy as ArmFunction; in addition, a non-empty permutation vector routes
 * the weights through NEPermute so they reach the kernel in the expected
 * layout.
 */
template<typename F>
class ArmFunctionWeighted {
public:

    template<typename ...Args>
    void configure(NDArray* input, NDArray* weights, NDArray* biases, NDArray* output, Arm_DataLayout layout, arm_compute::PermutationVector permuteVector, Args&& ...args) {
        bool inputHasPaddedBuffer = input->hasPaddedBuffer();
        bool weightsHasPaddedBuffer = weights->hasPaddedBuffer();
        bool outputHasPaddedBuffer = output->hasPaddedBuffer();
        bool biasesHasPaddedBuffer = false;
        if (inputHasPaddedBuffer) {
            in = getArmTensor(*input, layout);
            internal_printf("input is a padded buffer %d\n", 1);
        }
        else {
            in.allocator()->init(getArmTensorInfo(*input, layout));
        }
        if (weightsHasPaddedBuffer) {
            w = getArmTensor(*weights, layout);
            internal_printf("weights is a padded buffer %d\n", 1);
        }
        else {
            w.allocator()->init(getArmTensorInfo(*weights, layout));
        }
        if (outputHasPaddedBuffer) {
            out = getArmTensor(*output, layout);
            internal_printf("output is a padded buffer %d\n", 1);
        }
        else {
            out.allocator()->init(getArmTensorInfo(*output, layout));
        }
        Arm_Tensor* bias_ptr = nullptr;
        if (biases) {
            biasesHasPaddedBuffer = biases->hasPaddedBuffer();
            if (biasesHasPaddedBuffer) {
                b = getArmTensor(*biases, layout);
                internal_printf("biases is a padded buffer %d\n", 1);
            }
            else {
                b.allocator()->init(getArmTensorInfo(*biases, layout));
            }
            bias_ptr = &b;
        }
        if (permuteVector.num_dimensions() == 0) {
            armFunction.configure(&in, &w, bias_ptr, &out, std::forward<Args>(args)...);
        }
        else {
            //configure with permute kernel
            Arm_TensorShape shape;
            int rank = permuteVector.num_dimensions();
            shape.set_num_dimensions(rank);
            auto wInfoPtr = w.info();
            for (int i = 0; i < rank; i++) {
                shape[i] = wInfoPtr->dimension(permuteVector[i]);
            }
            for (int i = rank; i < arm_compute::MAX_DIMS; i++) {
                shape[i] = 1;
            }
            Arm_TensorInfo wPermInfo(shape, 1, wInfoPtr->data_type(), layout);
            wPerm.allocator()->init(wPermInfo);
            permuter.configure(&w, &wPerm, permuteVector);
            armFunction.configure(&in, &wPerm, bias_ptr, &out, std::forward<Args>(args)...);
            wPerm.allocator()->allocate();
            runPerm = true;
        }
        //import or allocate backing buffers
        if (!inputHasPaddedBuffer) {
            if (in.info()->has_padding() || input->ews() != 1) {
                //allocate and copy
                in.allocator()->allocate();
                inputNd = input;
            }
            else {
                //import buffer
                in.allocator()->import_memory(input->buffer());
                internal_printf("input import %d\n", 1);
            }
        }
        if (!weightsHasPaddedBuffer) {
            if (w.info()->has_padding() || weights->ews() != 1) {
                //store pointer to our array to copy before run
                w.allocator()->allocate();
                wNd = weights;
            }
            else {
                //import
                w.allocator()->import_memory(weights->buffer());
                internal_printf("weights import %d\n", 1);
            }
        }
        if (biases && !biasesHasPaddedBuffer) {
            if (b.info()->has_padding() || biases->ews() != 1) {
                //store pointer to our array to copy before run
                b.allocator()->allocate();
                bNd = biases;
            }
            else {
                //import
                b.allocator()->import_memory(biases->buffer());
                internal_printf("biases import %d\n", 1);
            }
        }
        if (!outputHasPaddedBuffer) {
            if (out.info()->has_padding() || output->ews() != 1) {
                //store pointer to our array to copy after run
                out.allocator()->allocate();
                outNd = output;
            }
            else {
                //import
                out.allocator()->import_memory(output->buffer());
                internal_printf("output import %d\n", 1);
            }
        }
    }

    void run() {
        if (inputNd) {
            //copy
            copyToTensor(*inputNd, in);
            internal_printf("input copy %d\n", 1);
        }
        if (bNd) {
            //copy
            copyToTensor(*bNd, b);
            internal_printf("biases copy %d\n", 1);
        }
        if (wNd) {
            //copy
            copyToTensor(*wNd, w);
            internal_printf("weights copy %d\n", 1);
        }
        if (runPerm) {
            permuter.run();
        }
        armFunction.run();
        if (outNd) {
            copyFromTensor(out, *outNd);
            internal_printf("output copy %d\n", 1);
        }
    }

private:
    bool runPerm = false;
    Arm_Tensor in;
    Arm_Tensor b;
    Arm_Tensor w;
    Arm_Tensor wPerm;
    Arm_Tensor out;
    NDArray* inputNd = nullptr;
    NDArray* wNd = nullptr;
    NDArray* bNd = nullptr;
    NDArray* outNd = nullptr;
    arm_compute::NEPermute permuter;
    F armFunction{};
};
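
/**
 * Usage sketch (illustrative only, not part of this header): a convolution
 * platform implementation would typically pair this wrapper with
 * arm_compute::NEConvolutionLayer roughly as below. `input`, `weights`, `bias`
 * and `output` are assumed to be NDArray pointers prepared by the platform op
 * (`bias` may be nullptr). An empty permutation vector means the weights are
 * already in the layout the NEON kernel expects; a non-empty one routes them
 * through NEPermute first.
 *
 * @code
 * ArmFunctionWeighted<arm_compute::NEConvolutionLayer> conv;
 * arm_compute::PadStrideInfo convInfo(1, 1, 0, 0); // strides and padding
 * conv.configure(input, weights, bias, output,
 *                Arm_DataLayout::NCHW, Arm_PermutationVector{}, convInfo);
 * conv.run();
 * @endcode
 */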

} // namespace platforms
} // namespace ops
} // namespace sd

#endif //DEV_TESTSARMCOMPUTEUTILS_H