cavis/libnd4j/include/ops/declarable/helpers/s_t_b.h

/*******************************************************************************
* Copyright (c) 2015-2018 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
//
// Created by raver119 on 19.01.18.
//
#ifndef LIBND4J_S_T_B_H
#define LIBND4J_S_T_B_H

#include <ops/declarable/helpers/helpers.h>
namespace nd4j {
namespace ops {
namespace helpers {
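    // _execute copies data between the (possibly padded) "space" tensor and its "batch"
    // representation across NUM_BLOCK_DIMS spatial dimensions; the B2S template flag
    // selects the direction of the transform (batch-to-space vs. space-to-batch).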
    // this method MUST be platform-specific
    template <typename T, int NUM_BLOCK_DIMS, bool B2S>
    void _execute(nd4j::LaunchContext * context, void *ptrSpace, const Nd4jLong *space_shape, const Nd4jLong *space_strides,
                  const Nd4jLong *block_shape, const Nd4jLong *pad_start, const Nd4jLong *block_offsets,
                  void *ptrBatch, const Nd4jLong *batch_shape, const Nd4jLong *batch_strides);
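    // Gathers per-dimension shape/stride/padding arrays and per-batch buffer offsets on the host,
    // then dispatches each batch entry to the platform-specific _execute kernel.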
    template <int NUM_BLOCK_DIMS, bool B2S>
    FORCEINLINE void _prepare(nd4j::LaunchContext * context, NDArray * space, NDArray *batch,
                              const Nd4jLong block_array[NUM_BLOCK_DIMS], const Nd4jLong padding_array[NUM_BLOCK_DIMS * 2]) {

        Nd4jLong pad_start[NUM_BLOCK_DIMS];
        Nd4jLong block_shape[NUM_BLOCK_DIMS];
        Nd4jLong space_shape[NUM_BLOCK_DIMS];
        Nd4jLong batch_shape[NUM_BLOCK_DIMS];

        const int batch_size = batch->sizeAt(0);
        const int space_size = space->sizeAt(0);
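        // dimension 0 is the batch axis; copy start padding, block size and spatial extents
        // for the remaining NUM_BLOCK_DIMS spatial dimensions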
#pragma unroll
        for (int block_dim = 0; block_dim < NUM_BLOCK_DIMS; block_dim++) {
            pad_start[block_dim] = padding_array[block_dim * 2];
            block_shape[block_dim] = block_array[block_dim];
            space_shape[block_dim] = space->sizeAt(block_dim + 1);
            batch_shape[block_dim] = batch->sizeAt(block_dim + 1);
        }

        auto space_strides = space->stridesOf();
        auto batch_strides = batch->stridesOf();

        // TODO: this loop should be moved to _execute phase
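        // each batch index decomposes as batch_b = block_index * space_size + space_b;
        // block_index is then unrolled into one offset per spatial block dimension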
        for (int batch_b = 0; batch_b < batch_size; ++batch_b) {
            const Nd4jLong space_b = batch_b % space_size;
            Nd4jLong block_index = batch_b / space_size;
            Nd4jLong block_offsets[NUM_BLOCK_DIMS];

            for (Nd4jLong block_dim = NUM_BLOCK_DIMS - 1; block_dim >= 0; --block_dim) {
                block_offsets[block_dim] = block_dim > 0 ? block_index % block_shape[block_dim] : block_index;
                block_index /= block_shape[block_dim];
            }

            Nd4jLong space_offset = space_b * space_strides[0];
            Nd4jLong batch_offset = batch_b * batch_strides[0];

            auto xType = space->dataType();

            //_execute<T, NUM_BLOCK_DIMS, B2S>(space->buffer() + space_offset, space_shape, &space_strides[1], block_shape, pad_start, block_offsets, batch->buffer() + batch_offset, batch_shape, &batch_strides[1]);
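            // BUILD_SINGLE_PARTIAL_SELECTOR picks the _execute instantiation matching the
            // array's runtime data type from the LIBND4J_TYPES list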
            BUILD_SINGLE_PARTIAL_SELECTOR(xType, _execute<, (NUM_BLOCK_DIMS, B2S>(context, space->bufferWithOffset(space_offset), space_shape, &space_strides[1], block_shape, pad_start, block_offsets, batch->bufferWithOffset(batch_offset), batch_shape, &batch_strides[1])), LIBND4J_TYPES);
        }
    }
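    // Host-side entry points for the space-to-batch and batch-to-space transforms;
    // `paddings` applies on the space-to-batch path, `crops` on the batch-to-space path.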
    Nd4jStatus _spaceToBatch(nd4j::LaunchContext * context, int internal_block_dims, NDArray *input, NDArray *output,
                             std::vector<Nd4jLong> &internal_input_shape, std::vector<Nd4jLong> &internal_output_shape,
                             Nd4jLong *block_shape, Nd4jLong *paddings);

    Nd4jStatus _batchToSpace(nd4j::LaunchContext * context, int internal_block_dims, NDArray *input, NDArray *output,
                             std::vector<Nd4jLong> &internal_input_shape, std::vector<Nd4jLong> &internal_output_shape,
                             Nd4jLong *block_shape, Nd4jLong *crops);
}
}
}
#endif //LIBND4J_S_T_B_H