// cavis/libnd4j/include/ops/ops.h
/*******************************************************************************
* Copyright (c) 2015-2018 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
#pragma once
#ifndef OPS_H_
#define OPS_H_
#include <system/op_boilerplate.h>
#include <array/DataTypeUtils.h>
#include <helpers/shape.h>
#include <vector>
#include <system/Environment.h>
#include <loops/summarystatsreduce.h>
#include <loops/ReduceType.h>
#define MIN_V 1e-12
#define MAX_FLOAT 1e37
#define MIN_FLOAT 1e-37
#define MAX_INT 2147483647
#define MIN_CUTFOFF -3.79297773665f
#define FLOAT_MIN_NORMAL 1.17549435e-38
#define EPS 1e-5
#define AFFINITY close
#define DOUBLE_PI_T T(2.0 * 3.14159265358979323846)
#define DOUBLE_PI_X X(2.0 * 3.14159265358979323846)
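// The no_op_exec_special* macros below stub out the optional "special" execution path:
// they set requiresSpecial / requiresSpecialAccumulation to false and provide empty
// execSpecial bodies, so ops without a specialized implementation still satisfy the
// legacy transform/reduce interfaces.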
#define no_op_exec_special_any static const bool requiresSpecial = false; static void execSpecial(const X *dx, const Nd4jLong *xShapeBuffer, Z *result, const Nd4jLong *resultShapeBuffer, X *extraParams, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) {}
#define no_op_exec_special_bool static const bool requiresSpecial = false; static void execSpecial(const X *dx, const Nd4jLong *xShapeBuffer, Z *result, const Nd4jLong *resultShapeBuffer, X *extraParams, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) {}
#define no_op_exec_special_same static const bool requiresSpecial = false; static void execSpecial(const X *dx, const Nd4jLong *xShapeBuffer, X *result, const Nd4jLong *resultShapeBuffer, X *extraParams, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) {}
#define no_op_exec_special static const bool requiresSpecial = false; static void execSpecial(const X *dx, const Nd4jLong *xShapeBuffer, Z *result, const Nd4jLong *resultShapeBuffer, Z *extraParams, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) {}
#define no_op_exec_special_accumulation static const bool requiresSpecialAccumulation = false; static void execSpecial(const X *x, const Nd4jLong *xShapeInfo, Z *extraParams, Z *result, const Nd4jLong *resultShapeInfoBuffer, int *dimension, int dimensionLength, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset){}
#define no_op_exec_special_accumulation_long static const bool requiresSpecialAccumulation = false; static void execSpecial(const X *x, const Nd4jLong *xShapeInfo, X *extraParams, Z *result, const Nd4jLong *resultShapeInfoBuffer, int *dimension, int dimensionLength, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset){}
#define no_op_exec_special_accumulation_same static const bool requiresSpecialAccumulation = false; static void execSpecial(const X *x, const Nd4jLong *xShapeInfo, X *extraParams, X *result, const Nd4jLong *resultShapeInfoBuffer, int *dimension, int dimensionLength, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset){}
#ifdef __CUDACC__
#define no_op_exec_special_any_cuda static __device__ void execSpecialCuda(const X *dx, const Nd4jLong *xShapeBuffer, Z *result, const Nd4jLong *resultShapeBuffer, X *extraParams, int *allocationPointer, Z *reductionPointer, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) {}
#define no_op_exec_special_bool_cuda static __device__ void execSpecialCuda(const X *dx, const Nd4jLong *xShapeBuffer, Z *result, const Nd4jLong *resultShapeBuffer, X *extraParams, int *allocationPointer, Z *reductionPointer, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) {}
#define no_op_exec_special_same_cuda static __device__ void execSpecialCuda(const X *dx, const Nd4jLong *xShapeBuffer, X *result, const Nd4jLong *resultShapeBuffer, X *extraParams, int *allocationPointer, X *reductionPointer, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) {}
#define no_op_exec_special_cuda static __device__ void execSpecialCuda(const X *dx, const Nd4jLong *xShapeBuffer,Z *result, const Nd4jLong *resultShapeBuffer,Z *extraParams, int *allocationPointer, Z *reductionPointer, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) {}
#define no_op_exec_special_accumulation_same_cuda static inline __device__ void execSpecialCuda(const X *dx, const Nd4jLong *xShapeInfo, X *extraParams, X *result, const Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, X *reductionBuffer, const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets) {}
#define no_op_exec_special_accumulation_long_cuda static inline __device__ void execSpecialCuda(const X *dx, const Nd4jLong *xShapeInfo, X *extraParams, Z *result, const Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, Z *reductionBuffer, const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets) {}
#define no_op_exec_special_accumulation_cuda static inline __device__ void execSpecialCuda(const X *dx, const Nd4jLong *xShapeInfo, Z *extraParams, Z *result, const Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, Z *reductionBuffer, const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets) {}
#else
// hacky fix for isnan/isinf being out of scope
//#ifdef IOS
//#define isinf(x) 0 // this isn't right. But std::isinf fails
//#define isnan(x) 0
//#else
//#define isnan std::isnan
//#define isinf std::isinf
//#endif
#define no_op_exec_special_cuda
#define no_op_exec_special_accumulation_cuda
#define no_op_exec_special_accumulation_same_cuda
#define no_op_exec_special_accumulation_long_cuda
#define no_op_exec_special_any_cuda
#define no_op_exec_special_bool_cuda
#define no_op_exec_special_same_cuda
#endif
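// Scale (lambda) and alpha constants of the SELU activation (self-normalizing networks).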
#define SELU_ALPHA 1.6732632423543772848170429916717
#define SELU_LAMBDA 1.0507009873554804934193349852946
namespace functions {
namespace indexreduce {
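// A value paired with its position, used by index-reduction ops to carry both the
// running candidate and the index where it was found.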
template <typename T>
struct IndexValue {
T value;
Nd4jLong index;
_CUDA_HD IndexValue() = default;
_CUDA_HD IndexValue(const T val, const Nd4jLong ind): index(ind), value(val) {}
};
}
namespace summarystats {
template <typename T>
class SummaryStatsData;
}
}
namespace simdOps {
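// Element-wise op functors used by the legacy loops. X is the first-input type, Y the
// second-input/scalar type, Z the output type. By convention each functor provides:
// op(d1, d2) for the pairwise form, op(d1, d2, params) when extra parameters are passed,
// op(d1) for the unary/identity form, op(d1, params) for MetaOps, and (for ops usable
// as reductions) startingValue() as the neutral element.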
template <typename X, typename Y, typename Z>
class Add {
public:
op_def static Z op(X d1, Y d2) {
return static_cast<Z>(d1 + d2);
}
op_def static Z op(X d1, Y d2, Z *params) {
return static_cast<Z>(d1 + d2);
}
op_def static Z op(X d1) {
return static_cast<Z>(d1);
}
// op for MetaOps
op_def static Z op(X d1, Y *params) {
return static_cast<Z>(d1 + params[0]);
}
op_def static X startingValue() {
return static_cast<X>(0.f);
}
};
template <typename X, typename Y>
class NewAdd {
public:
op_def static X op(X d1, Y d2, X *params) {
return d1 + d2;
}
};
template <typename X, typename Y, typename Z>
class Subtract {
public:
op_def static Z op(X d1, Y d2) {
return static_cast<Z>(d1 - d2);
}
op_def static Z op(X d1, Y d2, Z *params) {
return static_cast<Z>(d1 - d2);
}
op_def static Z op(X d1) {
return static_cast<Z>(d1);
}
// op for MetaOps
op_def static Z op(X d1, Y *params) {
return static_cast<Z>(d1 - params[0]);
}
};
template <typename X, typename Y, typename Z>
class SquaredSubtract {
public:
op_def static Z op(X d1, Y d2) {
auto d = static_cast<Z>(d1 - d2);
return d * d;
}
op_def static Z op(X d1, Y d2, Z *params) {
auto d = static_cast<Z>(d1 - d2);
return d * d;
}
op_def static Z op(X d1) {
return d1;
}
// op for MetaOps
op_def static Z op(X d1, Y *params) {
auto d = static_cast<Z>(d1 - params[0]);
return d * d;
}
};
template <typename X, typename Y, typename Z>
class SquaredReverseSubtract {
public:
op_def static Z op(X d1, Y d2) {
auto d = static_cast<Z>(d2 - d1);
return d * d;
}
op_def static Z op(X d1, Y d2, Z *params) {
auto d = static_cast<Z>(d2 - d1);
return d * d;
}
op_def static Z op(X d1) {
return d1;
}
// op for MetaOps
op_def static Z op(X d1, Y *params) {
auto d = static_cast<Z>(params[0] - d1);
return d * d;
}
};
template <typename X, typename Y, typename Z>
class ReverseSubtract {
public:
op_def static Z op(X d1, Y d2) {
return static_cast<Z>(d2 - d1);
}
op_def static Z op(X d1, Y d2, Z *params) {
return static_cast<Z>(d2 - d1);
}
op_def static Z op(X d1) {
return d1;
}
// op for MetaOps
op_def static Z op(X d1, Y *params) {
return static_cast<Z>(params[0] - d1);
}
};
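// Log-Poisson loss: exp(c) - z*c, where z plays the role of the target and c the log of
// the predicted rate. LogPoissonLossFull additionally includes the Stirling-series
// approximation of log(z!): z*log(z) - z + 0.5*log(2*pi*z). LogPoissonLoss below omits it.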
template <typename X, typename Y, typename Z>
class LogPoissonLossFull {
public:
op_def static Z op(X z, Y c) {
auto zz = static_cast<Z>(z);
auto zc = static_cast<Z>(c);
return (sd::math::nd4j_exp<Y, Z>(c) - zz * zc + (zz * sd::math::nd4j_log<X, Z>(z) - zz + static_cast<Z>(0.5f) * sd::math::nd4j_log<Z, Z>(static_cast<Z>(DOUBLE_PI_X) * zz)));
}
op_def static Z op(X z, Y c, Z *params) {
auto zz = static_cast<Z>(z);
auto zc = static_cast<Z>(c);
return (sd::math::nd4j_exp<Y, Z>(c) - zz * zc + (zz * sd::math::nd4j_log<X, Z>(z) - zz + static_cast<Z>(0.5f) * sd::math::nd4j_log<Z, Z>(static_cast<Z>(DOUBLE_PI_X) * zz)));
}
op_def static Z op(X z) {
auto zz = static_cast<Z>(z);
return (zz * sd::math::nd4j_log<Y, Z>(z) - zz + static_cast<Z>(0.5f) * sd::math::nd4j_log<Z, Z>(static_cast<Z>(DOUBLE_PI_X) * zz));
}
// op for MetaOps
op_def static X op(X z, Y *params) {
return (sd::math::nd4j_exp<X, X>(params[0]) - z * params[0] + (z * sd::math::nd4j_log<X, Z>(z) - z + static_cast<X>(0.5f) * sd::math::nd4j_log<X, Z>(DOUBLE_PI_X * z)));
}
};
template <typename X, typename Y, typename Z>
class LogPoissonLoss {
public:
op_def static Z op(X z, Y c) {
auto zz = static_cast<Z>(z);
auto zc = static_cast<Z>(c);
return (sd::math::nd4j_exp<Y, Z>(c) - zz * zc);
}
op_def static Z op(X z, Y c, Z *params) {
auto zz = static_cast<Z>(z);
auto zc = static_cast<Z>(c);
return (sd::math::nd4j_exp<Y, Z>(c) - zz * zc);
}
op_def static Z op(X z) {
return static_cast<Z>(z);
}
// op for MetaOps
op_def static Z op(X z, Y *params) {
return (sd::math::nd4j_exp<Y, Z>(params[0]) - static_cast<Z>(z) * static_cast<Z>(params[0]));
}
};
template <typename X, typename Y, typename Z>
class Multiply {
public:
op_def static Z op(X d1, Y d2) {
return static_cast<Z>(d1 * d2);
}
op_def static Z op(X d1, Y d2, Z *params) {
return static_cast<Z>(d1 * d2);
}
op_def static Z op(X d1) {
return static_cast<Z>(d1);
}
// op for MetaOps
op_def static Z op(X d1, Y *params) {
return static_cast<Z>(d1 * params[0]);
}
op_def static X startingValue() {
return static_cast<X>(1.f);
}
};
template <typename X, typename Y, typename Z>
class Divide {
public:
op_def static Z op(X d1, Y d2) {
return static_cast<Z>(d1 / d2);
}
op_def static Z op(X d1, Y d2, Z *params) {
return static_cast<Z>(d1 / d2);
}
op_def static Z op(X d1) {
return static_cast<Z>(d1);
}
// op for MetaOps
op_def static Z op(X d1, Y *params) {
return static_cast<Z>(d1 / params[0]);
}
op_def static X startingValue() {
return static_cast<X>(1);
}
};
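// Division that returns 0 whenever the divisor is 0, instead of producing inf/NaN.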
template <typename X, typename Y, typename Z>
class DivideNoNan {
public:
op_def static Z op(X d1, Y d2) {
if (d2 == (Y)0) return (Z)0;
return static_cast<Z>(d1 / d2);
}
op_def static Z op(X d1, Y d2, Z *params) {
if (d2 == (Y)0) return (Z)0;
return static_cast<Z>(d1 / d2);
}
op_def static Z op(X d1) {
return static_cast<Z>(d1);
}
// op for MetaOps
op_def static Z op(X d1, Y *params) {
if (params[0] == (Y)0) return (Z)0;
return static_cast<Z>(d1 / params[0]);
}
op_def static X startingValue() {
return static_cast<X>(1);
}
};
template <typename X, typename Y, typename Z>
class SafeDivide {
public:
op_def static Z op(X d1, Y d2) {
if(d2 == static_cast<Y>(0))
return static_cast<Z>(0);
return static_cast<Z>(d1 / d2);
}
op_def static Z op(X d1, Y d2, Z *params) {
if(d2 == static_cast<Y>(0))
return static_cast<Z>(0);
return static_cast<Z>(d1 / d2);
}
op_def static Z op(X d1) {
return static_cast<Z>(d1);
}
// op for MetaOps
op_def static Z op(X d1, Y *params) {
if(params[0] == static_cast<Y>(0))
return static_cast<Z>(0);
return static_cast<Z>(d1 / params[0]);
}
};
template <typename X, typename Y, typename Z>
class FloorDiv {
public:
op_def static Z op(X d1, Y d2) {
return sd::math::nd4j_floor<Z,Z>(static_cast<Z>(d1 / d2));
}
op_def static Z op(X d1, Y d2, Z *params) {
return sd::math::nd4j_floor<Z,Z>(static_cast<Z>(d1 / d2));
}
op_def static Z op(X d1) {
return sd::math::nd4j_floor<Z,Z>(static_cast<Z>(d1));
}
// op for MetaOps
op_def static Z op(X d1, Y *params) {
return sd::math::nd4j_floor<Z,Z>(static_cast<Z>(d1 / params[0]));
}
};
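// Integer division truncated toward zero: both operands are cast to int before dividing.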
template <typename X, typename Y, typename Z>
class TruncateDiv {
public:
op_def static Z op(X d1, Y d2) {
auto i1 = static_cast<int>(d1);
auto i2 = static_cast<int>(d2);
return static_cast<Z>(i1 / i2);
}
op_def static Z op(X d1, Y d2, Z *params) {
auto i1 = static_cast<int>(d1);
auto i2 = static_cast<int>(d2);
return static_cast<Z>(i1 / i2);
}
op_def static Z op(X d1) {
return d1;
}
// op for MetaOps
op_def static Z op(X d1, Y *params) {
auto i1 = static_cast<int>(d1);
auto i2 = static_cast<int>(params[0]);
return static_cast<Z>(i1 / i2);
}
};
template <typename X, typename Y, typename Z>
class TruncateMod {
public:
op_def static Z op(X d1, Y d2) {
auto i1 = static_cast<int>(d1);
auto i2 = static_cast<int>(d2);
return static_cast<Z>(i1 % i2);
}
op_def static Z op(X d1, Y d2, Z *params) {
auto i1 = static_cast<int>(d1);
auto i2 = static_cast<int>(d2);
return static_cast<Z>(i1 % i2);
}
op_def static Z op(X d1) {
return static_cast<Z>(d1);
}
// op for MetaOps
op_def static Z op(X d1, Y *params) {
auto i1 = static_cast<int>(d1);
auto i2 = static_cast<int>(params[0]);
return static_cast<Z>(i1 % i2);
}
};
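// Remainder delegates to nd4j_remainder and FMod below to nd4j_fmod; these are assumed to
// follow std::remainder / std::fmod semantics (round-to-nearest vs. truncated quotient).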
template<typename X, typename Y, typename Z>
class Remainder {
public:
op_def static Z op(X d1, Y d2) {
return sd::math::nd4j_remainder<X, Y, Z>(d1, d2);
}
op_def static Z op(X d1, Y d2, Z *params) {
return sd::math::nd4j_remainder<X, Y, Z>(d1, d2);
}
op_def static Z op(X d1) {
return d1;
}
// op for MetaOps
op_def static Z op(X d1, Y *params) {
return sd::math::nd4j_remainder<X, Y, Z>(d1, params[0]);
}
};
template <typename X, typename Y, typename Z>
class FMod {
public:
op_def static Z op(X d1, Y d2) {
return sd::math::nd4j_fmod<X, Y, Z>(d1, d2);
}
op_def static Z op(X d1, Y d2, Z *params) {
return sd::math::nd4j_fmod<X, Y, Z>(d1, d2);
}
op_def static Z op(X d1) {
return d1;
}
// op for MetaOps
op_def static Z op(X d1, Y *params) {
return sd::math::nd4j_fmod<X, Y, Z>(d1, params[0]);
}
};
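// Floored modulo: when the operands have different signs, the fmod result is shifted by
// the divisor, so the result takes the sign of the divisor (Python-style %).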
template <typename X, typename Y, typename Z>
class FloorMod {
public:
op_def static Z op(X d1, Y d2) {
auto m = sd::math::nd4j_fmod<X, Y, Z>(d1, d2);
return (d1 < static_cast<X>(0)) == (d2 < static_cast<Y>(0)) ? m : sd::math::nd4j_fmod<Z, Y, Z>(m + static_cast<Z>(d2), d2);
}
op_def static Z op(X d1, Y d2, Z *params) {
auto m = sd::math::nd4j_fmod<X, Y, Z>(d1, d2);
return (d1 < static_cast<X>(0.0f)) == (d2 < static_cast<Y>(0)) ? m : sd::math::nd4j_fmod<Z, Y, Z>(m + static_cast<Z>(d2), d2);
}
op_def static Z op(X d1) {
return d1;
}
// op for MetaOps
op_def static Z op(X d1, Y *params) {
return op(d1, params[0]);
}
};
template <typename X, typename Y, typename Z>
class ReverseDivide {
public:
op_def static Z op(X d1, Y d2) {
return static_cast<Z>(d2 / d1);
}
op_def static Z op(X d1, Y d2, Z *params) {
return static_cast<Z>(d2 / d1);
}
op_def static Z op(X d1) {
return static_cast<Z>(d1);
}
// op for MetaOps
op_def static Z op(X d1, Y *params) {
return static_cast<Z>(params[0] / d1);
}
};
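// Pairwise copy: returns the second operand in the binary forms, the input itself otherwise.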
template <typename X, typename Y, typename Z>
class CopyPws {
public:
op_def static Z op(X d1, Y d2) {
return static_cast<Z>(d2);
}
op_def static Z op(X d1, Y d2, Z *params) {
return static_cast<Z>(d2);
}
op_def static Z op(X d1) {
return static_cast<Z>(d1);
}
op_def static Z op(X d1, Y *params) {
return static_cast<Z>(d1);
}
};
template <typename X>
class Copy {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return d1;
}
};
template <typename X, typename Y, typename Z>
class Copy2 {
public:
op_def static Z op(X d1, Y d2) {
return static_cast<Z>(d2);
}
op_def static Z op(X d1, Y d2, Z *params) {
return static_cast<Z>(d2);
}
op_def static Z op(X d1) {
return static_cast<Z>(d1);
}
op_def static Z op(X d1, Y *params) {
return static_cast<Z>(d1);
}
};
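// alpha * x + y, with the scaling factor alpha taken from params[0] in the parameterized form.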
template <typename X, typename Y, typename Z>
class Axpy {
public:
op_def static Z op(X d1, Y d2) {
return static_cast<Z>(d2 + d1);
}
op_def static Z op(X d1, Y d2, Z *params) {
auto alpha = params[0];
return alpha * static_cast<Z>(d1) + static_cast<Z>(d2);
}
op_def static Z op(X d1) {
return static_cast<Z>(d1);
}
};
template <typename X, typename Z>
class Assign {
public:
no_op_exec_special_any
no_op_exec_special_any_cuda
op_def static Z op(X d1, X *params) {
return static_cast<Z>(d1);
}
};
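// Boolean-style pairwise ops. With a non-null params pointer, params[0] is treated as the
// comparison ("false") value the operands are tested against; otherwise the operands are
// interpreted as plain booleans.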
template <typename X, typename Z>
class And {
public:
no_op_exec_special_bool
no_op_exec_special_bool_cuda
op_def static Z op(X d1, X d2) {
return d2 + d1;
}
op_def static Z op(X d1, X d2, X *params) {
if (params != nullptr) {
auto comp = params[0];
return d1 != comp && d2 != comp ? static_cast<Z>(1) : static_cast<Z>(0);
} else {
auto b1 = static_cast<bool>(d1);
auto b2 = static_cast<bool>(d2);
return (b1 && b2) ? static_cast<Z>(1) : static_cast<Z>(0);
}
}
op_def static Z op(X d1) {
return d1;
}
// op for MetaOps
op_def static Z op(X d1, X *params) {
return static_cast<Z>(119);
}
};
template <typename X>
class IntOr {
public:
op_def static X op(X d1, X d2) {
return d2 | d1;
}
op_def static X op(X d1, X d2, X *params) {
return op(d1, d2);
}
};
template <typename X>
class IntAnd {
public:
op_def static X op(X d1, X d2) {
return d2 & d1;
}
op_def static X op(X d1, X d2, X *params) {
return op(d1, d2);
}
};
template <typename X>
class IntXor {
public:
op_def static X op(X d1, X d2) {
return d2 ^ d1;
}
op_def static X op(X d1, X d2, X *params) {
return op(d1, d2);
}
};
template <typename X>
class ShiftLeft {
public:
op_def static X op(X d1, X d2) {
return d1 << d2;
}
op_def static X op(X d1, X d2, X *params) {
return op(d1, d2);
}
};
template <typename X>
class ShiftRight {
public:
op_def static X op(X d1, X d2) {
return d1 >> d2;
}
op_def static X op(X d1, X d2, X *params) {
return op(d1, d2);
}
};
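// Circular (rotating) bit shifts, delegating to nd4j_rotl / nd4j_rotr.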
template <typename X>
class CyclicShiftLeft {
public:
op_def static X op(X d1, X d2) {
return sd::math::nd4j_rotl<X>(d1, d2);
}
op_def static X op(X d1, X d2, X *params) {
return op(d1, d2);
}
};
template <typename X>
class CyclicShiftRight {
public:
op_def static X op(X d1, X d2) {
return sd::math::nd4j_rotr<X>(d1, d2);
}
op_def static X op(X d1, X d2, X *params) {
return op(d1, d2);
}
};
template <typename X, typename Z>
class Or {
public:
no_op_exec_special_bool
no_op_exec_special_bool_cuda
op_def static Z op(X d1, X d2) {
return d2 + d1;
}
op_def static Z op(X d1, X d2, X *params) {
if (params != nullptr) {
auto comp = params[0];
return d1 != comp || d2 != comp ? static_cast<Z>(1) : static_cast<Z>(0);
} else {
auto b1 = static_cast<bool>(d1);
auto b2 = static_cast<bool>(d2);
return b1 || b2 ? static_cast<Z>(1) : static_cast<Z>(0);
}
}
op_def static Z op(X d1) {
return d1;
}
// op for MetaOps
op_def static Z op(X d1, X *params) {
return static_cast<Z>(119);
}
};
template <typename X, typename Z>
class Xor {
public:
no_op_exec_special_bool
no_op_exec_special_bool_cuda
op_def static Z op(X d1, X d2) {
return d2 + d1;
}
op_def static Z op(X d1, X d2, X *params) {
if (params != nullptr) {
auto comp = params[0];
return ((d1 == comp && d2 != comp) || (d1 != comp && d2 == comp)) ? static_cast<Z>(1) : static_cast<Z>(0);
} else {
auto b1 = static_cast<bool>(d1);
auto b2 = static_cast<bool>(d2);
return (!b1 && b2 )||(b1 && !b2) ? static_cast<Z>(1) : static_cast<Z>(0);
}
}
op_def static Z op(X d1) {
return d1;
}
};
template <typename X, typename Z>
class Not {
public:
no_op_exec_special_bool
no_op_exec_special_bool_cuda
op_def static Z op(X d1, X d2) {
return static_cast<Z>(0);
}
op_def static Z op(X d1, X d2, X *params) {
return d1 != d2 ? static_cast<Z>(1) : static_cast<Z>(0);
}
// this transform op should run only on boolean input
op_def static Z op(X d1, X *params) {
auto b1 = static_cast<bool>(d1);
return !b1;
}
};
template <typename X, typename Y, typename Z>
class LogicalNot {
public:
op_def static Z op(X d1, Y d2) {
return !((int) d1 && (int) d2);
}
op_def static Z op(X d1, Y d2, Z *params) {
return static_cast<X>(!(static_cast<int>(d1) && static_cast<int>(d2)));
}
op_def static Z op(X d1) {
return d1;
}
// op for MetaOps
op_def static Z op(X d1, Y *params) {
return static_cast<X>(119);
}
};
template <typename X, typename Y, typename Z>
class LogicalXor {
public:
op_def static Z op(X d1, Y d2) {
auto i1 = static_cast<int>(d1);
auto i2 = static_cast<int>(d2);
return (i1 | i2) &~ (i1 & i2);
}
op_def static Z op(X d1, Y d2, Z *params) {
return op(d1, d2);
}
op_def static Z op(X d1) {
return d1;
}
// op for MetaOps
op_def static Z op(X d1, Y *params) {
return static_cast<Z>(119);
}
};
template <typename X, typename Y, typename Z>
class LogicalAnd {
public:
op_def static Z op(X d1, Y d2) {
return static_cast<int>(d1) & static_cast<int>(d2);
}
op_def static Z op(X d1, Y d2, Z *params) {
return op(d1, d2);
}
op_def static Z op(Y d1) {
return d1;
}
// op for MetaOps
op_def static Z op(X d1, Y *params) {
return static_cast<Z>(119);
}
};
template <typename X, typename Y, typename Z>
class LogicalOr {
public:
op_def static Z op(X d1, Y d2) {
return static_cast<int>(d1) | static_cast<int>(d2);
}
op_def static Z op(X d1, Y d2, Z *params) {
return op(d1, d2);
}
op_def static Z op(X d1) {
return d1;
}
// op for MetaOps
op_def static Z op(X d1, Y *params) {
return static_cast<X>(119);
}
};
template <typename X, typename Y, typename Z>
class Mod {
public:
/*
// just an optional note, feel free to remove later
op_def static half op(half d1, half d2, half *params) {
return __float2half(simdOps::Mod<float>::op(__half2float(d1), __half2float(d2), nullptr));
}
*/
op_def static Z op(X d1, Y d2) {
return static_cast<int>(d1) % static_cast<int>(d2);
}
op_def static Z op(X d1, Y d2, Z *params) {
return op(d1, d2);
}
// op for MetaOp
op_def static Z op(X d1, Y *params) {
return op(d1, params[0]);
}
};
template <typename X, typename Y, typename Z>
class ReverseMod {
public:
op_def static Z op(X d1, Y d2) {
return static_cast<int>(d2) % static_cast<int>(d1);
}
op_def static Z op(X d1, Y d2, Z *params) {
return op(d1, d2);
}
// op for MetaOp
op_def static Z op(X d1, Y *params) {
return op(d1, params[0]);
}
};
/**
* Whether two elements in an array
* are epsilon equal, i.e. |d1 - d2| <= MIN_V
*/
template <typename X, typename Z>
class Epsilon {
public:
op_def static Z op(X d1, X d2) {
X diff = d1 - d2;
X absDiff = sd::math::nd4j_abs<X>(diff);
if (absDiff <= static_cast<X>(MIN_V))
return static_cast<Z>(1);
return static_cast<Z>(0);
}
op_def static Z op(X d1, X d2, X *params) {
return op(d1, d2);
}
op_def static Z op(X d1, X *params) {
return d1;
}
};
template <typename X, typename Z>
class EqualTo {
public:
op_def static Z op(X d1, X d2) {
return d1 == d2;
}
op_def static Z op(X d1, X d2, X *params) {
return op(d1, d2);
}
op_def static Z op(X d1, X *params) {
return d1;
}
};
template <typename X, typename Z>
class NotEqualTo {
public:
op_def static Z op(X d1, X d2) {
return d1 != d2;
}
op_def static Z op(X d1, X d2, X *params) {
return op(d1, d2);
}
op_def static Z op(X d1, X *params) {
return d1;
}
};
template <typename X, typename Z>
class GreaterThanOrEqual {
public:
op_def static Z op(X d1, X d2) {
return d1 >= d2;
}
op_def static Z op(X d1, X d2, X *params) {
return op(d1, d2);
}
// FIXME: this signature clashes with MetaOp stuff
op_def static Z op(X d1, X *params) {
return d1;
}
};
template <typename X, typename Z>
class GreaterThan {
public:
op_def static Z op(X d1, X d2) {
return d1 > d2;
}
op_def static Z op(X d1, X d2, X *params) {
return op(d1, d2);
}
// FIXME: this signature clashes with MetaOp stuff
op_def static Z op(X d1, X *params) {
return d1;
}
};
template <typename X, typename Z>
class LessThan {
public:
op_def static Z op(X d1, X d2) {
return d1 < d2;
}
op_def static Z op(X d1, X d2, X *params) {
return op(d1, d2);
}
op_def static Z op(X d1, X *params) {
return d1;
}
};
template <typename X, typename Z>
class LessThanOrEqual {
public:
op_def static Z op(X d1, X d2) {
return d1 <= d2;
}
op_def static Z op(X d1, X d2, X *params) {
return op(d1, d2);
}
op_def static Z op(X d1, X *params) {
return d1;
}
};
template <typename X>
class Abs {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return sd::math::nd4j_abs<X>(d1);
}
};
template <typename X>
class Ceiling {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return sd::math::nd4j_ceil<X,X>(d1);
}
};
template <typename X>
class Cosine {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return sd::math::nd4j_cos<X,X>(d1);
}
};
template <typename X>
class Exp {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return sd::math::nd4j_exp<X, X>(d1);
}
};
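// HardTanh clamps the input to [-1, 1]; its derivative is 1 inside that interval and 0 outside.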
template <typename X>
class HardTanhDerivative {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return ((d1 >= static_cast<X>(-1.f) && d1 <= static_cast<X>(1.f)) ? static_cast<X>(1.f) : static_cast<X>(0.f));
}
};
template <typename X>
class HardTanh {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
if (d1 < static_cast<X>(-1))
return static_cast<X>(-1);
else if (d1 > static_cast<X>(1))
return static_cast<X>(1);
else
return d1;
}
};
template <typename X>
class Floor {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return sd::math::nd4j_floor<X,X>(d1);
}
};
template <typename X>
class Log {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return sd::math::nd4j_log<X, X>(d1);
}
};
template <typename X>
class Log1p {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return sd::math::nd4j_log<X, X>(1 + d1);
}
};
template <typename X, typename Y, typename Z>
class LogX {
public:
op_def static Z op(X d1, Y d2, Z *params) {
return sd::math::nd4j_log<X, Z>(d1) / sd::math::nd4j_log<Y, Z>(d2) ;
}
};
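// Clamp non-positive inputs up to the smallest positive representable value (of float16 /
// of X respectively), presumably to keep subsequent logs and divisions finite.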
template <typename X>
class StabilizeFP16 {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
if (d1 <= static_cast<X>(0))
return static_cast<X>(sd::DataTypeUtils::min<float16>());
else return d1;
}
};
template <typename X>
class StabilizeX {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
if (d1 <= static_cast<X>(0))
return sd::DataTypeUtils::min<X>();
else return d1;
}
};
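// d1 * (1 - d1): the derivative of the logistic sigmoid expressed in terms of its output d1.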
template <typename X>
class SpecialDerivative {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return d1 * (static_cast<X>(1.f) - d1);
}
};
template <typename X>
class Neg {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return -d1;
}
};
template <typename X>
class Erf {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return sd::math::nd4j_erf<X,X>(d1);
}
};
template <typename X>
class Erfc {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return sd::math::nd4j_erfc<X,X>(d1);
}
};
template <typename X>
class Reciprocal {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
// op_def static T op(T d1) {
// return (T(1.0f) / d1);
// }
// op for MetaOps
op_def static X op(X d1, X *params) {
return (static_cast<X>(1) / d1);
}
};
template <typename X, typename Z>
class Sqr {
public:
no_op_exec_special
no_op_exec_special_cuda
op_def static Z op(X d1, Z *params) {
return sd::math::nd4j_pow<X, X, Z>(d1, static_cast<X>(2));
}
op_def static Z op(X d1) {
return sd::math::nd4j_pow<X, X, Z>(d1, static_cast<X>(2));
}
};
template <typename X, typename Y, typename Z>
class RelativeError {
public:
no_op_exec_special
no_op_exec_special_cuda
op_def static Z op(X d1, Y d2) {
return sd::math::nd4j_re<X>(d1, d2);
}
op_def static Z op(X d1, Y d2, Z *params) {
return op(d1, d2);
}
op_def static Z op(X d1) {
return static_cast<Z>(0);
}
};
template <typename X, typename Y, typename Z>
class BinaryRelativeError {
public:
no_op_exec_special
no_op_exec_special_cuda
op_def static Z op(X d1, Y d2, Z *params) {
X threshold = params[0];
return sd::math::nd4j_re<X>(d1, d2) > threshold ? static_cast<Z>(1) : static_cast<Z>(0);
}
op_def static Z op(X d1) {
return static_cast<Z>(0);
}
};
template <typename X, typename Y, typename Z>
class BinaryMinimumAbsoluteRelativeError {
public:
no_op_exec_special
no_op_exec_special_cuda
op_def static Z op(X d1, X *params) {
X d2 = params[0];
X thresholdRelative = params[1];
X thresholdAbsolute = params[2];
return sd::math::nd4j_re<X>(d1, d2) > thresholdRelative ? (sd::math::nd4j_abs<X>(d1 - static_cast<X>(d2)) < thresholdAbsolute ? static_cast<Z>(0) : static_cast<Z>(1)) : static_cast<Z>(0);
}
op_def static Z op(X d1, Y d2, Z *params) {
X thresholdRelative = params[0];
X thresholdAbsolute = params[1];
return sd::math::nd4j_re<X>(d1, d2) > thresholdRelative ? (sd::math::nd4j_abs<X>(d1 - static_cast<X>(d2)) < thresholdAbsolute ? static_cast<Z>(0) : static_cast<Z>(1)) : static_cast<Z>(0);
}
op_def static Z op(X d1) {
return static_cast<Z>(0);
}
};
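// ReversePow / Pow: element-wise power. Pow computes d1 ^ d2 (or d1 ^ params[0] in
// the scalar-parameter overload); ReversePow swaps the operands and computes
// d2 ^ d1. The single-argument op(d1) overloads simply return d1 unchanged.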
template <typename X, typename Y, typename Z>
class ReversePow {
public:
no_op_exec_special
no_op_exec_special_cuda
op_def static Z op(X d1, Z *params) {
return sd::math::nd4j_pow<X, X, Z>(params[0], d1);
}
op_def static Z op(X d1, Y d2) {
return sd::math::nd4j_pow<X, Y, Z>(d2, d1);
}
op_def static Z op(X d1, Y d2, Z *params) {
return sd::math::nd4j_pow<X, Y, Z>(d2, d1);
}
op_def static Z op(X d1) {
return d1;
}
};
template <typename X, typename Y, typename Z>
class Pow {
public:
no_op_exec_special
no_op_exec_special_cuda
op_def static Z op(X d1, Z *params) {
return sd::math::nd4j_pow<X, X, Z>(d1, params[0]);
}
op_def static Z op(X d1, Y d2) {
return sd::math::nd4j_pow<X, Y, Z>(d1, d2);
}
op_def static Z op(X d1, Y d2, Z *params) {
return sd::math::nd4j_pow<X, Y, Z>(d1, d2);
}
op_def static Z op(X d1) {
return d1;
}
};
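// PowDerivative: derivative of the power function with respect to its base,
// d/dx x^p = p * x^(p-1), with the exponent p taken from d2 or params[0].
// Illustrative (hypothetical) scalar call:
//   PowDerivative<float, float, float>::op(3.0f, 2.0f)  ->  2 * 3^(2-1) = 6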
template <typename X, typename Y, typename Z>
class PowDerivative {
public:
no_op_exec_special
no_op_exec_special_cuda
op_def static Z op(X d1, Z *params) {
return params[0] * sd::math::nd4j_pow<X, Z, Z>(d1, static_cast<Z>(params[0]) - static_cast<Z>(1.f));
}
op_def static Z op(X d1, Y d2) {
return static_cast<Z>(d2) * sd::math::nd4j_pow<X, Z, Z>(d1, static_cast<Z>(d2) - static_cast<Z>(1.f));
}
op_def static Z op(X d1, Y d2, Z *params) {
return static_cast<Z>(d2) * sd::math::nd4j_pow<X, Z, Z>(d1, static_cast<Z>(d2) - static_cast<Z>(1.f));
}
op_def static Z op(X d1) {
return static_cast<Z>(d1);
}
};
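// IGamma / IGammac: element-wise incomplete gamma functions; IGammac is the
// complementary (upper) counterpart of IGamma (lower). The second argument comes
// from d2 or from params[0].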
template <typename X, typename Y, typename Z>
class IGamma {
public:
no_op_exec_special
no_op_exec_special_cuda
op_def static Z op(X d1, Z *params) {
return sd::math::nd4j_igamma<X, X, Z>(d1, params[0]);
}
op_def static Z op(X d1, Y d2) {
return sd::math::nd4j_igamma<X, Y, Z>(d1, d2);
}
op_def static Z op(X d1, Y d2, Z *params) {
return sd::math::nd4j_igamma<X, Y, Z>(d1, d2);
}
op_def static Z op(X d1) {
return d1;
}
};
template <typename X, typename Y, typename Z>
class IGammac {
public:
no_op_exec_special
no_op_exec_special_cuda
op_def static Z op(X d1, Z *params) {
return sd::math::nd4j_igammac<X, X, Z>(d1, params[0]);
}
op_def static Z op(X d1, Y d2) {
return sd::math::nd4j_igammac<X, Y, Z>(d1, d2);
}
op_def static Z op(X d1, Y d2, Z *params) {
return sd::math::nd4j_igammac<X, Y, Z>(d1, d2);
}
op_def static Z op(X d1) {
return d1;
}
};
template <typename X>
class Round {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return sd::math::nd4j_round<X,X>(d1);
}
};
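// IsNan / IsPositive / IsNegative / IsInf share one pattern: the element-wise op
// returns 1 when the predicate holds and 0 otherwise, while the accumulation hooks
// (startingValue 0, merge/update summing partial results) allow the same functor to
// be used as a reduction counting the matching elements.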
template <typename X, typename Z>
class IsNan {
public:
no_op_exec_special_bool
no_op_exec_special_bool_cuda
no_op_exec_special_accumulation
no_op_exec_special_accumulation_cuda
op_def static Z op(X d1, X *params) {
return sd::math::nd4j_isnan(d1) ? static_cast<Z>(1) : static_cast<Z>(0);
}
op_def static X startingValue(const X *input) {
return static_cast<X>(0);
}
op_def static Z merge(X old, X opOutput, X *extraParams) {
return opOutput + old;
}
op_def static Z update(X old, X opOutput, X *extraParams) {
return opOutput + old;
}
op_def static Z postProcess(X reduction, Nd4jLong n, X *extraParams) {
return reduction;
}
};
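// Expm1: exp(d1) - 1. Note this is computed directly from nd4j_exp rather than a
// dedicated expm1 routine, so accuracy for very small d1 depends on the underlying
// exp implementation.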
template <typename X>
class Expm1 {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return sd::math::nd4j_exp<X, X>(d1) - static_cast<X>(1);
}
};
template <typename X, typename Z>
class IsPositive {
public:
no_op_exec_special_bool
no_op_exec_special_bool_cuda
no_op_exec_special_accumulation
no_op_exec_special_accumulation_cuda
op_def static Z op(X d1, X *params) {
return d1 > static_cast<X>(0);
}
op_def static X startingValue(const X *input) {
return static_cast<X>(0);
}
op_def static Z merge(X old, X opOutput, X *extraParams) {
return opOutput + old;
}
op_def static Z update(X old, X opOutput, X *extraParams) {
return opOutput + old;
}
op_def static Z postProcess(X reduction, Nd4jLong n, X *extraParams) {
return reduction;
}
};
template <typename X, typename Z>
class IsNegative {
public:
no_op_exec_special_bool
no_op_exec_special_bool_cuda
no_op_exec_special_accumulation
no_op_exec_special_accumulation_cuda
op_def static Z op(X d1, X *params) {
return d1 < static_cast<X>(0);
}
op_def static X startingValue(const X *input) {
return static_cast<X>(0);
}
op_def static Z merge(X old, X opOutput, X *extraParams) {
return opOutput + old;
}
op_def static Z update(X old, X opOutput, X *extraParams) {
return opOutput + old;
}
op_def static Z postProcess(X reduction, Nd4jLong n, X *extraParams) {
return reduction;
}
};
template <typename X, typename Z>
class IsInf {
public:
no_op_exec_special_bool
no_op_exec_special_bool_cuda
no_op_exec_special_accumulation
no_op_exec_special_accumulation_cuda
op_def static Z op(X d1, X *params) {
return sd::math::nd4j_isinf<X>(d1) ? static_cast<Z>(1) : static_cast<Z>(0);
}
op_def static X startingValue(const X *input) {
return static_cast<X>(0);
}
op_def static Z merge(X old, X opOutput, X *extraParams) {
return opOutput + old;
}
op_def static Z update(X old, X opOutput, X *extraParams) {
return opOutput + old;
}
op_def static Z postProcess(X reduction, Nd4jLong n, X *extraParams) {
return reduction;
}
};
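// IsInfOrNan: element-wise 1 when the value is not finite (inf or NaN), 0 otherwise.
// Unlike the counting reductions above, merge/update OR the partial flags together,
// and postProcess reports whether any non-finite value was seen.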
template <typename X, typename Z>
class IsInfOrNan{
public:
no_op_exec_special_bool
no_op_exec_special_bool_cuda
no_op_exec_special_accumulation
no_op_exec_special_accumulation_cuda
op_def static Z op(X d1, X *params) {
return sd::math::nd4j_isfin<X>(d1) ? static_cast<Z>(0) : static_cast<Z>(1);
}
op_def static X startingValue(const X *input) {
return static_cast<X>(0);
}
op_def static Z merge(X old, X opOutput, X *extraParams) {
return opOutput == static_cast<X>(0) && old == static_cast<X>(0) ? static_cast<Z>(0) : static_cast<Z>(1);
}
op_def static Z update(X old, X opOutput, X *extraParams) {
return opOutput == static_cast<X>(0) && old == static_cast<X>(0) ? static_cast<Z>(0) : static_cast<Z>(1);
}
op_def static Z postProcess(X reduction, Nd4jLong n, X *extraParams) {
            return reduction != static_cast<X>(0);
        }
    };
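
    // IsFinite: boolean reduction that checks whether every element of the input is finite
    // (neither NaN nor +/-Inf).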
    template <typename X, typename Z>
    class IsFinite {
    public:
        no_op_exec_special_bool
        no_op_exec_special_bool_cuda

        no_op_exec_special_accumulation
        no_op_exec_special_accumulation_cuda

        op_def static Z op(X d1, X *params) {
            return sd::math::nd4j_isfin<X>(d1) ? static_cast<Z>(1) : static_cast<Z>(0);
        }

        op_def static X startingValue(const X *input) {
            return static_cast<X>(1);
        }
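
        // Partial results are combined as a logical AND: once any element is non-finite
        // (op() returned 0), the reduction stays 0.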
        op_def static Z merge(X old, X opOutput, X *extraParams) {
            return opOutput == static_cast<X>(0) || old == static_cast<X>(0) ? static_cast<Z>(0) : static_cast<Z>(1);
        }

        op_def static Z update(X old, X opOutput, X *extraParams) {
            return opOutput == static_cast<X>(0) || old == static_cast<X>(0) ? static_cast<Z>(0) : static_cast<Z>(1);
        }

        op_def static Z postProcess(X reduction, Nd4jLong n, X *extraParams) {
* provide pooling3d for cuda Signed-off-by: Yurii <yurii@skymind.io> * minor lstm rearrangements Signed-off-by: raver119 <raver119@gmail.com> * more of minor lstm rearrangements Signed-off-by: raver119 <raver119@gmail.com> * (bi)dynamic_rnn Signed-off-by: raver119 <raver119@gmail.com> * templates init order Signed-off-by: raver119 <raver119@gmail.com> * Refactored non_max_suppression op. * Added cuda kernel for non_max_suppression. * CPU sort by key/value Signed-off-by: raver119 <raver119@gmail.com> * CPU sort TAD by key/value Signed-off-by: raver119 <raver119@gmail.com> * CPU sort TAD by key/value tests Signed-off-by: raver119 <raver119@gmail.com> * Eliminate compiler error with cuda implementation. * - repaired gradCheck in cuda - provide conv2d_bp for cuda Signed-off-by: Yurii <yurii@skymind.io> * missed signature Signed-off-by: raver119 <raver119@gmail.com> * provide depthwise_conv2d_bp for cuda Signed-off-by: Yurii <yurii@skymind.io> * Implementation of lup helper with cuda kernel. Initial commit. * further work on backprops for convolutions Signed-off-by: Yurii <yurii@skymind.io> * CUDA linear sort by key/val Signed-off-by: raver119 <raver119@gmail.com> * CUDA tad sort by key/val Signed-off-by: raver119 <raver119@gmail.com> * start providing of backprop for pooling2d/3d Signed-off-by: Yurii <yurii@skymind.io> * Added atomicAdd for bool datatype. * dynamic partition concept Signed-off-by: raver119 <raver119@gmail.com> * dynamic partition concept Signed-off-by: raver119 <raver119@gmail.com> * dynamic partition scalar CUDA Signed-off-by: raver119 <raver119@gmail.com> * important comment Signed-off-by: raver119 <raver119@gmail.com> * fix pooling2d/3d backprop helpers Signed-off-by: Yurii <yurii@skymind.io> * Added non-linear test with dynamic_partition. * Improved test for dynamic_partition. * dynamic_partition TAD concept Signed-off-by: raver119 <raver119@gmail.com> * - dynamic_partition TAD CUDA impl - dynamic_partition TAD CPU fix Signed-off-by: raver119 <raver119@gmail.com> * - rewrite cpu code for usampling2d/3d - write cuda code for usampling2d/3d Signed-off-by: Yurii <yurii@skymind.io> * dynamic_stitch CUDA vector case Signed-off-by: raver119 <raver119@gmail.com> * dynamic_stitch CUDA TAD case concept Signed-off-by: raver119 <raver119@gmail.com> * dynamic_stitch CUDA TAD case impl Signed-off-by: raver119 <raver119@gmail.com> * Added tests for dynamic_stitch 3D-4D cases. * minor tests tweaks Signed-off-by: raver119 <raver119@gmail.com> * Fixed type check for dynamic stitch. * min/max bp Signed-off-by: raver119 <raver119@gmail.com> * rewrite code for upsampling2d/3d cpu Signed-off-by: Yurii <yurii@skymind.io> * reduce min/max/norm_max bp Signed-off-by: raver119 <raver119@gmail.com> * lup implementation. Additional enhancements. * provide code for upsamling2d/3d backprop Signed-off-by: Yurii <yurii@skymind.io> * weightedCrossEntropyWithLogits Signed-off-by: raver119 <raver119@gmail.com> * Fixed template math atomicMul for 64bit ints. * Refactored dynamic_partition_bp op. * inverseBroadcast fix Signed-off-by: raver119 <raver119@gmail.com> * DynamicPartitionBP test datatype fixed. * - nd4j_atomicMul Windows fix - cpu/NDArrayLambda.hpp excluded from CUDA Signed-off-by: raver119 <raver119@gmail.com>
2019-06-27 17:37:04 +02:00
return reduction != static_cast<X>(0);
}
};
template <typename X>
class ClipByValue {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
if (d1 > params[1])
return params[1];
if (d1 < params[0])
return params[0];
return d1;
}
};
template <typename X, typename Y, typename Z>
class LstmClip {
public:
no_op_exec_special
no_op_exec_special_cuda
op_def static Z op(X d1, Y d2, Z *params) {
X _v = (X) d2;
if (d1 > _v)
return _v;
else if (d1 < -_v)
return -_v;
else return d1;
}
};
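/**
* Swish activation: f(x) = x * sigmoid(x)
*/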
template <typename X>
class Swish {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return d1 * sd::math::nd4j_sigmoid<X,X>(d1);
}
};
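/**
* Mish activation: f(x) = x * tanh(softplus(x)) = x * tanh(ln(1 + e^x))
*/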
template <typename X>
class Mish {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return d1 * sd::math::nd4j_tanh<X,X>(sd::math::nd4j_softplus<X,X>(d1));
}
};
template <typename X>
class MishDerivative {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
auto ex = sd::math::nd4j_exp<X,X>(d1);
auto e2x = ex * ex;
auto e3x = ex * ex * ex;
return (ex * (4 * (d1 + 1) + 4 * e2x + e3x + ex *(4 * d1 + 6))) / sd::math::nd4j_pow<X, X, X>((2 * ex + e2x + 2), (X) 2.f);
}
};
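/**
* GELU, sigmoid approximation: f(x) ~= x * sigmoid(1.702 * x)
*/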
template <typename X>
class GELU {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return d1 * sd::math::nd4j_sigmoid<X,X>(static_cast<X>(1.702f) * d1);
}
};
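/**
* GELU, tanh approximation: f(x) = 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
*/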
template <typename X>
class PreciseGELU {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
auto sp = sd::math::nd4j_sqrt<X, X>(static_cast<X>(2) / static_cast<X>(M_PI));
// x + 0.044715 * x^3 (the cube applies to x, not to 0.044715 * x)
auto xp = d1 + static_cast<X>(0.044715) * d1 * d1 * d1;
return (d1 / static_cast<X>(2)) * (static_cast<X>(1) + sd::math::nd4j_tanh<X, X>(sp * xp));
}
};
template <typename X>
class GELUDerivative {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
auto x17 = static_cast<X>(1.702f) * d1;
auto ep = sd::math::nd4j_pow<X,X,X>(static_cast<X>(M_E), x17);
// (E^(1.702 x) (1. + E^(1.702 x) + 1.702 x))/(1. + E^(1.702 x))^2
return (ep * (static_cast<X>(1.f) + ep + x17)) / sd::math::nd4j_pow<X, int, X>((static_cast<X>(1.f) + ep), 2);
}
};
template <typename X>
class PreciseGELUDerivative {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
auto x79 = static_cast<X>(0.797885) * d1;
auto x03 = static_cast<X>(0.0356774) * d1 * d1 * d1;   // 0.0356774 * x^3, matching the formula below
auto x39 = static_cast<X>(0.398942) * d1;
auto x05 = static_cast<X>(0.0535161) * d1 * d1 * d1;   // 0.0535161 * x^3
auto scz = sd::math::nd4j_sech<X, X>(x79 + x03);

// 0.5 + (0.398942 x + 0.0535161 x^3) Sech[0.797885 x + 0.0356774 x^3]^2 + 0.5 Tanh[0.797885 x + 0.0356774 x^3]
return static_cast<X>(0.5) + (x39 + x05) * (scz * scz) + static_cast<X>(0.5) * sd::math::nd4j_tanh<X, X>(x79 + x03);
}
};
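/**
* Swish derivative: d/dx [x * sigmoid(x)] = (e^x * (x + e^x + 1)) / (e^x + 1)^2
*/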
template <typename X>
class SwishDerivative {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
X ex = sd::math::nd4j_pow<X, X, X>(static_cast<X>(M_E), d1);
return (ex * (d1 + ex + static_cast<X>(1.f))) / sd::math::nd4j_pow<X, X, X>((ex + static_cast<X>(1.f)) , static_cast<X>(2.f));
}
};
template <typename X>
class LogSigmoid {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return sd::math::nd4j_log<X, X>(sd::math::nd4j_sigmoid<X, X>(d1));
}
};
template <typename X>
class LogSigmoidDerivative {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
X ex = sd::math::nd4j_pow<X, X, X>(M_E, d1);
return static_cast<X>(1.f) / (ex + static_cast<X>(1.f));
}
};
template <typename X>
class Sigmoid {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return sd::math::nd4j_sigmoid<X, X>(d1);
}
};
template <typename X>
class Affine {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return params[0] * d1 + params[1];
}
};
template <typename X>
class SigmoidDerivative {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return sd::math::nd4j_sigmoidderivative<X, X>(d1);
}
};
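/**
* Hard sigmoid: piecewise-linear approximation f(x) = min(1, max(0, 0.2 * x + 0.5))
*/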
template <typename X>
class HardSigmoid {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return sd::math::nd4j_min<X>(static_cast<X>(1), sd::math::nd4j_max<X>(static_cast<X>(0), (static_cast<X>(0.2f)) * d1 + static_cast<X>(0.5f)));
}
};
template <typename X>
class HardSigmoidDerivative {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return d1 < static_cast<X>(-2.5f) || d1 > static_cast<X>(2.5f) ? static_cast<X>(0.f) : static_cast<X>(0.2f);
}
};
/**
* Scale to be between a min and max
*/
template <typename X>
class SetRange {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
auto min = params[0];
auto max = params[1];
if (static_cast<X>(d1) >= min && static_cast<X>(d1) <= max)
return d1;
if (min == static_cast<X>(0) && max == static_cast<X>(1)) {
auto val = static_cast<X>(1) / (static_cast<X>(1) + sd::math::nd4j_exp<X, X>(-d1));
return (sd::math::nd4j_floor<X,X>(val * (max - min)) + min);
}
return (sd::math::nd4j_floor<X,X>(d1 * (max - min)) + min);
}
};
template <typename X>
class Sin {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return sd::math::nd4j_sin<X,X>(d1);
}
};
template <typename X>
class Square {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return d1 * d1;
}
};
template <typename X, typename Z>
class Sqrt {
public:
no_op_exec_special
no_op_exec_special_cuda
op_def static Z op(X d1, Z *params) {
return sd::math::nd4j_sqrt<X, Z>(d1);
}
};
template <typename X, typename Z>
class RSqrt {
public:
no_op_exec_special
no_op_exec_special_cuda
op_def static Z op(X d1, Z *params) {
return static_cast<Z>(1) / sd::math::nd4j_sqrt<X, Z>(d1);
}
};
template <typename X>
class Rint {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return sd::math::nd4j_rint<X,X>(d1);
}
};
template <typename X>
class SoftPlus {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return sd::math::nd4j_softplus<X, X>(d1);
}
};
template <typename X>
class Sign {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return (d1 > static_cast<X>(0)) - (d1 < static_cast<X>(0));
}
};
template <typename X>
class TimesOneMinus {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return d1 * (static_cast<X>(1) - d1);
}
};
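/**
* Rational (polynomial-ratio) approximation of the scaled tanh 1.7159 * tanh(2x/3),
* cheaper to evaluate than the exact tanh.
*/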
template <typename X>
class RationalTanh {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
// keep 2/3 as runtime variable, to match precision
auto dis = (static_cast<X>(2) / static_cast<X>(3)) * d1;
auto tanh = sd::math::nd4j_sgn<X,X>(dis) * (static_cast<X>(1) - (static_cast<X>(1) / (static_cast<X>(1) + static_cast<X>(sd::math::nd4j_abs<X>(dis)) + sd::math::nd4j_pow<X, X, X>(dis, static_cast<X>(2)) + static_cast<X>(1.41645f) * sd::math::nd4j_pow<X, X, X>(dis, static_cast<X>(4)) )));
return static_cast<X>(1.7159f) * tanh;
}
};
template <typename X>
class RationalTanhDerivative {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
auto dis = (static_cast<X>(2.f) / static_cast<X>(3.f)) * d1;
auto a = static_cast<X>(1.f) + sd::math::nd4j_abs<X>(dis) + sd::math::nd4j_pow<X, X, X>(dis, static_cast<X>(2.f)) + static_cast<X>(1.41645f) * sd::math::nd4j_pow<X, X, X>(dis, static_cast<X>(4));
auto tDeriv = (static_cast<X>(1.f) + sd::math::nd4j_sign<X,X>(dis) * (static_cast<X>(2.f) * dis + static_cast<X>(4.f) * static_cast<X>(1.41645f) * sd::math::nd4j_pow<X, X, X>(dis, static_cast<X>(3)))) / (a * a);
return static_cast<X>(1.7159f) * (static_cast<X>(2.f) / static_cast<X>(3.f)) * tDeriv;
}
};
template <typename X>
class Tanh {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return sd::math::nd4j_tanh<X, X>(d1);
}
};
template <typename X>
class ScaledTanh {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return params[0] * sd::math::nd4j_tanh<X, X>(params[1] * d1);
}
};
template <typename X>
class RectifiedTanh {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return sd::math::nd4j_max<X>(static_cast<X>(0), sd::math::nd4j_tanh<X,X>(d1));
}
};
template <typename X>
class RectifiedTanhDerivative {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return d1 > static_cast<X>(0.f) ? sd::math::nd4j_tanhderivative<X,X>(d1) : static_cast<X>(0.f);
}
};
template <typename X>
class ATanh {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return sd::math::nd4j_atanh<X,X>(d1);
}
};
template <typename X>
class TanhDerivative {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return sd::math::nd4j_tanhderivative<X,X>(d1);
}
};
template <typename X>
class Cube {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return d1 * d1 * d1;
}
};
template <typename X>
class CubeDerivative {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return static_cast<X>(3) * d1 * d1;
}
};
template <typename X>
class ACos {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return sd::math::nd4j_acos<X, X>(d1);
}
};
template <typename X>
class ASinh {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return sd::math::nd4j_asinh<X, X>(d1);
}
};
template <typename X>
class ASinhDerivative {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return static_cast<X>(1.f) / (sd::math::nd4j_sqrt<X, X>(sd::math::nd4j_pow<X, X, X>(d1, static_cast<X>(2.f)) + static_cast<X>(1.f)));
}
};
template <typename X>
class ACosh {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return sd::math::nd4j_acosh<X, X>(d1);
}
};
template <typename X>
class ACoshDerivative {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return static_cast<X>(1.f) / (sd::math::nd4j_sqrt<X, X>(d1 - static_cast<X>(1.f)) * sd::math::nd4j_sqrt<X, X>(d1 + static_cast<X>(1.f)));
}
};
template <typename X>
class Ones {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return static_cast<X>(1.0f);
}
};
template <typename X>
class SoftSign {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return sd::math::nd4j_softsign<X, X>(d1);
}
};
template <typename X>
class SoftSignDerivative {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return sd::math::nd4j_softsignderivative<X,X>(d1);
}
};
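/**
* Boolean condition match: extraParams[0] = compare value, extraParams[1] = eps,
* extraParams[2] = mode (see the switch below for the supported condition codes).
*/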
template <typename X, typename Z>
class MatchConditionBool {
public:
no_op_exec_special_bool
no_op_exec_special_bool_cuda
// this op returns 1.0 if the condition is met, 0.0 otherwise
op_def static Z op(X d1, X *extraParams) {
X compare = extraParams[0];
X eps = extraParams[1];
auto mode = static_cast<int>(extraParams[2]);
//nd4j_printf("value: %f; comp: %f; eps: %f; mode: %i;\n", d1, compare, eps, mode);
switch (mode) {
case 0: // equals
return sd::math::nd4j_abs<X>(d1 - compare) <= eps ? true : false;
case 1: // not equals
return sd::math::nd4j_abs<X>(d1 - compare) > eps ? true : false;
case 2: // less_than
return d1 < compare ? true : false;
case 3: // greater_than
return d1 > compare ? true : false;
case 4: // less_or_equals_than
return d1 <= compare ? true : false;
case 5: // greater_or_equals_than
return d1 >= compare ? true : false;
case 6: // abs_less_than
return sd::math::nd4j_abs<X>(d1) < compare ? true : false;
case 7: // abs_greater_than
return sd::math::nd4j_abs<X>(d1) > compare ? true : false;
case 8: // is inf
return sd::math::nd4j_isinf(d1) ? true : false;
case 9: // is nan
return sd::math::nd4j_isnan(d1) ? true : false;
case 10:
return (d1 == compare) ? true : false;
case 11:
return (d1 != compare) ? true : false;
case 12: // abs_greater_or_equals_than
return sd::math::nd4j_abs<X>(d1) >= compare ? true : false;
case 13: // abs_less_or_equals_than
return sd::math::nd4j_abs<X>(d1) <= compare ? true : false;
case 14:
// isFinite
return !(sd::math::nd4j_isinf(d1) || sd::math::nd4j_isnan(d1));
case 15:
// isInfinite
return sd::math::nd4j_isinf(d1) || sd::math::nd4j_isnan(d1);
default:
printf("Undefined match condition: [%i]\n", mode);
}
return d1;
}
};
template <typename X, typename Z>
class MatchCondition {
public:
no_op_exec_special
no_op_exec_special_cuda
no_op_exec_special_accumulation_long
no_op_exec_special_accumulation_cuda
op_def static Z startingValue(const X *input) {
return static_cast<Z>(0);
}
op_def static Z merge(Z old, Z opOutput, X *extraParams) {
return old + opOutput;
}
op_def static Z update(Z old, Z opOutput, X *extraParams) {
return old + opOutput;
}
op_def static Z op(X d1, X compare, X eps, int mode) {
switch (mode) {
case 0: // equals
return sd::math::nd4j_abs<X>(d1 - compare) <= eps ? 1 : 0;
case 1: // not equals
return sd::math::nd4j_abs<X>(d1 - compare) > eps ? 1 : 0;
case 2: // less_than
return d1 < compare ? 1 : 0;
case 3: // greater_than
return d1 > compare ? 1 : 0;
case 4: // less_or_equals_than
return d1 <= compare ? 1 : 0;
case 5: // greater_or_equals_than
return d1 >= compare ? 1 : 0;
case 6: // abs_less_than
return sd::math::nd4j_abs<X>(d1) < compare ? 1 : 0;
case 7: // abs_greater_than
return sd::math::nd4j_abs<X>(d1) > compare ? 1 : 0;
case 8: // is inf
return sd::math::nd4j_isinf(d1) ? 1 : 0;
case 9: // is nan
return sd::math::nd4j_isnan(d1) ? 1 : 0;
case 10:
return (d1 == compare) ? 1 : 0;
case 11:
return (d1 != compare) ? 1 : 0;
case 12: // abs_greater_or_equals_than
return sd::math::nd4j_abs<X>(d1) >= compare ? 1 : 0;
case 13: // abs_less_or_equals_than
return sd::math::nd4j_abs<X>(d1) <= compare ? 1 : 0;
case 14:
// isFinite
return !(sd::math::nd4j_isinf(d1) || sd::math::nd4j_isnan(d1)) ? 1 : 0;
case 15:
// isInfinite
return sd::math::nd4j_isinf(d1) || sd::math::nd4j_isnan(d1) ? 1 : 0;
default:
printf("Undefined match condition: [%i]\n", mode);
}
return d1;
}
// this op returns 1.0 if the condition is met, 0.0 otherwise
op_def static Z op(X d1, X compare, X *extraParams) {
X eps = extraParams[1];
auto mode = static_cast<int>(extraParams[0]);
return op(d1, compare, eps, mode);
}
// this op returns 1.0 if the condition is met, 0.0 otherwise
op_def static Z op(X d1, X *extraParams) {
X compare = extraParams[0];
X eps = extraParams[1];
auto mode = static_cast<int>(extraParams[2]);
return op(d1, compare, eps, mode);
}
op_def static Z postProcess(Z reduction, Nd4jLong n, X *extraParams) {
return reduction;
}
};
template <typename X, typename Y, typename Z>
class ELU {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static Z op(X d1, Y d2, Z *params) {
return sd::math::nd4j_elu<X,Z>(d1, static_cast<X>(d2));
}
};
template <typename X, typename Y, typename Z>
class ELUDerivative {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static Z op(X d1, Y d2, Z *params) {
return sd::math::nd4j_eluderivative<X,Z>(d1, static_cast<X>(d2));
}
};
template <typename X, typename Y, typename Z>
class RELU {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static Z op(X d1, Y d2, Z *params) {
auto xt = static_cast<Z>(d1);
auto xf = static_cast<Z>(d2);
return xt < xf ? xf : xt;
}
};
template <typename X, typename Y, typename Z>
class RELUDerivative {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static Z op(X d1, Y d2, Z *params) {
auto xt = static_cast<Z>(d1);
auto xf = static_cast<Z>(d2);
return xt > xf ? static_cast<Z>(1.f) : static_cast<Z>(0.f);
}
};
template <typename X, typename Y, typename Z>
class SXELogitsSmoother {
public:
op_def static Z op(X d1, Y d2, Z *params) {
return d1 * ((X)1.f - (X) d2) + (X)(0.5f) * (X) d2;
}
};
template <typename X, typename Y, typename Z>
class RELU6 {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static Z op(X d1, Y d2, Z *params) {
auto relu = simdOps::RELU<X,Y,Z>::op(d1, d2, params);
return relu < static_cast<Z>(6) ? relu : static_cast<Z>(6);
}
};
template <typename X, typename Y, typename Z>
class LeakyRELU {
public:
no_op_exec_special
no_op_exec_special_cuda
op_def static Z op(X d1, Y d2, Z *params) {
auto val = static_cast<Z>(d1);
auto alpha = static_cast<Z>(d2);
return val < 0.0f ? alpha * val : val;
}
};
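/**
* SELU activation: lambda * x for x > 0, lambda * alpha * (e^x - 1) otherwise,
* with lambda and alpha taken from the SELU_LAMBDA / SELU_ALPHA constants.
*/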
template <typename X>
class SELU {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return d1 > static_cast<X>(0.0f) ? static_cast<X>(SELU_LAMBDA) * static_cast<X>(d1) : static_cast<X>(SELU_LAMBDA) * (static_cast<X>(SELU_ALPHA) * sd::math::nd4j_exp<X, X>(d1) - static_cast<X>(SELU_ALPHA));
}
};
template <typename X>
class SELUDerivative {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return d1 > static_cast<X>(0.f) ? static_cast<X>(SELU_LAMBDA) : static_cast<X>(SELU_ALPHA) * static_cast<X>(SELU_LAMBDA) * sd::math::nd4j_exp<X, X>(d1);
}
};
template <typename X, typename Y, typename Z>
class LeakyRELUDerivative {
public:
no_op_exec_special
no_op_exec_special_cuda
op_def static Z op(X d1, Y d2, Z *params) {
if (d1 >= static_cast<X>(0))
return static_cast<Z>(1);
else
return static_cast<Z>(d2);
}
};
template <typename X>
class ASin {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return sd::math::nd4j_asin<X,X>(d1);
}
};
template <typename X>
class Sinh {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return sd::math::nd4j_sinh<X,X>(d1);
}
};
template <typename X>
class SinhDerivative {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return sd::math::nd4j_cosh<X, X>(d1);
}
};
template <typename X>
class Cosh {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return sd::math::nd4j_cosh<X,X>(d1);
}
};
template <typename X>
class Tan {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return sd::math::nd4j_tan<X,X>(d1);
}
};
template <typename X>
class TanDerivative {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return static_cast<X>(1.f) / sd::math::nd4j_pow<X, X, X>(sd::math::nd4j_cos<X, X>(d1), static_cast<X>(2.0f));
}
};
template <typename X>
class ATan {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return sd::math::nd4j_atan<X, X>(d1);
}
};
template <typename X, typename Y, typename Z>
class Atan2 {
public:
no_op_exec_special
no_op_exec_special_cuda
op_def static Z op(X d1, Y d2) {
return sd::math::nd4j_atan2<X, Z>(d2, d1);
}
op_def static Z op(X d1, Y d2, Z *params) {
return op(d1, d2);
}
// op for MetaOps
op_def static Z op(X d1, Y *params) {
return op(d1, params[0]);
}
};
template <typename X>
class Identity {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return d1;
}
};
template <typename X>
class Stabilize {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
X k = params[0];
if (d1 * k > static_cast<X>(- MIN_CUTFOFF))
return static_cast<X>(- MIN_CUTFOFF) / k;
else if (d1 * k < static_cast<X>(MIN_CUTFOFF))
return static_cast<X>(MIN_CUTFOFF) / k;
return d1;
}
};
template <typename X, typename Y, typename Z>
class Step {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static Z op(X d1, Y d2, Z *params) {
return (d1 > static_cast<X>(d2) ? static_cast<Z>(1) : static_cast<Z>(0));
}
};
template <typename X>
class OneMinus {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
op_def static X op(X d1, X *params) {
return static_cast<X>(1) - d1;
}
};
template <typename X>
class Sum {
public:
no_op_exec_special_accumulation_same
no_op_exec_special_accumulation_same_cuda
op_def static X startingValue(const X *input) {
return static_cast<X>(0.0f);
}
op_def static X merge(X old, X opOutput, X *extraParams) {
return opOutput + old;
}
op_def static X update(X old, X opOutput, X *extraParams) {
return opOutput + old;
}
op_def static X op(X d1, X *extraParams) {
return d1;
}
op_def static X postProcess(X reduction, Nd4jLong n, X *extraParams) {
return reduction;
}
};
template <typename X>
class ReduceSameBenchmarkOp {
public:
no_op_exec_special_accumulation_same
no_op_exec_special_accumulation_same_cuda
const static functions::ReduceType reduceType = functions::ReduceType::SUM;
op_def static X startingValue(const X *input) {
return static_cast<X>(0.0f);
}
op_def static X merge(X old, X opOutput, X *extraParams) {
return opOutput + old;
}
op_def static X update(X old, X opOutput, X *extraParams) {
return opOutput + old;
}
op_def static X op(X d1, X *extraParams) {
auto f1 = static_cast<float>(d1);
return static_cast<X>(sd::math::nd4j_pow<float,float,float>(f1, 3)
+ sd::math::nd4j_log<float,float>(f1) * sd::math::nd4j_sin<float,float>(f1)
/ sd::math::nd4j_tanh<float,float>(static_cast<float>(M_E) * static_cast<float>(M_PI) * f1)
* sd::math::nd4j_sqrt<float,float>(static_cast<float>(M_PI) / f1)
- sd::math::nd4j_atan<float,float>(static_cast<float>(M_E) / f1));
}
op_def static X postProcess(X reduction, Nd4jLong n, X *extraParams) {
return reduction;
}
};
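/**
* Shannon entropy over squared inputs: -sum(p * log(p)) with p = x^2
*/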
template <typename X, typename Z>
class ShannonEntropy {
public:
no_op_exec_special_accumulation
no_op_exec_special_accumulation_cuda
const static functions::ReduceType reduceType = functions::ReduceType::SUM;
op_def static X startingValue(const X *input) {
return static_cast<X>(0);
}
op_def static Z merge(Z old, Z opOutput, Z *extraParams) {
return opOutput + old;
}
op_def static Z update(Z old, Z opOutput, Z *extraParams) {
return opOutput + old;
}
op_def static Z op(X d1, Z *extraParams) {
auto p = d1 * d1;
return static_cast<Z>(p) * sd::math::nd4j_log<X, Z>(p);
}
op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParams) {
return -reduction;
}
};
template <typename X, typename Z>
class LogEntropy {
public:
no_op_exec_special_accumulation
no_op_exec_special_accumulation_cuda
const static functions::ReduceType reduceType = functions::ReduceType::SUM;
op_def static X startingValue(const X *input) {
return static_cast<X>(0);
}
op_def static Z merge(Z old, Z opOutput, Z *extraParams) {
return opOutput + old;
}
op_def static Z update(Z old, Z opOutput, Z *extraParams) {
return opOutput + old;
}
op_def static Z op(X d1, Z *extraParams) {
return static_cast<Z>(d1) * sd::math::nd4j_log<X, Z>(d1);
}
op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParams) {
//entropy is -sum(p(x) * log(p(x))); log entropy is log of this
return sd::math::nd4j_log<Z, Z>(-reduction);
}
};
template <typename X, typename Z>
class Entropy {
public:
no_op_exec_special_accumulation
no_op_exec_special_accumulation_cuda
const static functions::ReduceType reduceType = functions::ReduceType::SUM;
op_def static X startingValue(const X *input) {
return static_cast<X>(0);
}
op_def static Z merge(Z old, Z opOutput, Z *extraParams) {
return opOutput + old;
}
op_def static Z update(Z old, Z opOutput, Z *extraParams) {
return opOutput + old;
}
op_def static Z op(X d1, Z *extraParams) {
return static_cast<Z>(d1) * sd::math::nd4j_log<X, Z>(d1);
}
op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParams) {
return static_cast<Z>(-reduction); //entropy is -sum(p(x) * log(p(x)))
}
};
template <typename X>
class ASum {
public:
no_op_exec_special_accumulation_same
no_op_exec_special_accumulation_same_cuda
const static functions::ReduceType reduceType = functions::ReduceType::ASUM;
op_def static X startingValue(const X *input) {
return static_cast<X>(0);
}
op_def static X merge(X old, X opOutput, X *extraParams) {
return sd::math::nd4j_abs<X>(opOutput) + sd::math::nd4j_abs<X>(old);
}
op_def static X update(X old, X opOutput, X *extraParams) {
return sd::math::nd4j_abs<X>(opOutput) + sd::math::nd4j_abs<X>(old);
}
op_def static X op(X d1, X *extraParams) {
return sd::math::nd4j_abs<X>(d1);
}
op_def static X postProcess(X reduction, Nd4jLong n, X *extraParams) {
return sd::math::nd4j_abs<X>(reduction);
}
};
template <typename X, typename Z>
class CountNonZero {
public:
no_op_exec_special_accumulation_long
no_op_exec_special_accumulation_cuda
const static functions::ReduceType reduceType = functions::ReduceType::ASUM;
op_def static Z startingValue(const X *input) {
return static_cast<Z>(0);
}
op_def static Z merge(Z old, Z opOutput, X *extraParams) {
return opOutput + old;
}
op_def static Z update(Z old, Z opOutput, X *extraParams) {
return opOutput + old;
}
op_def static Z op(X d1, X *extraParams) {
return d1 == static_cast<X>(0.0f) ? static_cast<Z>(0.0f) : static_cast<Z>(1.0f);
}
op_def static Z postProcess(Z reduction, Nd4jLong n, X *extraParams) {
return reduction;
}
};
template <typename X, typename Z>
class CountZero {
public:
no_op_exec_special_accumulation_long
no_op_exec_special_accumulation_cuda
const static functions::ReduceType reduceType = functions::ReduceType::SUM;
op_def static Z startingValue(const X *input) {
return static_cast<Z>(0.0f);
}
op_def static Z merge(Z old, Z opOutput, X *extraParams) {
return opOutput + old;
}
op_def static Z update(Z old, Z opOutput, X *extraParams) {
return opOutput + old;
}
op_def static Z op(X d1, X *extraParams) {
return d1 == static_cast<X>(0) ? static_cast<Z>(1) : static_cast<Z>(0);
}
op_def static Z postProcess(X reduction, Nd4jLong n, X *extraParams) {
return static_cast<Z>(reduction);
}
};
template <typename X>
class Prod {
public:
no_op_exec_special_accumulation_same
no_op_exec_special_accumulation_same_cuda
const static functions::ReduceType reduceType = functions::ReduceType::PRODUCT;
op_def static X startingValue(const X *input) {
return static_cast<X>(1);
}
op_def static X merge(X old, X opOutput, X *extraParams) {
return opOutput * old;
}
op_def static X update(X old, X opOutput, X *extraParams) {
return opOutput * old;
}
op_def static X op(X d1, X *extraParams) {
return d1;
}
op_def static X postProcess(X reduction, Nd4jLong n, X *extraParams) {
return reduction;
}
};
template <typename X, typename Z>
class Any {
public:
no_op_exec_special_accumulation
no_op_exec_special_accumulation_cuda
const static functions::ReduceType reduceType = functions::ReduceType::SUM;
op_def static X startingValue(const X *input) {
return static_cast<X>(0.0f);
}
op_def static Z merge(X old, X opOutput, X *extraParams) {
return opOutput + old;
}
op_def static Z update(X old, X opOutput, X *extraParams) {
return opOutput + old;
}
op_def static Z op(X d1, X *extraParams) {
return d1;
}
op_def static Z postProcess(X reduction, Nd4jLong n, X *extraParams) {
return reduction > static_cast<X>(0) ? static_cast<Z>(1) : static_cast<Z>(0) ;
}
};
template <typename X, typename Z>
class All {
public:
no_op_exec_special_accumulation
no_op_exec_special_accumulation_cuda
const static functions::ReduceType reduceType = functions::ReduceType::PRODUCT;
op_def static X startingValue(const X *input) {
return static_cast<X>(1);
}
op_def static Z merge(X old, X opOutput, X *extraParams) {
return opOutput * old;
}
op_def static Z update(X old, X opOutput, X *extraParams) {
return opOutput * old;
}
op_def static Z op(X d1, X *extraParams) {
return d1;
}
op_def static Z postProcess(X reduction, Nd4jLong n, X *extraParams) {
return reduction > static_cast<X>(0) ? static_cast<Z>(1) : static_cast<Z>(0);
}
};
template <typename X, typename Z>
class Mean {
public:
no_op_exec_special_accumulation
no_op_exec_special_accumulation_cuda
const static functions::ReduceType reduceType = functions::ReduceType::SUM;
op_def static X startingValue(const X *input) {
return static_cast<X>(0);
}
op_def static Z merge(Z old, Z opOutput, Z *extraParams) {
return opOutput + old;
}
op_def static Z update(Z old, Z opOutput, Z *extraParams) {
return opOutput + old;
}
op_def static Z op(X d1, Z *extraParams) {
return d1;
}
op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParams) {
return reduction / (Z) n;
}
};
template <typename X, typename Z>
class ReduceFloatBenchmarkOp {
public:
no_op_exec_special_accumulation
no_op_exec_special_accumulation_cuda
const static functions::ReduceType reduceType = functions::ReduceType::SUM;
op_def static X startingValue(const X *input) {
return static_cast<X>(0);
}
op_def static Z merge(Z old, Z opOutput, Z *extraParams) {
return opOutput + old;
}
op_def static Z update(Z old, Z opOutput, Z *extraParams) {
return opOutput + old;
}
op_def static Z op(X d1, Z *extraParams) {
auto f1 = static_cast<float>(d1);
return static_cast<Z>(sd::math::nd4j_pow<float,float,float>(f1, 3)
+ sd::math::nd4j_log<float,float>(f1) * sd::math::nd4j_sin<float,float>(f1)
/ sd::math::nd4j_tanh<float,float>(static_cast<float>(M_E) * static_cast<float>(M_PI) * f1)
* sd::math::nd4j_sqrt<float,float>(static_cast<float>(M_PI) / f1)
- sd::math::nd4j_atan<float,float>(static_cast<float>(M_E) / f1));
}
op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParams) {
return (Z) reduction / (Z) n;
}
};
template <typename X, typename Z>
class AMean {
public:
no_op_exec_special_accumulation
no_op_exec_special_accumulation_cuda
const static functions::ReduceType reduceType = functions::ReduceType::SUM;
op_def static X startingValue(const X *input) {
return static_cast<X>(0);
}
op_def static Z merge(Z old, Z opOutput, Z *extraParams) {
return sd::math::nd4j_abs<X>(opOutput) + sd::math::nd4j_abs<X>(old);
}
op_def static Z update(Z old, Z opOutput, Z *extraParams) {
return opOutput + old;
}
op_def static Z op(X d1, Z *extraParams) {
return sd::math::nd4j_abs<X>(d1);
}
op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParams) {
return sd::math::nd4j_abs<Z>(reduction) / static_cast<Z>(n);
}
};
template <typename X>
class Max {
public:
no_op_exec_special_accumulation_same
no_op_exec_special_accumulation_same_cuda
const static functions::ReduceType reduceType = functions::ReduceType::MAX;
op_def static X startingValue(const X *input) {
return -sd::DataTypeUtils::infOrMax<X>();
}
op_def static X merge(X old, X opOutput, X *extraParams) {
return sd::math::nd4j_max<X>(old, opOutput);
}
op_def static X update(X old, X opOutput, X *extraParams) {
return sd::math::nd4j_max<X>(opOutput, old);
}
op_def static X op(X d1, X d2, X *params) {
return sd::math::nd4j_max<X>(d1, d2);
}
op_def static X op(X d1, X d2) {
return sd::math::nd4j_max<X>(d1, d2);
}
// FIXME: this signature overlaps with MetaOp
op_def static X op(X d1, X *extraParams) {
return d1;
}
op_def static X postProcess(X reduction, Nd4jLong n, X *extraParams) {
return reduction;
}
};
template <typename X, typename Y, typename Z>
class AMaxPairwise {
public:
op_def static Z op(X d1, Y d2, Z *params) {
return op(d1, d2);
}
op_def static Z op(X d1, Y d2) {
auto z1 = static_cast<Z>(d1);
auto z2 = static_cast<Z>(d2);
if (sd::math::nd4j_abs<Z>(z1) > sd::math::nd4j_abs<Z>(z2))
return z1;
else
return z2;
}
};
template <typename X, typename Y, typename Z>
class AMinPairwise {
public:
op_def static Z op(X d1, Y d2, Z *params) {
return op(d1, d2);
}
op_def static Z op(X d1, Y d2) {
auto z1 = static_cast<Z>(d1);
auto z2 = static_cast<Z>(d2);
if (sd::math::nd4j_abs<Z>(z1) < sd::math::nd4j_abs<Z>(z2))
return z1;
else
return z2;
}
};
template <typename X, typename Y, typename Z>
class MaxPairwise {
public:
op_def static Z op(X d1, Y d2, Z *params) {
return sd::math::nd4j_max<Z>(static_cast<Z>(d1), static_cast<Z>(d2));
}
op_def static Z op(X d1, Y d2) {
return sd::math::nd4j_max<Z>(static_cast<Z>(d1), static_cast<Z>(d2));
}
};
template <typename X, typename Y, typename Z>
class MinPairwise {
public:
op_def static Z op(X d1, Y d2, Z *params) {
return sd::math::nd4j_min<Z>(static_cast<Z>(d1), static_cast<Z>(d2));
}
op_def static Z op(X d1, Y d2) {
return sd::math::nd4j_min<Z>(static_cast<Z>(d1), static_cast<Z>(d2));
}
};
template <typename X>
class AMax {
public:
no_op_exec_special_accumulation_same
no_op_exec_special_accumulation_same_cuda
const static functions::ReduceType reduceType = functions::ReduceType::AMAX;
op_def static X startingValue(const X *input) {
return input[0];
}
op_def static X merge(X old, X opOutput, X *extraParams) {
return sd::math::nd4j_max<X>(sd::math::nd4j_abs<X>(old), sd::math::nd4j_abs<X>(opOutput));
}
op_def static X update(X old, X opOutput, X *extraParams) {
return sd::math::nd4j_max<X>(sd::math::nd4j_abs<X>(opOutput), sd::math::nd4j_abs<X>(old));
}
op_def static X op(X d1, X d2, X *params) {
return sd::math::nd4j_max<X>(sd::math::nd4j_abs<X>(d1), sd::math::nd4j_abs<X>(d2));
}
op_def static X op(X d1, X d2) {
return sd::math::nd4j_abs<X>(d1) > sd::math::nd4j_abs<X>(d2) ? d1 : d2;
}
// FIXME: this signature overlaps with MetaOp
op_def static X op(X d1, X *extraParams) {
return sd::math::nd4j_abs<X>(d1);
}
op_def static X postProcess(X reduction, Nd4jLong n, X *extraParams) {
return sd::math::nd4j_abs<X>(reduction);
}
};
template <typename X>
class AMin {
public:
no_op_exec_special_accumulation_same
no_op_exec_special_accumulation_same_cuda
const static functions::ReduceType reduceType = functions::ReduceType::AMIN;
op_def static X startingValue(const X *input) {
return input[0];
}
op_def static X merge(X old, X opOutput, X *extraParams) {
return sd::math::nd4j_min<X>(sd::math::nd4j_abs<X>(old), sd::math::nd4j_abs<X>(opOutput));
}
op_def static X update(X old, X opOutput, X *extraParams) {
return sd::math::nd4j_min<X>(sd::math::nd4j_abs<X>(opOutput), sd::math::nd4j_abs<X>(old));
}
op_def static X op(X d1, X d2, X *params) {
return sd::math::nd4j_min<X>(sd::math::nd4j_abs<X>(d1), sd::math::nd4j_abs<X>(d2));
}
op_def static X op(X d1, X d2) {
return sd::math::nd4j_min<X>(sd::math::nd4j_abs<X>(d1), sd::math::nd4j_abs<X>(d2));
}
// FIXME: this signature overlaps with MetaOp
op_def static X op(X d1, X *extraParams) {
return sd::math::nd4j_abs<X>(d1);
}
op_def static X postProcess(X reduction, Nd4jLong n, X *extraParams) {
return sd::math::nd4j_abs<X>(reduction);
}
};
template <typename X>
class Min {
public:
no_op_exec_special_accumulation_same
no_op_exec_special_accumulation_same_cuda
const static functions::ReduceType reduceType = functions::ReduceType::MIN;
op_def static X startingValue(const X *input) {
return sd::DataTypeUtils::infOrMax<X>();
}
op_def static X merge(X old, X opOutput, X *extraParams) {
return sd::math::nd4j_min<X>(old, opOutput);
}
op_def static X update(X old, X opOutput, X *extraParams) {
return sd::math::nd4j_min<X>(opOutput, old);
}
op_def static X op(X d1, X d2, X *params) {
return sd::math::nd4j_min<X>(d1, d2);
}
op_def static X op(X d1, X d2) {
return sd::math::nd4j_min<X>(d1, d2);
}
// FIXME: this signature overlaps with MetaOp
op_def static X op(X d1, X *extraParams) {
return d1;
}
op_def static X postProcess(X reduction, Nd4jLong n, X *extraParams) {
return reduction;
}
};
template <typename X, typename Z>
class Norm1 {
public:
no_op_exec_special_accumulation
no_op_exec_special_accumulation_cuda
const static functions::ReduceType reduceType = functions::ReduceType::SUM;
op_def static X startingValue(const X *input) {
return static_cast<X>(0);
}
op_def static Z merge(Z old, Z opOutput, Z *extraParams) {
return opOutput + old;
}
op_def static Z update(Z old, Z opOutput, Z *extraParams) {
return opOutput + old;
}
op_def static Z op(X d1, Z *extraParams) {
return static_cast<Z>(sd::math::nd4j_abs<X>(d1));
}
op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParams) {
return reduction;
}
};
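/**
* Euclidean (L2) norm: sqrt(sum(x_i^2))
*/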
template <typename X, typename Z>
class Norm2 {
public:
no_op_exec_special_accumulation
no_op_exec_special_accumulation_cuda
const static functions::ReduceType reduceType = functions::ReduceType::SUM;
op_def static X startingValue(const X *input) {
return static_cast<X>(0);
}
op_def static Z merge(Z old, Z opOutput, Z *extraParams) {
return opOutput + old;
}
op_def static Z update(Z old, Z opOutput, Z *extraParams) {
return opOutput + old;
}
op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParams) {
return sd::math::nd4j_sqrt<Z, Z>(reduction);
}
op_def static Z op(X d1, Z *extraParams) {
return static_cast<Z>(d1 * d1);
}
};
template <typename X, typename Z>
class SquaredNorm {
public:
no_op_exec_special_accumulation
no_op_exec_special_accumulation_cuda
const static functions::ReduceType reduceType = functions::ReduceType::SUM;
op_def static X startingValue(const X *input) {
return static_cast<X>(0);
}
op_def static Z merge(Z old, Z opOutput, Z *extraParams) {
return opOutput + old;
}
op_def static Z update(Z old, Z opOutput, Z *extraParams) {
return opOutput + old;
}
op_def static Z op(X d1, Z *extraParams) {
return static_cast<Z>(d1 * d1);
}
op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParams) {
return reduction;
}
};
template <typename X, typename Z>
class NormFrobenius {
public:
no_op_exec_special_accumulation
no_op_exec_special_accumulation_cuda
const static functions::ReduceType reduceType = functions::ReduceType::SUM;
op_def static X startingValue(const X *input) {
return static_cast<X>(0);
}
op_def static Z merge(Z old, Z opOutput, Z *extraParams) {
return opOutput + old;
}
op_def static Z update(Z old, Z opOutput, Z *extraParams) {
return opOutput + old;
}
op_def static Z op(X d1, Z *extraParams) {
X v = sd::math::nd4j_abs<X>(d1);
return static_cast<Z>(v * v);
}
op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParams) {
return sd::math::nd4j_sqrt<Z, Z>(reduction);
}
};
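/**
* p-norm: (sum(|x_i|^p))^(1/p), with p supplied in extraParams[0]
*/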
template <typename X, typename Z>
class NormP {
public:
no_op_exec_special_accumulation
no_op_exec_special_accumulation_cuda
const static functions::ReduceType reduceType = functions::ReduceType::SUM;
op_def static X startingValue(const X *input) {
return static_cast<X>(0);
}
op_def static Z merge(Z old, Z opOutput, Z *extraParams) {
return opOutput + old;
}
op_def static Z update(Z old, Z opOutput, Z *extraParams) {
return opOutput + old;
}
op_def static Z op(X d1, Z *extraParams) {
return sd::math::nd4j_pow<X, Z, Z>(sd::math::nd4j_abs<X>(d1), extraParams[0]);
}
op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParams) {
return sd::math::nd4j_pow<Z, Z, Z>(reduction, static_cast<Z>(1.0f) / extraParams[0]);
}
};
template <typename X, typename Z>
class NormMax {
public:
no_op_exec_special_accumulation
no_op_exec_special_accumulation_cuda
const static functions::ReduceType reduceType = functions::ReduceType::SUM;
op_def static X startingValue(const X *input) {
return static_cast<X>(0);
}
op_def static Z merge(Z old, Z opOutput, Z *extraParams) {
return opOutput + old;
}
op_def static Z update(Z old, Z opOutput, Z *extraParams) {
return sd::math::nd4j_max<Z>(sd::math::nd4j_abs<Z>(old),
sd::math::nd4j_abs<Z>(opOutput));
}
op_def static Z op(X d1, Z *extraParams) {
return static_cast<Z>(d1);
}
op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParams) {
return sd::math::nd4j_abs<Z>(reduction);
}
};
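/**
* Sample variance: expects the precomputed mean in extraParams[0],
* accumulates squared deviations and divides by (n - 1) in postProcess.
*/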
template <typename X, typename Z>
class Variance {
public:
no_op_exec_special_accumulation
no_op_exec_special_accumulation_cuda
const static functions::ReduceType reduceType = functions::ReduceType::SUM;
op_def static X startingValue(const X *input) {
return static_cast<X>(0.0f);
}
op_def static Z merge(X old, X opOutput, Z *extraParams) {
return old + opOutput;
}
op_def static Z update(X old, X opOutput, Z *extraParams) {
return old + opOutput;
}
op_def static X op(X d1, Z *extraParams) {
X mean = static_cast<X>(extraParams[0]);
X ret = d1 - mean;
return ret * ret;
}
op_def static Z postProcess(X reduction, Nd4jLong n, Z *extraParams) {
// T bias = extraParams[1];
// return (reduction - (sd::math::nd4j_pow<T>(bias, static_cast<T>(2.0f)) / static_cast<T>(n))) / (n - 1)
return static_cast<Z>(reduction) / static_cast<Z>(n - 1);
}
};
/**
* Standard deviation of a buffer
*/
template <typename X, typename Z>
class StandardDeviation {
public:
no_op_exec_special_accumulation
no_op_exec_special_accumulation_cuda
const static functions::ReduceType reduceType = functions::ReduceType::SUM;
op_def static X startingValue(const X *input) {
return static_cast<X>(0.0f);
}
op_def static Z merge(X old, X opOutput, Z *extraParams) {
return old + opOutput;
}
op_def static Z update(X old, X opOutput, Z *extraParams) {
return old + opOutput;
}
op_def static Z op(X d1, Z *extraParams) {
X mean = extraParams[0];
X ret = d1 - mean;
return ret * ret;
}
op_def static Z postProcess(X reduction, Nd4jLong n, Z *extraParams) {
Z ret = Variance<X,Z>::postProcess(reduction, n, extraParams);
Z sqrtRet = sd::math::nd4j_sqrt<X, Z>(ret);
return sqrtRet;
}
};
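/**
* Cosine similarity between 2 arrays: op() accumulates both squared norms in extraParams[0..1] and returns the dot-product term; postProcess divides by the product of the norms
*/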
template <typename X, typename Y>
class CosineSimilarity {
public:
static const int extraParamsLen = 2;
op_def static X *generateExtraParams() {
//T *extraParams = new T[2];
return nullptr;
}
op_def static void finalizeExtraParams(X *extraParams) {
//delete[] extraParams;
}
op_def static Y startingValue(const X *input) {
return static_cast<Y>(0.0f);
}
op_def static Y postProcess(Y reduction, Nd4jLong n, Y *extraParams) {
return reduction / (sd::math::nd4j_sqrt<Y, Y>(extraParams[0]) * sd::math::nd4j_sqrt<Y, Y>(extraParams[1]));
}
op_def static Y op(X d1, X d2, Y *extraParams) {
extraParams[0] += static_cast<Y>(d1 * d1);
extraParams[1] += static_cast<Y>(d2 * d2);
return static_cast<Y>(d1 * d2);
}
op_def static void aggregateExtraParams(Y *extraParamsTotal, Y *extraParamsLocal) {
extraParamsTotal[0] += extraParamsLocal[0];
extraParamsTotal[1] += extraParamsLocal[1];
}
#ifdef __CUDACC__
static _CUDA_D inline Y opAtomic(X d1, X d2, Y *extraParams) {
sd::math::atomics::nd4j_atomicAdd(&extraParams[0],static_cast<Y>(d1 * d1));
sd::math::atomics::nd4j_atomicAdd(&extraParams[1],static_cast<Y>(d2 * d2));
return static_cast<Y>(d1 * d2);
}
#endif
op_def static Y update(Y old, Y opOutput, Y *extraParams) {
return old + opOutput;
}
op_def static Y merge(Y old, Y opOutput, Y *extraParams) {
return update(old, opOutput, extraParams);
}
};
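/*
* Illustrative sketch of the pairwise (reduce3) contract shared by CosineSimilarity and the distance ops below.
* Hypothetical sequential host-side use with float data (names x, y, n are assumptions); the real reduce3
* executors additionally handle strides, TADs, parallelism and CUDA dispatch:
*
*   float extra[2] = { 0.0f, 0.0f };
*   float acc = CosineSimilarity<float, float>::startingValue(x);
*   for (Nd4jLong i = 0; i < n; i++)
*       acc = CosineSimilarity<float, float>::update(acc, CosineSimilarity<float, float>::op(x[i], y[i], extra), extra);
*   float cosine = CosineSimilarity<float, float>::postProcess(acc, n, extra);   // dot / (||x|| * ||y||)
*/

/**
* Jaccard distance between 2 arrays: 1 - sum(min(x, y)) / sum(max(x, y)), with numerator and denominator accumulated in extraParams[0..1]
*/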
template <typename X, typename Y>
class JaccardDistance {
public:
static const int extraParamsLen = 2;
op_def static X *generateExtraParams() {
//T *extraParams = new T[2];
return nullptr;
}
op_def static void finalizeExtraParams(X *extraParams) {
//delete[] extraParams;
}
op_def static Y startingValue(const X *input) {
return static_cast<Y>(0.0f);
}
op_def static Y postProcess(Y reduction, Nd4jLong n, Y *extraParams) {
// num / denom
return (static_cast<Y>(1.0f)) - (extraParams[0] / extraParams[1]);
}
op_def static Y num(X d1, X d2) {
return sd::math::nd4j_min<X>(d1, d2);
}
op_def static Y denom(X d1, X d2) {
return sd::math::nd4j_max<X>(d1, d2);
}
op_def static Y op(X d1, X d2, Y *extraParams) {
extraParams[0] += static_cast<Y>(num(d1, d2));
extraParams[1] += static_cast<Y>(denom(d1, d2));
return static_cast<Y>(0.0f);
}
op_def static void aggregateExtraParams(Y *extraParamsTotal, Y *extraParamsLocal) {
extraParamsTotal[0] += extraParamsLocal[0];
extraParamsTotal[1] += extraParamsLocal[1];
}
#ifdef __CUDACC__
__device__
static inline Y opAtomic(X d1, X d2, Y *extraParams) {
sd::math::atomics::nd4j_atomicAdd(&extraParams[0],num(d1, d2));
sd::math::atomics::nd4j_atomicAdd(&extraParams[1], denom(d1, d2));
return static_cast<Y>(0.0f);
}
#endif
op_def static Y update(Y old, Y opOutput, Y *extraParams) {
return old + opOutput;
}
op_def static Y merge(Y old, Y opOutput, Y *extraParams) {
return update(old, opOutput, extraParams);
}
};
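/**
* Normalized Hamming distance between 2 arrays: the fraction of positions at which the two elements differ
*/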
template <typename X, typename Y>
class SimpleHammingDistance {
public:
static const int extraParamsLen = 0;
op_def static X *generateExtraParams() {
//T *extraParams = new T[2];
return nullptr;
}
op_def static void finalizeExtraParams(X *extraParams) {
//delete[] extraParams;
}
op_def static Y startingValue(const X *input) {
return static_cast<Y>(0.0f);
}
op_def static Y postProcess(Y reduction, Nd4jLong n, Y *extraParams) {
return static_cast<Y>(reduction / n);
}
op_def static Y op(X d1, X d2, Y *extraParams) {
return (d1 == d2) ? static_cast<Y>(0.0f) : static_cast<Y>(1.0f);
}
op_def static void aggregateExtraParams(Y *extraParamsTotal, Y *extraParamsLocal) {
}
#ifdef __CUDACC__
__device__
static inline Y opAtomic(X d1, X d2, Y *extraParams) {
return op(d1, d2, extraParams);
}
#endif
op_def static Y update(Y old, Y opOutput, Y *extraParams) {
return old + opOutput;
}
op_def static Y merge(Y old, Y opOutput, Y *extraParams) {
return update(old, opOutput, extraParams);
}
};
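/**
* Cosine distance between 2 arrays: 1 - cosine similarity
*/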
template <typename X, typename Y>
class CosineDistance {
public:
static const int extraParamsLen = 2;
op_def static X *generateExtraParams() {
//T *extraParams = new T[2];
return nullptr;
}
op_def static void finalizeExtraParams(X *extraParams) {
//delete[] extraParams;
}
op_def static Y startingValue(const X *input) {
return static_cast<Y>(0.0f);
}
op_def static Y postProcess(Y reduction, Nd4jLong n, Y *extraParams) {
return (static_cast<Y>(1.0f)) - (reduction / (sd::math::nd4j_sqrt<Y, Y>(extraParams[0]) * sd::math::nd4j_sqrt<Y, Y>(extraParams[1])));
}
op_def static Y op(X d1, X d2, Y *extraParams) {
extraParams[0] += static_cast<Y>(sd::math::nd4j_abs<X>(d1) * sd::math::nd4j_abs<X>(d1));
extraParams[1] += static_cast<Y>(sd::math::nd4j_abs<X>(d2) * sd::math::nd4j_abs<X>(d2));
return (d1 * d2);
}
op_def static void aggregateExtraParams(Y *extraParamsTotal, Y *extraParamsLocal) {
extraParamsTotal[0] += extraParamsLocal[0];
extraParamsTotal[1] += extraParamsLocal[1];
}
#ifdef __CUDACC__
static _CUDA_D inline Y opAtomic(X d1, X d2, Y *extraParams) {
sd::math::atomics::nd4j_atomicAdd(&extraParams[0], sd::math::nd4j_abs<Y>(d1) * sd::math::nd4j_abs<Y>(d1));
sd::math::atomics::nd4j_atomicAdd(&extraParams[1], sd::math::nd4j_abs<Y>(d2) * sd::math::nd4j_abs<Y>(d2));
return (d1 * d2);
}
#endif
op_def static Y update(Y old, Y opOutput, Y *extraParams) {
return old + opOutput;
}
op_def static Y merge(Y old, Y opOutput, Y *extraParams) {
return update(old, opOutput, extraParams);
}
};
/**
* Dot product between 2 arrays
*/
template <typename X, typename Y>
class Dot {
public:
static const int extraParamsLen = 0;
op_def static X * generateExtraParams() {
return nullptr;
}
op_def static void finalizeExtraParams(X *extraParamsRef) {
//no-op
//delete[] * extraParamsRef;
}
op_def static Y startingValue(const X *input) {
return static_cast<Y>(0.0f);
}
op_def static Y postProcess(Y reduction, Nd4jLong n, Y *extraParamsRef) {
return reduction;
}
op_def static Y op(X d1, X d2, Y *extraParamsRef) {
return static_cast<Y>(d1 * d2);
}
#ifdef __CUDACC__
__device__
static inline Y opAtomic(X d1, X d2, Y *extraParamsRef) {
return op(d1, d2, extraParamsRef);
}
#endif
op_def static Y update(Y old, Y opOutput, Y *extraParamsRef) {
return opOutput + old;
}
op_def static Y merge(Y old, Y opOutput, Y *extraParamsRef) {
return update(old, opOutput, extraParamsRef);
}
op_def static void aggregateExtraParams(Y *extraParamsTotal, Y *extraParamsLocal) {}
};
/**
* Op to check equality within arrays
*/
template <typename X, typename Z>
class EqualsWithEps {
public:
static const int extraParamsLen = 0;
op_def static X * generateExtraParams() {
return nullptr;
}
op_def static void finalizeExtraParams(X *extraParamsRef) {
//no-op
}
op_def static Z startingValue(const X *input) {
return static_cast<Z>(0.0f);
}
op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParamsRef) {
return reduction;
}
op_def static Z op(X d1, X d2, Z *extraParamsRef) {
double eps = sd::math::nd4j_abs<double>(extraParamsRef[2]);
return static_cast<Z>(!sd::math::nd4j_eq<X>(d1, d2, eps));
}
#ifdef __CUDACC__
__device__
static inline Z opAtomic(X d1, X d2, Z *extraParamsRef) {
return op(d1, d2, extraParamsRef);
}
#endif
op_def static Z update(Z old, Z opOutput, Z *extraParamsRef) {
return opOutput + old;
}
op_def static Z merge(Z old, Z opOutput, Z *extraParamsRef) {
return update(old, opOutput, extraParamsRef);
}
op_def static void aggregateExtraParams(Z *extraParamsTotal, Z *extraParamsLocal) {}
};
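/**
* Euclidean (L2) distance between 2 arrays: square root of the summed squared differences
*/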
template <typename X, typename Y>
class EuclideanDistance {
public:
static const int extraParamsLen = 0;
op_def static X * generateExtraParams() {
return nullptr;
}
op_def static void finalizeExtraParams(X *extraParamsRef) {
//no-op
}
op_def static Y startingValue(const X *input) {
return static_cast<Y>(0.0f);
}
op_def static Y postProcess(Y reduction, Nd4jLong n, Y *extraParamsRef) {
return sd::math::nd4j_sqrt<Y, Y>(reduction);
}
op_def static Y op(X d1, X d2, Y *extraParamsRef) {
X ret = d1 - d2;
return static_cast<Y>(ret * ret);
}
#ifdef __CUDACC__
__device__
static inline Y opAtomic(X d1, X d2, Y *extraParamsRef) {
return op(d1, d2, extraParamsRef);
}
#endif
op_def static Y update(Y old, Y opOutput, Y *extraParamsRef) {
return opOutput + old;
}
op_def static Y merge(Y old, Y opOutput, Y *extraParamsRef) {
return update(old, opOutput, extraParamsRef);
}
op_def static void aggregateExtraParams(Y *extraParamsTotal, Y *extraParamsLocal) {}
};
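/**
* Manhattan (L1) distance between 2 arrays: sum of absolute differences
*/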
template <typename X, typename Y>
class ManhattanDistance {
public:
static const int extraParamsLen = 0;
op_def static X * generateExtraParams() {
return nullptr;
}
op_def static void finalizeExtraParams(X *extraParamsRef) {
//no-op
}
op_def static Y startingValue(const X *input) {
return static_cast<Y>(0.0f);
}
op_def static Y postProcess(Y reduction, Nd4jLong n, Y *extraParamsRef) {
return reduction;
}
op_def static Y op(X d1, X d2, Y *extraParamsRef) {
return sd::math::nd4j_abs<X>(d1 - d2);
}
op_def static Y update(Y old, Y opOutput, Y *extraParamsRef) {
return old + opOutput;
}
op_def static void aggregateExtraParams(Y *extraParamsTotal, Y *extraParamsLocal) {
}
#ifdef __CUDACC__
__device__
static inline Y opAtomic(X d1, X d2, Y *extraParamsRef) {
return op(d1, d2, extraParamsRef);
}
#endif
#ifndef __clang__
#pragma omp declare simd uniform(extraParamsRef)
#endif
op_def static Y merge(Y old, Y opOutput, Y *extraParamsRef) {
return update(old, opOutput, extraParamsRef);
}
};
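/**
* Index reduction returning the index of the element with the largest absolute value
*/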
template <typename X, typename Z>
class IndexAbsoluteMax {
public:
static _CUDA_HD inline functions::indexreduce::IndexValue<X> op(functions::indexreduce::IndexValue<X> val, X *extraParams) {
val.value = sd::math::nd4j_abs<X>(val.value);
return val;
}
static _CUDA_HD inline functions::indexreduce::IndexValue<X> update(functions::indexreduce::IndexValue<X> &old, functions::indexreduce::IndexValue<X> &opOutput, X *extraParams) {
opOutput.value = sd::math::nd4j_abs<X>(opOutput.value);
old.value = sd::math::nd4j_abs<X>(old.value);
if (opOutput.value > old.value)
return opOutput;
#ifdef __CUDACC__
// workaround for cuda race condition at merge phase
else if (opOutput.value == old.value && opOutput.index < old.index)
return opOutput;
#elif defined(__GNUC__)
#endif
return old;
}
static _CUDA_HD inline functions::indexreduce::IndexValue<X> merge(
functions::indexreduce::IndexValue<X> f1,
functions::indexreduce::IndexValue<X> f2, X *extraParams) {
if (sd::math::nd4j_abs<X>(f1.value) > sd::math::nd4j_abs<X>(f2.value))
return f2;
return f1;
}
static _CUDA_HD inline functions::indexreduce::IndexValue<X> postProcess(
functions::indexreduce::IndexValue<X> reduction, int n, int xOffset,
X *dx, int incx, X *extraParams, X *result) {
return reduction;
}
static _CUDA_HD inline X startingValue(const X *input) {
return 0;
}
static _CUDA_HD inline functions::indexreduce::IndexValue<X> startingIndexValue(const X *input) {
functions::indexreduce::IndexValue<X> local;
local.value = startingValue(input);
local.index = 0;
return local;
}
static _CUDA_HD inline functions::indexreduce::IndexValue<X> op(functions::indexreduce::IndexValue<X> d1,
functions::indexreduce::IndexValue<X> d2, X *extraParams) {
return d1;
}
};
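/**
* Index reduction returning the first (lowest) index whose value satisfies the MatchCondition encoded in extraParams
*/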
template <typename X, typename Z>
class FirstIndex {
public:
static _CUDA_HD inline functions::indexreduce::IndexValue<X> op(functions::indexreduce::IndexValue<X> val, X *extraParams) {
return val;
}
static _CUDA_HD functions::indexreduce::IndexValue<X> update(functions::indexreduce::IndexValue<X> &old, functions::indexreduce::IndexValue<X> &opOutput, X *extraParams) {
#ifdef __CUDACC__
if (opOutput.index < 0)
return old;
#endif
auto res = simdOps::MatchCondition<X,X>::op(opOutput.value, extraParams);
//printf("res: %f; oldIdx: %i; newIdx: %i\n", res, old.index, opOutput.index);
if (res == static_cast<X>(0))
return old;
if (old.index < 0)
return opOutput;
if (old.index > opOutput.index)
return opOutput;
return old;
}
static _CUDA_HD inline X startingValue(const X *input) {
return -sd::DataTypeUtils::infOrMax<X>();
}
static _CUDA_HD inline functions::indexreduce::IndexValue<X> startingIndexValue(const X *input) {
functions::indexreduce::IndexValue<X> local;
local.value = startingValue(input);
local.index = -1;
return local;
}
static _CUDA_HD inline functions::indexreduce::IndexValue<X> op(functions::indexreduce::IndexValue<X> d1,
functions::indexreduce::IndexValue<X> d2, X *extraParams) {
return d1;
}
static _CUDA_HD inline functions::indexreduce::IndexValue<X> merge(
functions::indexreduce::IndexValue<X> f1,
functions::indexreduce::IndexValue<X> f2, X *extraParams) {
if (f1.index > f2.index)
return f2;
return f1;
}
static _CUDA_HD inline functions::indexreduce::IndexValue<X> postProcess(
functions::indexreduce::IndexValue<X> reduction, int n, int xOffset,
X *dx, int incx, X *extraParams, X *result) {
return reduction;
}
};
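/**
* Index reduction returning the last (highest) index whose value satisfies the MatchCondition encoded in extraParams
*/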
template <typename X, typename Z>
class LastIndex {
public:
static _CUDA_HD inline functions::indexreduce::IndexValue<X> op(functions::indexreduce::IndexValue<X> val, X *extraParams) {
return val;
}
static _CUDA_HD functions::indexreduce::IndexValue<X> update(functions::indexreduce::IndexValue<X> &old, functions::indexreduce::IndexValue<X> &opOutput, X *extraParams) {
#ifdef __CUDACC__
if (opOutput.index < 0)
return old;
#endif
auto res = simdOps::MatchCondition<X,X>::op(opOutput.value, extraParams);
if (res == static_cast<X>(0))
return old;
if (old.index < 0)
return opOutput;
if (old.index < opOutput.index)
return opOutput;
return old;
}
static _CUDA_HD inline X startingValue(const X *input) {
return -sd::DataTypeUtils::infOrMax<X>();
}
static _CUDA_HD inline functions::indexreduce::IndexValue<X> startingIndexValue(const X *input) {
functions::indexreduce::IndexValue<X> local;
local.value = startingValue(input);
local.index = -1;
return local;
}
static _CUDA_HD inline functions::indexreduce::IndexValue<X> op(functions::indexreduce::IndexValue<X> d1,
functions::indexreduce::IndexValue<X> d2, X *extraParams) {
return d1;
}
static _CUDA_HD inline functions::indexreduce::IndexValue<X> merge(
functions::indexreduce::IndexValue<X> f1,
functions::indexreduce::IndexValue<X> f2, X *extraParams) {
if (f1.index < f2.index)
return f2;
return f1;
}
static _CUDA_HD inline functions::indexreduce::IndexValue<X> postProcess(
functions::indexreduce::IndexValue<X> reduction, int n, int xOffset,
X *dx, int incx, X *extraParams, X *result) {
return reduction;
}
};
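/**
* Index reduction returning the index of the largest element (argmax)
*/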
template <typename X, typename Z>
class IndexMax {
public:
static _CUDA_HD inline functions::indexreduce::IndexValue<X> op(functions::indexreduce::IndexValue<X> val, X *extraParams) {
return val;
}
static _CUDA_HD functions::indexreduce::IndexValue<X> update(functions::indexreduce::IndexValue<X> &old, functions::indexreduce::IndexValue<X> &opOutput, X *extraParams) {
if (opOutput.value > old.value) {
return opOutput;
}
#ifdef __CUDACC__
// workaround for cuda race condition at merge phase
else if (opOutput.value == old.value && opOutput.index < old.index)
return opOutput;
#elif defined(__GNUC__)
#endif
return old;
}
static _CUDA_HD inline functions::indexreduce::IndexValue<X> merge(
functions::indexreduce::IndexValue<X> f1,
functions::indexreduce::IndexValue<X> f2, X *extraParams) {
if (f1.value > f2.value)
return f2;
return f1;
}
static _CUDA_HD inline functions::indexreduce::IndexValue<X> postProcess(
functions::indexreduce::IndexValue<X> reduction, int n, int xOffset,
X *dx, int incx, X *extraParams, X *result) {
return reduction;
}
static _CUDA_HD inline X startingValue(const X *input) {
return -sd::DataTypeUtils::infOrMax<X>();
}
static _CUDA_HD inline functions::indexreduce::IndexValue<X> startingIndexValue(const X *input) {
functions::indexreduce::IndexValue<X> local;
local.value = startingValue(input);
local.index = 0;
return local;
}
static _CUDA_HD inline functions::indexreduce::IndexValue<X> op(functions::indexreduce::IndexValue<X> d1,
functions::indexreduce::IndexValue<X> d2, X *extraParams) {
return d1;
}
};
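// IndexAbsoluteMin: index-reduce functor that tracks the index of the element with the smallest absolute value.
// update() rewrites both candidates through nd4j_abs before comparing; on CUDA, ties are broken towards the
// lower index so the merge phase stays deterministic. Illustrative example: for the input {3.0f, -0.5f, 2.0f}
// the reduction yields index 1, since |-0.5| is the smallest magnitude.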
template <typename X, typename Z>
class IndexAbsoluteMin {
public:
static _CUDA_HD inline functions::indexreduce::IndexValue<X> op(
functions::indexreduce::IndexValue<X> val, X *extraParams) {
return val;
}
static _CUDA_HD inline X startingValue(const X *input) {
return sd::DataTypeUtils::infOrMax<X>();
}
static _CUDA_HD inline functions::indexreduce::IndexValue<X> startingIndexValue(const X *input) {
functions::indexreduce::IndexValue<X> local;
local.value = startingValue(input);
local.index = 0;
return local;
}
static _CUDA_HD inline functions::indexreduce::IndexValue<X> update(functions::indexreduce::IndexValue<X> &old, functions::indexreduce::IndexValue<X> &opOutput, X *extraParams) {
opOutput.value = sd::math::nd4j_abs<X>(opOutput.value);
old.value = sd::math::nd4j_abs<X>(old.value);
if (opOutput.value < old.value)
return opOutput;
#ifdef __CUDACC__
// workaround for cuda race condition at merge phase
else if (opOutput.value == old.value && opOutput.index < old.index)
return opOutput;
#elif defined(__GNUC__)
#endif
return old;
}
static _CUDA_HD inline functions::indexreduce::IndexValue<X> merge(
functions::indexreduce::IndexValue<X> f1,
functions::indexreduce::IndexValue<X> f2, X *extraParams) {
if (sd::math::nd4j_abs<X>(f1.value) < sd::math::nd4j_abs<X>(f2.value))
return f2;
return f1;
}
static _CUDA_HD inline functions::indexreduce::IndexValue<X> postProcess(
functions::indexreduce::IndexValue<X> reduction, int n, int xOffset,
X *dx, int incx, X *extraParams, X *result) {
return reduction;
}
static _CUDA_HD inline functions::indexreduce::IndexValue<X> op(functions::indexreduce::IndexValue<X> d1,
functions::indexreduce::IndexValue<X> d2, X *extraParams) {
return d1;
}
};
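// IndexMin: index-reduce functor that tracks the index of the smallest element under plain signed comparison.
// The starting value is DataTypeUtils::infOrMax<X>() so any real element wins the first comparison; as above,
// CUDA ties are resolved towards the lower index. For {3.0f, -0.5f, 2.0f} the reduction yields index 1 (value -0.5).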
template <typename X, typename Z>
class IndexMin {
public:
static _CUDA_HD inline functions::indexreduce::IndexValue<X> op(
functions::indexreduce::IndexValue<X> val, X *extraParams) {
return val;
}
static _CUDA_HD inline X startingValue(const X *input) {
return sd::DataTypeUtils::infOrMax<X>();
}
static _CUDA_HD inline functions::indexreduce::IndexValue<X> startingIndexValue(const X *input) {
functions::indexreduce::IndexValue<X> local;
local.value = startingValue(input);
local.index = 0;
return local;
}
static _CUDA_HD inline functions::indexreduce::IndexValue<X> update(functions::indexreduce::IndexValue<X> &old, functions::indexreduce::IndexValue<X> &opOutput, X *extraParams) {
if (opOutput.value < old.value)
return opOutput;
#ifdef __CUDACC__
// workaround for cuda race condition at merge phase
else if (opOutput.value == old.value && opOutput.index < old.index)
return opOutput;
#elif defined(__GNUC__)
#endif
return old;
}
static _CUDA_HD inline functions::indexreduce::IndexValue<X> merge(
functions::indexreduce::IndexValue<X> f1,
functions::indexreduce::IndexValue<X> f2, X *extraParams) {
if (f1.value < f2.value)
return f2;
return f1;
}
static _CUDA_HD inline functions::indexreduce::IndexValue<X> postProcess(
functions::indexreduce::IndexValue<X> reduction, int n, int xOffset,
X *dx, int incx, X *extraParams, X *result) {
return reduction;
}
static _CUDA_HD inline functions::indexreduce::IndexValue<X> op(functions::indexreduce::IndexValue<X> d1,
functions::indexreduce::IndexValue<X> d2, X *extraParams) {
return d1;
}
};
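// SummaryStatsVariance: extracts the (optionally bias-corrected) variance from the accumulated SummaryStatsData.
// If the bias-corrected estimate turns out negative (a numerical artifact of the correction), the plain variance
// is returned instead.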
template <typename X, typename Z>
class SummaryStatsVariance {
public:
static _CUDA_HD inline Z getValue(const bool biasCorrected, functions::summarystats::SummaryStatsData<X> val) {
if (biasCorrected) {
Z ret = static_cast<Z>(val.varianceBiasCorrected());
if (ret < static_cast<Z>(0.0f))
return static_cast<Z>(val.variance());
return ret;
}
return static_cast<Z>(val.variance());
}
static _CUDA_HD inline functions::summarystats::SummaryStatsData<X> op(functions::summarystats::SummaryStatsData<X> d1, Z *extraParams) {
return d1;
}
};
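// SummaryStatsStandardDeviation: square root of the variance above, with the same fallback to the uncorrected
// variance whenever the bias-corrected estimate is negative.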
template <typename X, typename Z>
class SummaryStatsStandardDeviation {
public:
static _CUDA_HD inline Z getValue(const bool biasCorrected, functions::summarystats::SummaryStatsData<X> val) {
if (biasCorrected) {
auto ret = static_cast<Z>(val.varianceBiasCorrected());
if (ret < static_cast<Z>(0.0f))
return sd::math::nd4j_sqrt<double, Z>(val.variance());
else
return sd::math::nd4j_sqrt<double, Z>(ret);
}
return sd::math::nd4j_sqrt<double, Z>(val.variance());
}
static _CUDA_HD inline functions::summarystats::SummaryStatsData<X> op(functions::summarystats::SummaryStatsData<X> d1, Z *extraParams) {
return d1;
}
};
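// DropOut: zeroes an element with probability params[0] (prob). On CUDA a pseudo-random value in [0, 1] is
// derived from clock64() and the thread id via a cosine hash; on the host rand() is used. This is a lightweight
// draw rather than a counter-based RNG.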
template <typename X>
class DropOut {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
inline _CUDA_D static X op(X d1, X *params) {
X prob = params[0];
#ifdef __CUDACC__
X length = params[1];
X tid = blockIdx.x * blockDim.x + threadIdx.x;
X rnd = sd::math::nd4j_abs<X>(sd::math::nd4j_cos<X>(static_cast<X>(clock64()) * static_cast<X>(tid) + static_cast<X>(length) * static_cast<X>(tid)));
#else
                X rnd = static_cast<X>(rand()) / static_cast<X>(RAND_MAX); // cast before dividing: integer division rand() / RAND_MAX would almost always be 0
#endif
return rnd >= prob ? static_cast<X>(0.0f) : d1;
}
};
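// DropOutInverted: inverted dropout. Elements are dropped with probability d2 (prob), and the survivors are
// scaled by 1/prob so the expected value of the output matches the input.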
template <typename X, typename Y, typename Z>
class DropOutInverted {
public:
no_op_exec_special
no_op_exec_special_cuda
#ifdef __CUDACC__
__device__
#endif
inline static Z op(X d1, Y d2, Z *params) {
Y prob = d2;
#ifdef __CUDACC__
X length = params[1];
X tid = blockIdx.x * blockDim.x + threadIdx.x;
X rnd = sd::math::nd4j_abs<X>(sd::math::nd4j_cos<X>(static_cast<X>(clock64()) * static_cast<X>(tid) + static_cast<X>(length) * static_cast<X>(tid)));
#else
            X rnd = static_cast<X>(rand()) / static_cast<X>(RAND_MAX); // cast before dividing: integer division rand() / RAND_MAX would almost always be 0
#endif
            return rnd >= static_cast<X>(prob) ? static_cast<Z>(0.0f) : static_cast<Z>(d1 / static_cast<X>(prob));
}
};
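// ReplaceNans: pairwise op that returns d2 wherever d1 is NaN and passes d1 through otherwise.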
template <typename X, typename Y, typename Z>
class ReplaceNans {
public:
no_op_exec_special
no_op_exec_special_cuda
op_def static Z op(X d1, Y d2, Z *params) {
return sd::math::nd4j_isnan(d1) ? static_cast<Z>(d2) : static_cast<Z>(d1) ;
}
};
// this op is used for conditional pairwise transforms only
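// Parameter layout, as consumed below: params[0] = compare value, params[2] = eps, params[3] = comparison mode
// (params[1] is unused here). The mode selects the predicate applied to the first operand zd1, e.g. mode 0 is
// |zd1 - compare| <= eps, mode 2 is zd1 < compare, mode 9 is isnan(zd1); when the predicate holds, zd2 is
// returned, otherwise zd1 is kept.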
template <typename X, typename Y, typename Z>
class CompareAndReplace{
public:
// op definition for PairWise Transform
op_def static Z op(X d1, Y d2, Z *params) {
auto zd1 = static_cast<Z>(d1);
auto zd2 = static_cast<Z>(d2);
auto compare = params[0];
auto eps = params[2];
int mode = (int) params[3];
if (mode == 0) // equals
if (sd::math::nd4j_abs<Z>(zd1 - compare) <= eps)
return zd2;
else
return zd1;
else if (mode == 1) // not equals eps
if (sd::math::nd4j_abs<Z>(zd1 - compare) > eps)
return zd2;
else
return zd1;
else if (mode == 2) // less_than eps
if (zd1 < compare)
return zd2;
else
return zd1;
            else if (mode == 3) // greater_than
if (zd1 > compare)
return zd2;
else
return zd1;
else if (mode == 4) // less_or_equals_than
if (zd1 <= compare)
return zd2;
else
return zd1;
else if (mode == 5) // greater_or_equals_than
if (zd1 >= compare)
return zd2;
else
return zd1;
else if (mode == 6) // abs_less_than
if (sd::math::nd4j_abs<Z>(zd1) < compare)
return zd2;
else
return zd1;
else if (mode == 7) // abs_greater_than
if (sd::math::nd4j_abs<Z>(zd1) > compare)
return zd2;
else
return zd1;
else if (mode == 8) // is inf
if (sd::math::nd4j_isinf(zd1))
return zd2;
else
return zd1;
else if (mode == 9) // is nan
if (sd::math::nd4j_isnan(zd1))
return zd2;
else
return zd1;
            else if (mode == 10) // equals (exact, no eps)
if (zd1 == compare)
return zd2;
else
return zd1;
            else if (mode == 11) // not_equals (exact, no eps)
if (zd1 != compare)
return zd2;
else
return zd1;
else if (mode == 12) // abs_greater_or_equals_than
if (sd::math::nd4j_abs<Z>(zd1) >= compare)
return zd2;
else
return zd1;
else if (mode == 13) // abs_less_or_equals_than
if (sd::math::nd4j_abs<Z>(zd1) <= compare)
return zd2;
else
return zd1;
else
printf("Undefined boolean operation: [%i]\n", mode);
return zd1;
}
};
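// CompareAndSet (pairwise flavour): same mode table as CompareAndReplace, but the predicate is evaluated on the
// second operand d2 (note: modes 12 and 13 below inspect |d1| instead); d2 replaces d1 whenever the predicate holds.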
template <typename X, typename Y, typename Z>
class CompareAndSet {
public:
// op definition for PairWise Transform
op_def static Z op(X dX, Y dY, Z *params) {
auto d1 = static_cast<Z>(dX);
auto d2 = static_cast<Z>(dY);
auto compare = params[0];
auto eps = params[2];
auto mode = static_cast<int>(params[3]);
if (mode == 0) // equals
if (sd::math::nd4j_abs<Z>(d2 - compare) <= eps)
return d2;
else
return d1;
else if (mode == 1) // not equals
if (sd::math::nd4j_abs<Z>(d2 - compare) > eps)
return d2;
else
return d1;
else if (mode == 2) // less_than
if (d2 < compare)
return d2;
else
return d1;
            else if (mode == 3) // greater_than
if (d2 > compare)
return d2;
else
return d1;
else if (mode == 4) // less_or_equals_than
if (d2 <= compare)
return d2;
else
return d1;
else if (mode == 5) // greater_or_equals_than
if (d2 >= compare)
return d2;
else
return d1;
else if (mode == 6) // abs_less_than
if (sd::math::nd4j_abs<Z>(d2) < compare)
return d2;
else
return d1;
else if (mode == 7) // abs_greater_than
if (sd::math::nd4j_abs<Z>(d2) > compare)
return d2;
else
return d1;
else if (mode == 8) // is inf
if (sd::math::nd4j_isinf(d2))
return d2;
else
return d1;
else if (mode == 9) // is nan
if (sd::math::nd4j_isnan(d2))
return d2;
else
return d1;
            else if (mode == 10) // equals (exact, no eps)
if (d2 == compare)
return d2;
else
return d1;
            else if (mode == 11) // not_equals (exact, no eps)
if (d2 != compare)
return d2;
else
return d1;
else if (mode == 12) // abs_greater_or_equals_than
if (sd::math::nd4j_abs<Z>(d1) >= compare)
return d2;
else
return d1;
else if (mode == 13) // abs_less_or_equals_than
if (sd::math::nd4j_abs<Z>(d1) <= compare)
return d2;
else
return d1;
else
printf("Undefined boolean operation: [%i]\n", mode);
return d1;
}
};
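// CompareAndSetTransform (unary flavour): params[0] = compare, params[1] = set, params[2] = eps, params[3] = mode.
// When the selected predicate of d1 against `compare` holds, the element is replaced with `set`; otherwise d1 is
// returned unchanged.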
template <typename X>
class CompareAndSetTransform {
public:
no_op_exec_special_same
no_op_exec_special_same_cuda
// op definition for Transform
op_def static X op(X d1, X *params) {
auto compare = params[0];
auto set = params[1];
auto eps = params[2];
            // params[3] selects the comparison mode: with mode == 0 the value is replaced by `set` when d1 equals `compare` (within eps), with mode == 1 when it does not, and so on for the modes below
int mode = (int) params[3];
if (mode == 0) // equals
if (sd::math::nd4j_abs<X>(d1 - compare) <= eps)
return set;
else
return d1;
//return sd::math::nd4j_abs<T>(d1 - compare) <= eps ? set : d1;
else if (mode == 1) // not equals
if (sd::math::nd4j_abs<X>(d1 - compare) > eps)
return set;
else
return d1;
//return sd::math::nd4j_abs<T>(d1 - compare) > eps ? set : d1;
else if (mode == 2) // less_than
if (d1 < compare)
return set;
else
return d1;
            else if (mode == 3) // greater_than
if (d1 > compare)
return set;
else
return d1;
else if (mode == 4) // less_or_equals_than
if (d1 <= compare)
return set;
else
return d1;
else if (mode == 5) // greater_or_equals_than
if (d1 >= compare)
return set;
else
return d1;
else if (mode == 6) // abs_less_than
if (sd::math::nd4j_abs<X>(d1) < compare)
return set;
else
return d1;
else if (mode == 7) // abs_greater_than
if (sd::math::nd4j_abs<X>(d1) > compare)
return set;
else
return d1;
else if (mode == 8) // is inf
if (sd::math::nd4j_isinf(d1))
return set;
else
return d1;
else if (mode == 9) // is nan
if (sd::math::nd4j_isnan(d1))
return set;
else
return d1;
            else if (mode == 10) // equals (exact, no eps)
if (d1 == compare)
return set;
else
return d1;
            else if (mode == 11) // not_equals (exact, no eps)
if (d1 != compare)
return set;
else
return d1;
else if (mode == 12) // abs_greater_or_equals_than
if (sd::math::nd4j_abs<X>(d1) >= compare)
return set;
else
return d1;
else if (mode == 13) // abs_less_or_equals_than
if (sd::math::nd4j_abs<X>(d1) <= compare)
return set;
else
return d1;
else
printf("Undefined boolean operation: [%i]\n", mode);
return d1;
}
};
}
#endif