2019-06-06 14:21:15 +02:00
|
|
|
/*******************************************************************************
|
|
|
|
* Copyright (c) 2015-2018 Skymind, Inc.
|
|
|
|
*
|
|
|
|
* This program and the accompanying materials are made available under the
|
|
|
|
* terms of the Apache License, Version 2.0 which is available at
|
|
|
|
* https://www.apache.org/licenses/LICENSE-2.0.
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
|
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
|
|
* License for the specific language governing permissions and limitations
|
|
|
|
* under the License.
|
|
|
|
*
|
|
|
|
* SPDX-License-Identifier: Apache-2.0
|
|
|
|
******************************************************************************/
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
#ifndef OPS_H_
|
|
|
|
#define OPS_H_
|
|
|
|
|
|
|
|
#include <op_boilerplate.h>
|
|
|
|
#include <array/DataTypeUtils.h>
|
|
|
|
#include <helpers/shape.h>
|
|
|
|
#include <vector>
|
|
|
|
#include <Environment.h>
|
|
|
|
#include <loops/summarystatsreduce.h>
|
|
|
|
#include <loops/ReduceType.h>
|
|
|
|
|
|
|
|
#define MIN_V 1e-12
|
|
|
|
#define MAX_FLOAT 1e37
|
|
|
|
#define MIN_FLOAT 1e-37
|
|
|
|
#define MAX_INT 2147483647
|
|
|
|
#define MIN_CUTFOFF -3.79297773665f
|
|
|
|
#define FLOAT_MIN_NORMAL 1.17549435e-38
|
|
|
|
#define EPS 1e-5
|
|
|
|
#define AFFINITY close
|
|
|
|
#define DOUBLE_PI_T T(2.0 * 3.14159265358979323846)
|
|
|
|
#define DOUBLE_PI_X X(2.0 * 3.14159265358979323846)
|
|
|
|
|
|
|
|
#define no_op_exec_special_any static const bool requiresSpecial = false; static void execSpecial(X *dx, Nd4jLong *xShapeBuffer, Z *result, Nd4jLong *resultShapeBuffer, X *extraParams, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) {}
|
|
|
|
#define no_op_exec_special_bool static const bool requiresSpecial = false; static void execSpecial(X *dx, Nd4jLong *xShapeBuffer, Z *result, Nd4jLong *resultShapeBuffer, X *extraParams, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) {}
|
|
|
|
#define no_op_exec_special_same static const bool requiresSpecial = false; static void execSpecial(X *dx, Nd4jLong *xShapeBuffer, X *result, Nd4jLong *resultShapeBuffer, X *extraParams, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) {}
|
|
|
|
#define no_op_exec_special static const bool requiresSpecial = false; static void execSpecial(X *dx, Nd4jLong *xShapeBuffer, Z *result, Nd4jLong *resultShapeBuffer, Z *extraParams, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) {}
|
|
|
|
#define no_op_exec_special_accumulation static const bool requiresSpecialAccumulation = false; static void execSpecial(X *x, Nd4jLong *xShapeInfo, Z *extraParams, Z *result, Nd4jLong *resultShapeInfoBuffer, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffset){}
|
|
|
|
#define no_op_exec_special_accumulation_long static const bool requiresSpecialAccumulation = false; static void execSpecial(X *x, Nd4jLong *xShapeInfo, X *extraParams, Z *result, Nd4jLong *resultShapeInfoBuffer, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffset){}
|
|
|
|
#define no_op_exec_special_accumulation_same static const bool requiresSpecialAccumulation = false; static void execSpecial(X *x, Nd4jLong *xShapeInfo, X *extraParams, X *result, Nd4jLong *resultShapeInfoBuffer, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffset){}
|
|
|
|
#ifdef __CUDACC__
|
|
|
|
#define no_op_exec_special_any_cuda static __device__ void execSpecialCuda(X *dx, Nd4jLong *xShapeBuffer, Z *result, Nd4jLong *resultShapeBuffer, X *extraParams, int *allocationPointer, Z *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) {}
|
|
|
|
#define no_op_exec_special_bool_cuda static __device__ void execSpecialCuda(X *dx, Nd4jLong *xShapeBuffer, Z *result, Nd4jLong *resultShapeBuffer, X *extraParams, int *allocationPointer, Z *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) {}
|
|
|
|
#define no_op_exec_special_same_cuda static __device__ void execSpecialCuda(X *dx, Nd4jLong *xShapeBuffer, X *result, Nd4jLong *resultShapeBuffer, X *extraParams, int *allocationPointer, X *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) {}
|
|
|
|
#define no_op_exec_special_cuda static __device__ void execSpecialCuda(X *dx, Nd4jLong *xShapeBuffer,Z *result, Nd4jLong *resultShapeBuffer,Z *extraParams, int *allocationPointer, Z *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) {}
|
|
|
|
#define no_op_exec_special_accumulation_same_cuda static inline __device__ void execSpecialCuda(X *dx, Nd4jLong *xShapeInfo, X *extraParams, X *result, Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, X *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) {}
|
|
|
|
#define no_op_exec_special_accumulation_long_cuda static inline __device__ void execSpecialCuda(X *dx, Nd4jLong *xShapeInfo, X *extraParams, Z *result, Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, Z *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) {}
|
|
|
|
#define no_op_exec_special_accumulation_cuda static inline __device__ void execSpecialCuda(X *dx, Nd4jLong *xShapeInfo, Z *extraParams, Z *result, Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, Z *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) {}
|
|
|
|
|
|
|
|
#else
|
|
|
|
// hacky fix for isnan/being being out of scope
|
|
|
|
//#ifdef IOS
|
|
|
|
//#define isinf(x) 0 // this isn't right. But std::isinf fails
|
|
|
|
//#define isnan(x) 0
|
|
|
|
//#else
|
|
|
|
//#define isnan std::isnan
|
|
|
|
//#define isinf std::isinf
|
|
|
|
//#endif
|
|
|
|
|
|
|
|
#define no_op_exec_special_cuda
|
|
|
|
#define no_op_exec_special_accumulation_cuda
|
|
|
|
#define no_op_exec_special_accumulation_same_cuda
|
|
|
|
#define no_op_exec_special_accumulation_long_cuda
|
|
|
|
#define no_op_exec_special_any_cuda
|
|
|
|
#define no_op_exec_special_bool_cuda
|
|
|
|
#define no_op_exec_special_same_cuda
|
|
|
|
#define no_op_exec_special_accumulation_same_cuda
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
#define SELU_ALPHA 1.6732632423543772848170429916717
|
|
|
|
#define SELU_LAMBDA 1.0507009873554804934193349852946
|
|
|
|
|
|
|
|
#ifdef _OPENMP
|
|
|
|
#pragma omp declare reduction(maxTF : float,double,float16,bfloat16 : \
|
|
|
|
omp_out = nd4j::math::nd4j_max(omp_in, omp_out) )\
|
|
|
|
initializer (omp_priv=-MAX_FLOAT)
|
|
|
|
|
|
|
|
#pragma omp declare reduction(minTF : float,double,float16,bfloat16 : \
|
|
|
|
omp_out = nd4j::math::nd4j_min(omp_in, omp_out) )\
|
|
|
|
initializer (omp_priv=MAX_FLOAT)
|
|
|
|
|
|
|
|
#pragma omp declare reduction(maxT : float,double,float16,bfloat16,int,Nd4jLong,Nd4jULong,int8_t,uint8_t,bool,int16_t,uint16_t,uint32_t : \
|
|
|
|
omp_out = nd4j::math::nd4j_max(omp_in, omp_out) )\
|
|
|
|
initializer (omp_priv=0)
|
|
|
|
|
|
|
|
#pragma omp declare reduction(minT : float,double,float16,bfloat16,int,Nd4jLong,Nd4jULong,int8_t,uint8_t,bool,int16_t,uint16_t,uint32_t : \
|
|
|
|
omp_out = nd4j::math::nd4j_min(omp_in, omp_out) )\
|
|
|
|
initializer (omp_priv=0)
|
|
|
|
|
|
|
|
#pragma omp declare reduction(amaxT : float,double,float16,bfloat16,int,Nd4jLong,Nd4jULong,int8_t,uint8_t,bool,int16_t,uint16_t,uint32_t : \
|
|
|
|
omp_out = nd4j::math::nd4j_max(nd4j::math::nd4j_abs(omp_in), nd4j::math::nd4j_abs(omp_out)) )
|
|
|
|
|
|
|
|
#pragma omp declare reduction(aminT : float,double,float16,bfloat16,int,Nd4jLong,Nd4jULong,int8_t,uint8_t,bool,int16_t,uint16_t,uint32_t : \
|
|
|
|
omp_out = nd4j::math::nd4j_min(nd4j::math::nd4j_abs(omp_in), nd4j::math::nd4j_abs(omp_out)) )
|
|
|
|
|
|
|
|
#pragma omp declare reduction(asumT : float,double,float16,bfloat16,int,Nd4jLong,Nd4jULong,int8_t,uint8_t,bool,int16_t,uint16_t,uint32_t : \
|
|
|
|
omp_out = nd4j::math::nd4j_abs(omp_in) + nd4j::math::nd4j_abs(omp_out))\
|
|
|
|
initializer (omp_priv=0)
|
|
|
|
|
|
|
|
#pragma omp declare reduction(sumT : float,double,float16,bfloat16,int,Nd4jLong,Nd4jULong,int8_t,uint8_t,bool,int16_t,uint16_t,uint32_t : \
|
|
|
|
omp_out = omp_in + omp_out)\
|
|
|
|
initializer (omp_priv=0)
|
|
|
|
|
|
|
|
#pragma omp declare reduction(prodT : float,double,float16,bfloat16,int,Nd4jLong,Nd4jULong,int8_t,uint8_t,bool,int16_t,uint16_t,uint32_t : \
|
|
|
|
omp_out = omp_in * omp_out)\
|
|
|
|
initializer (omp_priv=1)
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
namespace functions {
|
|
|
|
namespace indexreduce {
|
|
|
|
template <typename T>
|
|
|
|
struct IndexValue {
|
|
|
|
T value;
|
|
|
|
Nd4jLong index;
|
|
|
|
_CUDA_HD IndexValue() = default;
|
|
|
|
_CUDA_HD IndexValue(const T val, const Nd4jLong ind): index(ind), value(val) {}
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
namespace summarystats {
|
|
|
|
template <typename T>
|
|
|
|
class SummaryStatsData;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
namespace simdOps {
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class Add {
|
|
|
|
public:
|
|
|
|
op_def static Z op(X d1, Y d2) {
|
|
|
|
return static_cast<Z>(d1 + d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
return static_cast<Z>(d1 + d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1) {
|
|
|
|
return static_cast<Z>(d1);
|
|
|
|
}
|
|
|
|
|
|
|
|
// op for MetaOps
|
|
|
|
op_def static Z op(X d1, Y *params) {
|
|
|
|
return static_cast<Z>(d1 + params[0]);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X startingValue() {
|
|
|
|
return static_cast<X>(0.f);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y>
|
|
|
|
class NewAdd {
|
|
|
|
public:
|
|
|
|
op_def static X op(X d1, Y d2, X *params) {
|
|
|
|
return d1 + d2;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class Subtract {
|
|
|
|
public:
|
|
|
|
op_def static Z op(X d1, Y d2) {
|
|
|
|
return static_cast<Z>(d1 - d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
return static_cast<Z>(d1 - d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1) {
|
|
|
|
return static_cast<Z>(d1);
|
|
|
|
}
|
|
|
|
|
|
|
|
// op for MetaOps
|
|
|
|
op_def static Z op(X d1, Y *params) {
|
|
|
|
return static_cast<Z>(d1 - params[0]);
|
|
|
|
}
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class SquaredSubtract {
|
|
|
|
public:
|
|
|
|
op_def static Z op(X d1, Y d2) {
|
|
|
|
auto d = static_cast<Z>(d1 - d2);
|
|
|
|
return d * d;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
auto d = static_cast<Z>(d1 - d2);
|
|
|
|
return d * d;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
|
|
|
|
// op for MetaOps
|
|
|
|
op_def static Z op(X d1, Y *params) {
|
|
|
|
auto d = static_cast<Z>(d1 - params[0]);
|
|
|
|
return d * d;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class SquaredReverseSubtract {
|
|
|
|
public:
|
|
|
|
op_def static Z op(X d1, Y d2) {
|
|
|
|
auto d = static_cast<Z>(d2 - d1);
|
|
|
|
return d * d;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
auto d = static_cast<Z>(d2 - d1);
|
|
|
|
return d * d;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
|
|
|
|
// op for MetaOps
|
|
|
|
op_def static Z op(X d1, Y *params) {
|
|
|
|
auto d = static_cast<Z>(params[0] - d1);
|
|
|
|
return d * d;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class ReverseSubtract {
|
|
|
|
public:
|
|
|
|
op_def static Z op(X d1, Y d2) {
|
|
|
|
return static_cast<Z>(d2 - d1);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
return static_cast<Z>(d2 - d1);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
|
|
|
|
// op for MetaOps
|
|
|
|
op_def static Z op(X d1, Y *params) {
|
|
|
|
return static_cast<Z>(params[0] - d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class LogPoissonLossFull {
|
|
|
|
|
|
|
|
public:
|
|
|
|
op_def static Z op(X z, Y c) {
|
|
|
|
auto zz = static_cast<Z>(z);
|
|
|
|
auto zc = static_cast<Z>(c);
|
|
|
|
return (nd4j::math::nd4j_exp<Y, Z>(c) - zz * zc + (zz * nd4j::math::nd4j_log<X, Z>(z) - zz + static_cast<Z>(0.5f) * nd4j::math::nd4j_log<Z, Z>(static_cast<Z>(DOUBLE_PI_X) * zz)));
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X z, Y c, Z *params) {
|
|
|
|
auto zz = static_cast<Z>(z);
|
|
|
|
auto zc = static_cast<Z>(c);
|
|
|
|
return (nd4j::math::nd4j_exp<Y, Z>(c) - zz * zc + (zz * nd4j::math::nd4j_log<X, Z>(z) - zz + static_cast<Z>(0.5f) * nd4j::math::nd4j_log<Z, Z>(static_cast<Z>(DOUBLE_PI_X) * zz)));
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X z) {
|
|
|
|
auto zz = static_cast<Z>(z);
|
|
|
|
return (zz * nd4j::math::nd4j_log<Y, Z>(z) - zz + static_cast<Z>(0.5f) * nd4j::math::nd4j_log<Z, Z>(static_cast<Z>(DOUBLE_PI_X) * zz));
|
|
|
|
}
|
|
|
|
|
|
|
|
// op for MetaOps
|
|
|
|
op_def static X op(X z, Y *params) {
|
|
|
|
return (nd4j::math::nd4j_exp<X, X>(params[0]) - z * params[0] + (z * nd4j::math::nd4j_log<X, Z>(z) - z + static_cast<X>(0.5f) * nd4j::math::nd4j_log<X, Z>(DOUBLE_PI_X * z)));
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class LogPoissonLoss {
|
|
|
|
|
|
|
|
public:
|
|
|
|
op_def static Z op(X z, Y c) {
|
|
|
|
auto zz = static_cast<Z>(z);
|
|
|
|
auto zc = static_cast<Z>(c);
|
|
|
|
return (nd4j::math::nd4j_exp<Y, Z>(c) - zz * zc);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X z, Y c, Z *params) {
|
|
|
|
auto zz = static_cast<Z>(z);
|
|
|
|
auto zc = static_cast<Z>(c);
|
|
|
|
return (nd4j::math::nd4j_exp<Y, Z>(c) - zz * zc);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X z) {
|
|
|
|
return static_cast<Z>(z);
|
|
|
|
}
|
|
|
|
|
|
|
|
// op for MetaOps
|
|
|
|
op_def static Z op(X z, Y *params) {
|
|
|
|
return (nd4j::math::nd4j_exp<Y, Z>(params[0]) - static_cast<Z>(z) * static_cast<Z>(params[0]));
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class Multiply {
|
|
|
|
public:
|
|
|
|
op_def static Z op(X d1, Y d2) {
|
|
|
|
return static_cast<Z>(d1 * d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
return static_cast<Z>(d1 * d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1) {
|
|
|
|
return static_cast<Z>(d1);
|
|
|
|
}
|
|
|
|
|
|
|
|
// op for MetaOps
|
|
|
|
op_def static Z op(X d1, Y *params) {
|
|
|
|
return static_cast<Z>(d1 * params[0]);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X startingValue() {
|
|
|
|
return static_cast<X>(1.f);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class Divide {
|
|
|
|
public:
|
|
|
|
op_def static Z op(X d1, Y d2) {
|
|
|
|
return static_cast<Z>(d1 / d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
return static_cast<Z>(d1 / d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1) {
|
|
|
|
return static_cast<Z>(d1);
|
|
|
|
}
|
|
|
|
|
|
|
|
// op for MetaOps
|
|
|
|
op_def static Z op(X d1, Y *params) {
|
|
|
|
return static_cast<Z>(d1 / params[0]);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X startingValue() {
|
|
|
|
return static_cast<X>(1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2019-10-03 17:22:17 +02:00
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class DivideNoNan {
|
|
|
|
public:
|
|
|
|
op_def static Z op(X d1, Y d2) {
|
|
|
|
if (d2 == (Y)0) return (Z)0;
|
|
|
|
return static_cast<Z>(d1 / d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
if (d2 == (Y)0) return (Z)0;
|
|
|
|
return static_cast<Z>(d1 / d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1) {
|
|
|
|
return static_cast<Z>(d1);
|
|
|
|
}
|
|
|
|
|
|
|
|
// op for MetaOps
|
|
|
|
op_def static Z op(X d1, Y *params) {
|
|
|
|
if (params[0] == (Y)0) return (Z)0;
|
|
|
|
return static_cast<Z>(d1 / params[0]);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X startingValue() {
|
|
|
|
return static_cast<X>(1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2019-06-06 14:21:15 +02:00
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class SafeDivide {
|
|
|
|
public:
|
|
|
|
op_def static Z op(X d1, Y d2) {
|
|
|
|
if(d2 == static_cast<Y>(0))
|
|
|
|
return static_cast<Z>(0);
|
|
|
|
return static_cast<Z>(d1 / d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
if(d2 == static_cast<Y>(0))
|
|
|
|
return static_cast<Z>(0);
|
|
|
|
return static_cast<Z>(d1 / d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1) {
|
|
|
|
return static_cast<Z>(d1);
|
|
|
|
}
|
|
|
|
|
|
|
|
// op for MetaOps
|
|
|
|
op_def static Z op(X d1, Y *params) {
|
|
|
|
if(params[0] == static_cast<Y>(0))
|
|
|
|
return static_cast<Z>(0);
|
|
|
|
return static_cast<Z>(d1 / params[0]);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class FloorDiv {
|
|
|
|
public:
|
|
|
|
op_def static Z op(X d1, Y d2) {
|
|
|
|
return nd4j::math::nd4j_floor<Z,Z>(static_cast<Z>(d1 / d2));
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
return nd4j::math::nd4j_floor<Z,Z>(static_cast<Z>(d1 / d2));
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1) {
|
|
|
|
return nd4j::math::nd4j_floor<Z,Z>(static_cast<Z>(d1));
|
|
|
|
}
|
|
|
|
|
|
|
|
// op for MetaOps
|
|
|
|
op_def static Z op(X d1, Y *params) {
|
|
|
|
return nd4j::math::nd4j_floor<Z,Z>(static_cast<Z>(d1 / params[0]));
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class TruncateDiv {
|
|
|
|
public:
|
|
|
|
op_def static Z op(X d1, Y d2) {
|
|
|
|
auto i1 = static_cast<int>(d1);
|
|
|
|
auto i2 = static_cast<int>(d2);
|
|
|
|
return static_cast<Z>(i1 / i2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
auto i1 = static_cast<int>(d1);
|
|
|
|
auto i2 = static_cast<int>(d2);
|
|
|
|
return static_cast<Z>(i1 / i2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
|
|
|
|
// op for MetaOps
|
|
|
|
op_def static Z op(X d1, Y *params) {
|
|
|
|
auto i1 = static_cast<int>(d1);
|
|
|
|
auto i2 = static_cast<int>(params[0]);
|
|
|
|
return static_cast<Z>(i1 / i2);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class TruncateMod {
|
|
|
|
public:
|
|
|
|
op_def static Z op(X d1, Y d2) {
|
|
|
|
auto i1 = static_cast<int>(d1);
|
|
|
|
auto i2 = static_cast<int>(d2);
|
|
|
|
return static_cast<Z>(i1 % i2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
auto i1 = static_cast<int>(d1);
|
|
|
|
auto i2 = static_cast<int>(d2);
|
|
|
|
return static_cast<Z>(i1 % i2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1) {
|
|
|
|
return static_cast<Z>(d1);
|
|
|
|
}
|
|
|
|
|
|
|
|
// op for MetaOps
|
|
|
|
op_def static Z op(X d1, Y *params) {
|
|
|
|
auto i1 = static_cast<int>(d1);
|
|
|
|
auto i2 = static_cast<int>(params[0]);
|
|
|
|
return static_cast<Z>(i1 % i2);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template<typename X, typename Y, typename Z>
|
|
|
|
class Remainder {
|
|
|
|
public:
|
|
|
|
op_def static Z op(X d1, Y d2) {
|
|
|
|
return nd4j::math::nd4j_remainder<X, Y, Z>(d1, d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
return nd4j::math::nd4j_remainder<X, Y, Z>(d1, d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
|
|
|
|
// op for MetaOps
|
|
|
|
op_def static Z op(X d1, Y *params) {
|
|
|
|
return nd4j::math::nd4j_remainder<X, Y, Z>(d1, params[0]);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class FMod {
|
|
|
|
public:
|
|
|
|
op_def static Z op(X d1, Y d2) {
|
|
|
|
return nd4j::math::nd4j_fmod<X, Y, Z>(d1, d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
return nd4j::math::nd4j_fmod<X, Y, Z>(d1, d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
|
|
|
|
// op for MetaOps
|
|
|
|
op_def static Z op(X d1, Y *params) {
|
|
|
|
return nd4j::math::nd4j_fmod<X, Y, Z>(d1, params[0]);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class FloorMod {
|
|
|
|
public:
|
|
|
|
op_def static Z op(X d1, Y d2) {
|
|
|
|
auto m = nd4j::math::nd4j_fmod<X, Y, Z>(d1, d2);
|
|
|
|
return (d1 < static_cast<X>(0)) == (d2 < static_cast<Y>(0)) ? m : nd4j::math::nd4j_fmod<Z, Y, Z>(m + static_cast<Z>(d2), d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
auto m = nd4j::math::nd4j_fmod<X, Y, Z>(d1, d2);
|
|
|
|
return (d1 < static_cast<X>(0.0f)) == (d2 < static_cast<Y>(0)) ? m : nd4j::math::nd4j_fmod<Z, Y, Z>(m + static_cast<Z>(d2), d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
|
|
|
|
// op for MetaOps
|
|
|
|
op_def static Z op(X d1, Y *params) {
|
|
|
|
return op(d1, params[0]);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class ReverseDivide {
|
|
|
|
public:
|
|
|
|
op_def static Z op(X d1, Y d2) {
|
|
|
|
return static_cast<Z>(d2 / d1);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
return static_cast<Z>(d2 / d1);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1) {
|
|
|
|
return static_cast<Z>(d1);
|
|
|
|
}
|
|
|
|
|
|
|
|
// op for MetaOps
|
|
|
|
op_def static Z op(X d1, Y *params) {
|
|
|
|
return static_cast<Z>(params[0] / d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class CopyPws {
|
|
|
|
public:
|
|
|
|
op_def static Z op(X d1, Y d2) {
|
|
|
|
return static_cast<Z>(d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
return static_cast<Z>(d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1) {
|
|
|
|
return static_cast<Z>(d1);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y *params) {
|
|
|
|
return static_cast<Z>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class Copy {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class Copy2 {
|
|
|
|
public:
|
|
|
|
op_def static Z op(X d1, Y d2) {
|
|
|
|
return static_cast<Z>(d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
return static_cast<Z>(d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1) {
|
|
|
|
return static_cast<Z>(d1);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y *params) {
|
|
|
|
return static_cast<Z>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class Axpy {
|
|
|
|
public:
|
|
|
|
op_def static Z op(X d1, Y d2) {
|
|
|
|
return static_cast<Z>(d2 + d1);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
auto alpha = params[0];
|
|
|
|
return alpha * static_cast<Z>(d1) + static_cast<Z>(d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1) {
|
|
|
|
return static_cast<Z>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class Assign {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_any
|
|
|
|
no_op_exec_special_any_cuda
|
|
|
|
|
|
|
|
op_def static Z op(X d1, X *params) {
|
|
|
|
return static_cast<Z>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class And {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_bool
|
|
|
|
no_op_exec_special_bool_cuda
|
|
|
|
|
|
|
|
op_def static Z op(X d1, X d2) {
|
|
|
|
return d2 + d1;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, X d2, X *params) {
|
|
|
|
if (params != nullptr) {
|
|
|
|
auto comp = params[0];
|
|
|
|
return d1 != comp && d2 != comp ? static_cast<Z>(1) : static_cast<Z>(0);
|
|
|
|
} else {
|
|
|
|
auto b1 = static_cast<bool>(d1);
|
|
|
|
auto b2 = static_cast<bool>(d2);
|
|
|
|
|
|
|
|
return (b1 && b2) ? static_cast<Z>(1) : static_cast<Z>(0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
|
|
|
|
// op for MetaOps
|
|
|
|
op_def static Z op(X d1, X *params) {
|
|
|
|
return static_cast<Z>(119);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2019-08-30 09:12:40 +02:00
|
|
|
template <typename X>
|
|
|
|
class IntOr {
|
|
|
|
public:
|
|
|
|
|
|
|
|
op_def static X op(X d1, X d2) {
|
|
|
|
return d2 | d1;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X op(X d1, X d2, X *params) {
|
|
|
|
return op(d1, d2);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class IntAnd {
|
|
|
|
public:
|
|
|
|
|
|
|
|
op_def static X op(X d1, X d2) {
|
|
|
|
return d2 & d1;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X op(X d1, X d2, X *params) {
|
|
|
|
return op(d1, d2);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class IntXor {
|
|
|
|
public:
|
|
|
|
|
|
|
|
op_def static X op(X d1, X d2) {
|
|
|
|
return d2 ^ d1;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X op(X d1, X d2, X *params) {
|
|
|
|
return op(d1, d2);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class ShiftLeft {
|
|
|
|
public:
|
|
|
|
|
|
|
|
op_def static X op(X d1, X d2) {
|
|
|
|
return d1 << d2;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X op(X d1, X d2, X *params) {
|
|
|
|
return op(d1, d2);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class ShiftRight {
|
|
|
|
public:
|
|
|
|
|
|
|
|
op_def static X op(X d1, X d2) {
|
|
|
|
return d1 >> d2;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X op(X d1, X d2, X *params) {
|
|
|
|
return op(d1, d2);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class CyclicShiftLeft {
|
|
|
|
public:
|
|
|
|
|
|
|
|
op_def static X op(X d1, X d2) {
|
|
|
|
return d1 << d2 | d1 >> ((sizeof(X) * 8) - d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X op(X d1, X d2, X *params) {
|
|
|
|
return op(d1, d2);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class CyclicShiftRight {
|
|
|
|
public:
|
|
|
|
|
|
|
|
op_def static X op(X d1, X d2) {
|
|
|
|
return d1 >> d2 | d1 << ((sizeof(X) * 8) - d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X op(X d1, X d2, X *params) {
|
|
|
|
return op(d1, d2);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2019-06-06 14:21:15 +02:00
|
|
|
template <typename X, typename Z>
|
|
|
|
class Or {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_bool
|
|
|
|
no_op_exec_special_bool_cuda
|
|
|
|
|
|
|
|
op_def static Z op(X d1, X d2) {
|
|
|
|
return d2 + d1;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, X d2, X *params) {
|
|
|
|
if (params != nullptr) {
|
|
|
|
auto comp = params[0];
|
|
|
|
|
|
|
|
return d1 != comp || d2 != comp ? static_cast<Z>(1) : static_cast<Z>(0);
|
|
|
|
} else {
|
|
|
|
auto b1 = static_cast<bool>(d1);
|
|
|
|
auto b2 = static_cast<bool>(d2);
|
|
|
|
|
|
|
|
return b1 || b2 ? static_cast<Z>(1) : static_cast<Z>(0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
|
|
|
|
// op for MetaOps
|
|
|
|
op_def static Z op(X d1, X *params) {
|
|
|
|
return static_cast<Z>(119);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class Xor {
|
|
|
|
public:
|
|
|
|
|
|
|
|
no_op_exec_special_bool
|
|
|
|
no_op_exec_special_bool_cuda
|
|
|
|
|
|
|
|
op_def static Z op(X d1, X d2) {
|
|
|
|
return d2 + d1;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, X d2, X *params) {
|
|
|
|
if (params != nullptr) {
|
|
|
|
auto comp = params[0];
|
|
|
|
|
|
|
|
return ((d1 == comp && d2 != comp) || (d1 != comp && d2 == comp)) ? static_cast<Z>(1) : static_cast<Z>(0);
|
|
|
|
} else {
|
|
|
|
auto b1 = static_cast<bool>(d1);
|
|
|
|
auto b2 = static_cast<bool>(d2);
|
|
|
|
|
|
|
|
return (!b1 && b2 )||(b1 && !b2) ? static_cast<Z>(1) : static_cast<Z>(0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class Not {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_bool
|
|
|
|
no_op_exec_special_bool_cuda
|
|
|
|
|
|
|
|
op_def static Z op(X d1, X d2) {
|
|
|
|
return static_cast<Z>(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, X d2, X *params) {
|
|
|
|
return d1 != d2 ? static_cast<Z>(1) : static_cast<Z>(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
// this transform op should run only on boolean input
|
|
|
|
op_def static Z op(X d1, X *params) {
|
|
|
|
auto b1 = static_cast<bool>(d1);
|
|
|
|
return !b1;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class LogicalNot {
|
|
|
|
public:
|
|
|
|
op_def static Z op(X d1, Y d2) {
|
|
|
|
return !((int) d1 && (int) d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
return static_cast<X>(!(static_cast<int>(d1) && static_cast<int>(d2)));
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
|
|
|
|
// op for MetaOps
|
|
|
|
op_def static Z op(X d1, Y *params) {
|
|
|
|
return static_cast<X>(119);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class LogicalXor {
|
|
|
|
public:
|
|
|
|
op_def static Z op(X d1, Y d2) {
|
|
|
|
auto i1 = static_cast<int>(d1);
|
|
|
|
auto i2 = static_cast<int>(d2);
|
|
|
|
|
|
|
|
return (i1 | i2) &~ (i1 & i2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
return op(d1, d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
|
|
|
|
// op for MetaOps
|
|
|
|
op_def static Z op(X d1, Y *params) {
|
|
|
|
return static_cast<Z>(119);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class LogicalAnd {
|
|
|
|
public:
|
|
|
|
op_def static Z op(X d1, Y d2) {
|
|
|
|
return static_cast<int>(d1) & static_cast<int>(d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
return op(d1, d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(Y d1) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
|
|
|
|
// op for MetaOps
|
|
|
|
op_def static Z op(X d1, Y *params) {
|
|
|
|
return static_cast<Z>(119);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class LogicalOr {
|
|
|
|
public:
|
|
|
|
op_def static Z op(X d1, Y d2) {
|
|
|
|
return static_cast<int>(d1) | static_cast<int>(d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
return op(d1, d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
|
|
|
|
// op for MetaOps
|
|
|
|
op_def static Z op(X d1, Y *params) {
|
|
|
|
return static_cast<X>(119);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class Mod {
|
|
|
|
public:
|
|
|
|
/*
|
|
|
|
|
|
|
|
// just a optional note, feel free to remove later
|
|
|
|
|
|
|
|
op_def static half op(half d1, half d2, half *params) {
|
|
|
|
return __float2half(simdOps::Mod<float>::op(__half2float(d1), __half2float(d2), nullptr));
|
|
|
|
}
|
|
|
|
*/
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2) {
|
|
|
|
return static_cast<int>(d1) % static_cast<int>(d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
return op(d1, d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
// op for MetaOp
|
|
|
|
op_def static Z op(X d1, Y *params) {
|
|
|
|
return op(d1, params[0]);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class ReverseMod {
|
|
|
|
public:
|
|
|
|
op_def static Z op(X d1, Y d2) {
|
|
|
|
return static_cast<int>(d2) % static_cast<int>(d1);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
return op(d1, d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
// op for MetaOp
|
|
|
|
op_def static Z op(X d1, Y *params) {
|
|
|
|
return op(d1, params[0]);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Whether 2 elements in an array
|
|
|
|
* are epsilion equal
|
|
|
|
*/
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class Epsilon {
|
|
|
|
public:
|
|
|
|
|
|
|
|
op_def static Z op(X d1, X d2) {
|
|
|
|
X diff = d1 - d2;
|
|
|
|
X absDiff = nd4j::math::nd4j_abs<X>(diff);
|
|
|
|
if (absDiff <= static_cast<X>(MIN_V))
|
|
|
|
return static_cast<Z>(1);
|
|
|
|
return static_cast<Z>(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, X d2, X *params) {
|
|
|
|
return op(d1, d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, X *params) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class EqualTo {
|
|
|
|
public:
|
|
|
|
op_def static Z op(X d1, X d2) {
|
|
|
|
return d1 == d2;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, X d2, X *params) {
|
|
|
|
return op(d1, d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, X *params) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class NotEqualTo {
|
|
|
|
public:
|
|
|
|
op_def static Z op(X d1, X d2) {
|
|
|
|
return d1 != d2;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, X d2, X *params) {
|
|
|
|
return op(d1, d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, X *params) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class GreaterThanOrEqual {
|
|
|
|
public:
|
|
|
|
op_def static Z op(X d1, X d2) {
|
|
|
|
return d1 >= d2;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, X d2, X *params) {
|
|
|
|
return op(d1, d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
// FIXME: this signature clashes with MetaOp stuff
|
|
|
|
op_def static Z op(X d1, X *params) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class GreaterThan {
|
|
|
|
public:
|
|
|
|
op_def static Z op(X d1, X d2) {
|
|
|
|
return d1 > d2;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, X d2, X *params) {
|
|
|
|
return op(d1, d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
// FIXME: this signature clashes with MetaOp stuff
|
|
|
|
op_def static Z op(X d1, X *params) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class LessThan {
|
|
|
|
public:
|
|
|
|
op_def static Z op(X d1, X d2) {
|
|
|
|
return d1 < d2;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, X d2, X *params) {
|
|
|
|
return op(d1, d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, X *params) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class LessThanOrEqual {
|
|
|
|
public:
|
|
|
|
op_def static Z op(X d1, X d2) {
|
|
|
|
return d1 <= d2;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, X d2, X *params) {
|
|
|
|
return op(d1, d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, X *params) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class Abs {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_abs<X>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class Ceiling {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_ceil<X,X>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class Cosine {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_cos<X,X>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class Exp {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_exp<X, X>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class HardTanhDerivative {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return ((d1 >= static_cast<X>(-1.f) && d1 <= static_cast<X>(1.f)) ? static_cast<X>(1.f) : static_cast<X>(0.f));
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class HardTanh {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
if (d1 < static_cast<X>(-1))
|
|
|
|
return static_cast<X>(-1);
|
|
|
|
else if (d1 > static_cast<X>(1))
|
|
|
|
return static_cast<X>(1);
|
|
|
|
else
|
|
|
|
return d1;
|
|
|
|
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class Floor {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_floor<X,X>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class Log {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_log<X, X>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class Log1p {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_log<X, X>(1 + d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class LogX {
|
|
|
|
public:
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
return nd4j::math::nd4j_log<X, Z>(d1) / nd4j::math::nd4j_log<Y, Z>(d2) ;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class StabilizeFP16 {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
if (d1 <= static_cast<X>(0))
|
|
|
|
return static_cast<X>(nd4j::DataTypeUtils::min<float16>());
|
|
|
|
else return d1;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class StabilizeX {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
if (d1 <= static_cast<X>(0))
|
|
|
|
return nd4j::DataTypeUtils::min<X>();
|
|
|
|
else return d1;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class SpecialDerivative {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return d1 * (static_cast<X>(1.f) - d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class Neg {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return -d1;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class Erf {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_erf<X,X>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class Erfc {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_erfc<X,X>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class Reciprocal {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
// op_def static T op(T d1) {
|
|
|
|
// return (T(1.0f) / d1);
|
|
|
|
// }
|
|
|
|
// op for MetaOps
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return (static_cast<X>(1) / d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class Sqr {
|
|
|
|
public:
|
|
|
|
no_op_exec_special
|
|
|
|
no_op_exec_special_cuda
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Z *params) {
|
|
|
|
return nd4j::math::nd4j_pow<X, X, Z>(d1, static_cast<X>(2));
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1) {
|
|
|
|
return nd4j::math::nd4j_pow<X, X, Z>(d1, static_cast<X>(2));
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class RelativeError {
|
|
|
|
public:
|
|
|
|
no_op_exec_special
|
|
|
|
no_op_exec_special_cuda
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2) {
|
|
|
|
return nd4j::math::nd4j_re<X>(d1, d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
return op(d1, d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1) {
|
|
|
|
return static_cast<Z>(0);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class BinaryRelativeError {
|
|
|
|
public:
|
|
|
|
no_op_exec_special
|
|
|
|
no_op_exec_special_cuda
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
X threshold = params[0];
|
|
|
|
return nd4j::math::nd4j_re<X>(d1, d2) > threshold ? static_cast<Z>(1) : static_cast<Z>(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1) {
|
|
|
|
return static_cast<Z>(0);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class BinaryMinimumAbsoluteRelativeError {
|
|
|
|
public:
|
|
|
|
no_op_exec_special
|
|
|
|
no_op_exec_special_cuda
|
|
|
|
|
|
|
|
op_def static Z op(X d1, X *params) {
|
|
|
|
X d2 = params[0];
|
|
|
|
X thresholdRelative = params[1];
|
|
|
|
X thresholdAbsolute = params[2];
|
|
|
|
return nd4j::math::nd4j_re<X>(d1, d2) > thresholdRelative ? (nd4j::math::nd4j_abs<X>(d1 - static_cast<X>(d2)) < thresholdAbsolute ? static_cast<Z>(0) : static_cast<Z>(1)) : static_cast<Z>(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
X thresholdRelative = params[0];
|
|
|
|
X thresholdAbsolute = params[1];
|
|
|
|
return nd4j::math::nd4j_re<X>(d1, d2) > thresholdRelative ? (nd4j::math::nd4j_abs<X>(d1 - static_cast<X>(d2)) < thresholdAbsolute ? static_cast<Z>(0) : static_cast<Z>(1)) : static_cast<Z>(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1) {
|
|
|
|
return static_cast<Z>(0);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class ReversePow {
|
|
|
|
public:
|
|
|
|
no_op_exec_special
|
|
|
|
no_op_exec_special_cuda
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Z *params) {
|
|
|
|
return nd4j::math::nd4j_pow<X, X, Z>(params[0], d1);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2) {
|
|
|
|
return nd4j::math::nd4j_pow<X, Y, Z>(d2, d1);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
return nd4j::math::nd4j_pow<X, Y, Z>(d2, d1);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class Pow {
|
|
|
|
public:
|
|
|
|
no_op_exec_special
|
|
|
|
no_op_exec_special_cuda
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Z *params) {
|
|
|
|
return nd4j::math::nd4j_pow<X, X, Z>(d1, params[0]);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2) {
|
|
|
|
return nd4j::math::nd4j_pow<X, Y, Z>(d1, d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
return nd4j::math::nd4j_pow<X, Y, Z>(d1, d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class PowDerivative {
|
|
|
|
public:
|
|
|
|
no_op_exec_special
|
|
|
|
no_op_exec_special_cuda
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Z *params) {
|
|
|
|
return params[0] * nd4j::math::nd4j_pow<X, Z, Z>(d1, static_cast<Z>(params[0]) - static_cast<Z>(1.f));
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2) {
|
|
|
|
return static_cast<Z>(d2) * nd4j::math::nd4j_pow<X, Z, Z>(d1, static_cast<Z>(d2) - static_cast<Z>(1.f));
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
return static_cast<Z>(d2) * nd4j::math::nd4j_pow<X, Z, Z>(d1, static_cast<Z>(d2) - static_cast<Z>(1.f));
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class Round {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_round<X,X>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class IsNan {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_bool
|
|
|
|
no_op_exec_special_bool_cuda
|
|
|
|
|
|
|
|
no_op_exec_special_accumulation
|
|
|
|
no_op_exec_special_accumulation_cuda
|
|
|
|
|
|
|
|
op_def static Z op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_isnan(d1) ? static_cast<X>(1) : static_cast<X>(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X startingValue(const X *input) {
|
|
|
|
return static_cast<X>(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z merge(X old, X opOutput, X *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
op_def static Z update(X old, X opOutput, X *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
op_def static Z postProcess(X reduction, Nd4jLong n, X *extraParams) {
|
|
|
|
return reduction;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class Expm1 {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_exp<X, X>(d1) - static_cast<X>(1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class IsPositive {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_bool
|
|
|
|
no_op_exec_special_bool_cuda
|
|
|
|
|
|
|
|
no_op_exec_special_accumulation
|
|
|
|
no_op_exec_special_accumulation_cuda
|
|
|
|
|
|
|
|
op_def static Z op(X d1, X *params) {
|
|
|
|
return d1 > (X)0.f;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X startingValue(const X *input) {
|
|
|
|
return static_cast<X>(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z merge(X old, X opOutput, X *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
op_def static Z update(X old, X opOutput, X *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
op_def static Z postProcess(X reduction, Nd4jLong n, X *extraParams) {
|
|
|
|
return reduction;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class IsInf {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_bool
|
|
|
|
no_op_exec_special_bool_cuda
|
|
|
|
|
|
|
|
no_op_exec_special_accumulation
|
|
|
|
no_op_exec_special_accumulation_cuda
|
|
|
|
|
|
|
|
op_def static Z op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_isinf<X>(d1) ? static_cast<Z>(1) : static_cast<Z>(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X startingValue(const X *input) {
|
|
|
|
return static_cast<X>(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z merge(X old, X opOutput, X *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
op_def static Z update(X old, X opOutput, X *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
op_def static Z postProcess(X reduction, Nd4jLong n, X *extraParams) {
|
|
|
|
return reduction;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class IsInfOrNan{
|
|
|
|
public:
|
|
|
|
no_op_exec_special_bool
|
|
|
|
no_op_exec_special_bool_cuda
|
|
|
|
|
|
|
|
no_op_exec_special_accumulation
|
|
|
|
no_op_exec_special_accumulation_cuda
|
|
|
|
|
|
|
|
op_def static Z op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_isfin<X>(d1) ? static_cast<Z>(0) : static_cast<Z>(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X startingValue(const X *input) {
|
|
|
|
return static_cast<X>(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z merge(X old, X opOutput, X *extraParams) {
|
Merge master to upstream (#7945)
* Shugeo strided slice zeros (#14)
* Modified strided_slice op to properly work with empty-like shapes.
* Fixed test for reduce_mean with empty-like input.
* [WIP] Last merge (#15)
* correct logsoftmax looss (#2)
* Small SameDiff listener fix (#4)
* Various fixes (#6)
* #7839 Fix for asXMatrix and tests
* #7866 EmbeddingSequenceLayer dtype fix + test
* #7856 SameDiff save/load stream methods
* #7859 RegressionEvaluation rank 4 fix + tests + axis configuration
* EvaluationBinary 3d/4d
* More evaluation 3d/4d tests
* #7847 Evaluation empty checks
* Small test ifx
* #7848 Fix median edge case
* Improve DL4J samediff layer tests
* [WIP] FastText wrapper implemented (#8)
* FastText implemented
* Some fixes
* Fix shapes for wordsNearest
* Validation of input vectors
* Fixes
* Fixed test
* Thread tagged
* Some tweaks
* setContextClassLoader for DeallocatorServiceThread
* Numpy format tests (#1)
* Various fixes (#11)
* #7852 SameDiff gather fix
* #7892 SameDiff placeholder to constant conversion
* #7890 validate input rank for MLN/CG init methods
* Fix broken permute shape calculation
* Permute and gather fixes
* Tests
* #7850 LogSumExp fix + test
* Handful of test fixes
* Empty arrays with non-scalar shapes (#10)
* minor rearrangements for lambdas
* empty tensors with non-scalar shapes
* numpy empty tensors with non-scalar shapes
* few more empty tweaks
* Small fixes
* conv3d signature update
* micro fix in batchnorm mkldnn
* Import fixes
* Fix
* MKL-DNN update
* Small fill fix
* fill with empty input + test
* Fixes
* Small error improvement
* Fix
* one special test
* couple of fixes for lstm
* Rewrite TFGraphMapper.getNDArrayFromTensor to be maintainable and less error prone
* Fixes
* FP16
* Unsigned
* BFloat16
* Fill op - empty tweaks
* - couple of fixes for empty arrays construction
- stack updated
* strided slice fix
* one transform test
* provide method for reducing shapeInfo in case of input array is empty
* Fixed reduceAlongDimensions to use empty input properly.
* couple of broadcast tests
* couple of tests broadcast tests + tweak to make them pass
* add check of non-empty to methods producing sub-arrays
* Fixed reshapeC with zeros in shape.
* complete empty check in reduce_... legacy ops
* Concat and cumsum/prod
* Tweak to empty shape inference on import
* add empty check to the rest of reduce legacy ops
* one more test
* correct typo in evalReduceShapeInfoEmpty
* Added tests for reduce_* ops to tests with zero shapes.
* few more tests for empty reductions
* Fixed strided_slice op with empty case and tests.
* one more empty reduction test
* Fixed strided_slice test.
* add empty check to NDArray::reshapei
* infOrMax
* empty min/max with infinity tests
* made unstack working correctly with empty arrays
* few IndexReduce tests + tweaks for empty shapes
* add test for empty concat
* few tests fixed
* Validation fix for reductions on empty shapes
* Reverse fix
* Reduction shape calc fixes
* SameDiff.generateOutputVariable: don't use shape function to determine number of outputs
* Range fix
* - NDArray constructor updated for scalars/empty arrays
- few tests fixed
* More fixes
* Empty creator fixes
* concat fix
* concat fix
* TF import tests: allow 'both all NaN' and 'both all inf' to pass
* Slice, zero fraction, and reshape fixes
* transpose, gather
* Zero fraction
* scalar cast fix
* Empty reduction axis support
* few more tests fixed
* Fixed input checks conforming with TF for concat op and tests.
* few tests fixed
* matmul scalar shape fix
* Fixed checkout for data type and scalarity with concat to allow non-empty scalars with vector concats.
* broadcast bool fix
* few more tests
* few more tests
* correct evalReduceShapeInfoEmpty
* argmax/argmin + tests
* one more empty edge case + one more test
* argmax/argmin/realdiv_bp tweaks
* empty reshape test + fix
* Helper fixes
* Small fixes
* Gather test fix
* Gather test fix
* Small fixes
* reduce scalar zero values
* scalar mean workaround
* Remove debug code
* along dim mean workaround
* one more test
* - equalsTo() tweak for empty arrays
- one more test
* broadcast tweaks
* [WIP] Fixing outstanding issues for NLP (#9)
* Avoid using not-inited objects
* Test fixed.
* Redundant method avoided for models like FastText
* KMeans++ implementation
* KMeans++ implementation
* Disable parallel execution
* KMeans++
* Tests
* Dev branch merge (#16)
* SameDiff: convertDataType and gradient check util improvements (#12)
* GradCheck util improvements
* StopGradient constructor + test
* SameDiff: Add datatype conversion
* Javadoc and add DataType.isNumerical()
* Small fix
* Fix SameDiff TF import test cases intermediate naming (workaround for bad default)
* TFGraphTestAllHelper: check intermediates in execution order
* Add missing debug listener
* [WIP] lstmBlock fix + other changes (#13)
- fixes lstmBlock issue
- changes NDArray method reshape(), permute(), transpose() by making them return instance instead of pointer
- CheckNumerics op
- fixes for ReduceBool IsInfOrNan & IsFinite
* Small test fix
* CheckNumerics op wrapper
* Fix some issues on master (#17)
* Fix DataVec test issue
* Fix issue with dl4j SameDiff output layer
* Dtype fix for lambda layers
* #7912 BertIterator dtype fix (use float32 not global default)
* [WIP] Next set of CUDA stuff (#7)
New CUDA implementations and improvements
* bad file
* Dev branch master merge (#23)
* SameDiff: convertDataType and gradient check util improvements (#12)
* GradCheck util improvements
* StopGradient constructor + test
* SameDiff: Add datatype conversion
* Javadoc and add DataType.isNumerical()
* Small fix
* Fix SameDiff TF import test cases intermediate naming (workaround for bad default)
* TFGraphTestAllHelper: check intermediates in execution order
* Add missing debug listener
* [WIP] lstmBlock fix + other changes (#13)
- fixes lstmBlock issue
- changes NDArray method reshape(), permute(), transpose() by making them return instance instead of pointer
- CheckNumerics op
- fixes for ReduceBool IsInfOrNan & IsFinite
* Small test fix
* CheckNumerics op wrapper
* Compatibility of deserialization (#18)
Signed-off-by: Alexander Stoyakin <alexander.stoyakin@gmail.com>
* SameDiff: add activation gradient checking support for debugging (#19)
* SameDiff gradient checker: first pass on activation gradient checks
* Fixes + tests for activation gradient checking
* Javadoc
* [WIP] Some nd4j data type corrections (#20)
* Adjust data type
* Set correct Data type.
* Size of proper data type.
* fix averaged cpu load (#22)
* SameDiff ops, TF import and fixes (#24)
* CheckNumerics tests + fixes + misc fixes
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fake quant
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fixes
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* FakeQuantWithMinMaxArgs
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* CheckNumerics fix
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fix libnd4j ALL_INTS and ALL_FLOATS declaration (uint and bfloat types)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Small fix
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Javadoc
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Exception tweak
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* fix
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fix for out of scope stack allocated var use
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Ignores
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Ignore for known failing test (already logged issue)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Merge upstream to fork (#25)
* Add thousand-separator commas to TotalParams (#7915)
* Add thousand-separator commas to TotalParams
The number of parameters can be quite large, and it would help the reading of the summary printout to have the TotalParams column & values at the bottom have thousand-separator-commas in them.
* Add thousand-separator commas to MultiLayerNetwork
Corresponding change to MultiLayerNetwork
Signed-off-by: Jxtps Jxtps <jxtps435@gmail.com>
* Update contributing and issue/PR templates (#7934)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fix link to AdaDelta paper (#7942)
Fix link to AdaDelta paper hosted on matthewzeiler.com
Signed-off-by: Jxtps
* Fixes, and ignores for known/logged failing issues (#7943)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* SameDiff + DL4J/SameDiff: Multiple fixes (#28)
* #7919 HDF5 attribute buffer length fix
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* #7909 Arbiter constructor exception ux improvements
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* #7925 RNN output layer length checks
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* #7939 Add listener for validating inputs are not incorrectly modified
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* #7939 Integrate NonInplaceValidationListener into tests
* #7844 DL4J SameDiff fixes for variable minibatch size
* DL4J SameDiff fixes - ensure gradient for input placeholder is available
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Tweaks to ExternalErrorsFunction - use placeholders, make more robust
* Another fix
* More fixes
* More SameDiff/DL4J fixes
* Scope out scalar array creation in BaseScalarOp
* Remove debug code
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* [WIP] Final dev branch merge (#29)
* SameDiff: convertDataType and gradient check util improvements (#12)
* GradCheck util improvements
* StopGradient constructor + test
* SameDiff: Add datatype conversion
* Javadoc and add DataType.isNumerical()
* Small fix
* Fix SameDiff TF import test cases intermediate naming (workaround for bad default)
* TFGraphTestAllHelper: check intermediates in execution order
* Add missing debug listener
* [WIP] lstmBlock fix + other changes (#13)
- fixes lstmBlock issue
- changes NDArray method reshape(), permute(), transpose() by making them return instance instead of pointer
- CheckNumerics op
- fixes for ReduceBool IsInfOrNan & IsFinite
* Small test fix
* CheckNumerics op wrapper
* Compatibility of deserialization (#18)
Signed-off-by: Alexander Stoyakin <alexander.stoyakin@gmail.com>
* SameDiff: add activation gradient checking support for debugging (#19)
* SameDiff gradient checker: first pass on activation gradient checks
* Fixes + tests for activation gradient checking
* Javadoc
* [WIP] Some nd4j data type corrections (#20)
* Adjust data type
* Set correct Data type.
* Size of proper data type.
* fix averaged cpu load (#22)
* [WIP] Multiple dataset iterators (#27)
* Splitting dataset into arbitrary number
* Fixes
* Multiple split of iterator
* Test
* Test
* Some fixes
* signature change
* one more tweak
Signed-off-by: raver119 <raver119@gmail.com>
* one more test for sequential use of DataSetIteratorSplitter
Signed-off-by: raver119 <raver119@gmail.com>
* Fixes
* Fixes
* one more test for Alexander
Signed-off-by: raver119 <raver119@gmail.com>
* Some fixes
* Some fixes
* one more test for Alexander
Signed-off-by: raver119 <raver119@gmail.com>
* minor test fix
Signed-off-by: raver119 <raver119@gmail.com>
* Some fixes
* Some fixes
* couple of assertions tweaked
Signed-off-by: raver119 <raver119@gmail.com>
* MDS splitter test :/
Signed-off-by: raver119 <raver119@gmail.com>
* Minor refactoring
* Multi dataset
* Some fixes
* More tests
* Small number of test fixes/improvements (failures on CI) (#31)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* [WIP] More CUDA stuff (#26)
* initial commit
Signed-off-by: raver119 <raver119@gmail.com>
* LRN BP CUDA
Signed-off-by: raver119 <raver119@gmail.com>
* less memory
Signed-off-by: raver119 <raver119@gmail.com>
* Fixed bug with crop_and_resize op helper.
* get rid of unnecessary index-calculation dunction
Signed-off-by: Yurii <yurii@skymind.io>
* Fixed sort with nth_element cuda-based helper.
* Refactored nth_element.
* Refactored nth_element op and tests.
* Modified usage of dim array with sortTad routine.
* Refactored main routine of helper for non_max_image_suppression op.
* non_max_image_suppression op helper with cuda kernel implementation. Initial revision.
* fix vol2col cuda kernel
* meh
Signed-off-by: raver119 <raver119@gmail.com>
* topK concept
Signed-off-by: raver119 <raver119@gmail.com>
* unsorted topK with scanWitdh of 1
Signed-off-by: raver119 <raver119@gmail.com>
* correct vol2col tests
* sorted/unsorted topK
Signed-off-by: raver119 <raver119@gmail.com>
* implementation and fixing col2im/col2vol
* Corrected usage flags with input/output with reverse op.
* dup is const now
Signed-off-by: raver119 <raver119@gmail.com>
* percentile op
Signed-off-by: raver119 <raver119@gmail.com>
* group tests for mapool2d
Signed-off-by: Yurii <yurii@skymind.io>
* special test for george
Signed-off-by: raver119 <raver119@gmail.com>
* less threads for sortTad
Signed-off-by: raver119 <raver119@gmail.com>
* provide conv2d for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* remove auther in sort tad kernel code
Signed-off-by: Yurii <yurii@skymind.io>
* provide depthwise_conv2d for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* - max_pooling_with_argmax
- null check for special use
Signed-off-by: raver119 <raver119@gmail.com>
* dts cuda
Signed-off-by: raver119 <raver119@gmail.com>
* provide sconv2d for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* std cuda
Signed-off-by: raver119 <raver119@gmail.com>
* Refactored non_max_suppression op to conform TF implementation.
* Improved suppression helper.
* provide pooling3d for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* minor lstm rearrangements
Signed-off-by: raver119 <raver119@gmail.com>
* more of minor lstm rearrangements
Signed-off-by: raver119 <raver119@gmail.com>
* (bi)dynamic_rnn
Signed-off-by: raver119 <raver119@gmail.com>
* templates init order
Signed-off-by: raver119 <raver119@gmail.com>
* Refactored non_max_suppression op.
* Added cuda kernel for non_max_suppression.
* CPU sort by key/value
Signed-off-by: raver119 <raver119@gmail.com>
* CPU sort TAD by key/value
Signed-off-by: raver119 <raver119@gmail.com>
* CPU sort TAD by key/value tests
Signed-off-by: raver119 <raver119@gmail.com>
* Eliminate compiler error with cuda implementation.
* - repaired gradCheck in cuda
- provide conv2d_bp for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* missed signature
Signed-off-by: raver119 <raver119@gmail.com>
* provide depthwise_conv2d_bp for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* Implementation of lup helper with cuda kernel. Initial commit.
* further work on backprops for convolutions
Signed-off-by: Yurii <yurii@skymind.io>
* CUDA linear sort by key/val
Signed-off-by: raver119 <raver119@gmail.com>
* CUDA tad sort by key/val
Signed-off-by: raver119 <raver119@gmail.com>
* start providing of backprop for pooling2d/3d
Signed-off-by: Yurii <yurii@skymind.io>
* Added atomicAdd for bool datatype.
* dynamic partition concept
Signed-off-by: raver119 <raver119@gmail.com>
* dynamic partition concept
Signed-off-by: raver119 <raver119@gmail.com>
* dynamic partition scalar CUDA
Signed-off-by: raver119 <raver119@gmail.com>
* important comment
Signed-off-by: raver119 <raver119@gmail.com>
* fix pooling2d/3d backprop helpers
Signed-off-by: Yurii <yurii@skymind.io>
* Added non-linear test with dynamic_partition.
* Improved test for dynamic_partition.
* dynamic_partition TAD concept
Signed-off-by: raver119 <raver119@gmail.com>
* - dynamic_partition TAD CUDA impl
- dynamic_partition TAD CPU fix
Signed-off-by: raver119 <raver119@gmail.com>
* - rewrite cpu code for usampling2d/3d
- write cuda code for usampling2d/3d
Signed-off-by: Yurii <yurii@skymind.io>
* dynamic_stitch CUDA vector case
Signed-off-by: raver119 <raver119@gmail.com>
* dynamic_stitch CUDA TAD case concept
Signed-off-by: raver119 <raver119@gmail.com>
* dynamic_stitch CUDA TAD case impl
Signed-off-by: raver119 <raver119@gmail.com>
* Added tests for dynamic_stitch 3D-4D cases.
* minor tests tweaks
Signed-off-by: raver119 <raver119@gmail.com>
* Fixed type check for dynamic stitch.
* min/max bp
Signed-off-by: raver119 <raver119@gmail.com>
* rewrite code for upsampling2d/3d cpu
Signed-off-by: Yurii <yurii@skymind.io>
* reduce min/max/norm_max bp
Signed-off-by: raver119 <raver119@gmail.com>
* lup implementation. Additional enhancements.
* provide code for upsamling2d/3d backprop
Signed-off-by: Yurii <yurii@skymind.io>
* weightedCrossEntropyWithLogits
Signed-off-by: raver119 <raver119@gmail.com>
* Fixed template math atomicMul for 64bit ints.
* Refactored dynamic_partition_bp op.
* inverseBroadcast fix
Signed-off-by: raver119 <raver119@gmail.com>
* DynamicPartitionBP test datatype fixed.
* - nd4j_atomicMul Windows fix
- cpu/NDArrayLambda.hpp excluded from CUDA
Signed-off-by: raver119 <raver119@gmail.com>
2019-06-27 17:37:04 +02:00
|
|
|
return opOutput == static_cast<X>(0) && old == static_cast<X>(0) ? static_cast<Z>(0) : static_cast<Z>(1);
|
2019-06-06 14:21:15 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
op_def static Z update(X old, X opOutput, X *extraParams) {
|
Merge master to upstream (#7945)
* Shugeo strided slice zeros (#14)
* Modified strided_slice op to properly work with empty-like shapes.
* Fixed test for reduce_mean with empty-like input.
* [WIP] Last merge (#15)
* correct logsoftmax looss (#2)
* Small SameDiff listener fix (#4)
* Various fixes (#6)
* #7839 Fix for asXMatrix and tests
* #7866 EmbeddingSequenceLayer dtype fix + test
* #7856 SameDiff save/load stream methods
* #7859 RegressionEvaluation rank 4 fix + tests + axis configuration
* EvaluationBinary 3d/4d
* More evaluation 3d/4d tests
* #7847 Evaluation empty checks
* Small test ifx
* #7848 Fix median edge case
* Improve DL4J samediff layer tests
* [WIP] FastText wrapper implemented (#8)
* FastText implemented
* Some fixes
* Fix shapes for wordsNearest
* Validation of input vectors
* Fixes
* Fixed test
* Thread tagged
* Some tweaks
* setContextClassLoader for DeallocatorServiceThread
* Numpy format tests (#1)
* Various fixes (#11)
* #7852 SameDiff gather fix
* #7892 SameDiff placeholder to constant conversion
* #7890 validate input rank for MLN/CG init methods
* Fix broken permute shape calculation
* Permute and gather fixes
* Tests
* #7850 LogSumExp fix + test
* Handful of test fixes
* Empty arrays with non-scalar shapes (#10)
* minor rearrangements for lambdas
* empty tensors with non-scalar shapes
* numpy empty tensors with non-scalar shapes
* few more empty tweaks
* Small fixes
* conv3d signature update
* micro fix in batchnorm mkldnn
* Import fixes
* Fix
* MKL-DNN update
* Small fill fix
* fill with empty input + test
* Fixes
* Small error improvement
* Fix
* one special test
* couple of fixes for lstm
* Rewrite TFGraphMapper.getNDArrayFromTensor to be maintainable and less error prone
* Fixes
* FP16
* Unsigned
* BFloat16
* Fill op - empty tweaks
* - couple of fixes for empty arrays construction
- stack updated
* strided slice fix
* one transform test
* provide method for reducing shapeInfo in case of input array is empty
* Fixed reduceAlongDimensions to use empty input properly.
* couple of broadcast tests
* couple of tests broadcast tests + tweak to make them pass
* add check of non-empty to methods producing sub-arrays
* Fixed reshapeC with zeros in shape.
* complete empty check in reduce_... legacy ops
* Concat and cumsum/prod
* Tweak to empty shape inference on import
* add empty check to the rest of reduce legacy ops
* one more test
* correct typo in evalReduceShapeInfoEmpty
* Added tests for reduce_* ops to tests with zero shapes.
* few more tests for empty reductions
* Fixed strided_slice op with empty case and tests.
* one more empty reduction test
* Fixed strided_slice test.
* add empty check to NDArray::reshapei
* infOrMax
* empty min/max with infinity tests
* made unstack working correctly with empty arrays
* few IndexReduce tests + tweaks for empty shapes
* add test for empty concat
* few tests fixed
* Validation fix for reductions on empty shapes
* Reverse fix
* Reduction shape calc fixes
* SameDiff.generateOutputVariable: don't use shape function to determine number of outputs
* Range fix
* - NDArray constructor updated for scalars/empty arrays
- few tests fixed
* More fixes
* Empty creator fixes
* concat fix
* concat fix
* TF import tests: allow 'both all NaN' and 'both all inf' to pass
* Slice, zero fraction, and reshape fixes
* transpose, gather
* Zero fraction
* scalar cast fix
* Empty reduction axis support
* few more tests fixed
* Fixed input checks conforming with TF for concat op and tests.
* few tests fixed
* matmul scalar shape fix
* Fixed checkout for data type and scalarity with concat to allow non-empty scalars with vector concats.
* broadcast bool fix
* few more tests
* few more tests
* correct evalReduceShapeInfoEmpty
* argmax/argmin + tests
* one more empty edge case + one more test
* argmax/argmin/realdiv_bp tweaks
* empty reshape test + fix
* Helper fixes
* Small fixes
* Gather test fix
* Gather test fix
* Small fixes
* reduce scalar zero values
* scalar mean workaround
* Remove debug code
* along dim mean workaround
* one more test
* - equalsTo() tweak for empty arrays
- one more test
* broadcast tweaks
* [WIP] Fixing outstanding issues for NLP (#9)
* Avoid using not-inited objects
* Test fixed.
* Redundant method avoided for models like FastText
* KMeans++ implementation
* KMeans++ implementation
* Disable parallel execution
* KMeans++
* Tests
* Dev branch merge (#16)
* SameDiff: convertDataType and gradient check util improvements (#12)
* GradCheck util improvements
* StopGradient constructor + test
* SameDiff: Add datatype conversion
* Javadoc and add DataType.isNumerical()
* Small fix
* Fix SameDiff TF import test cases intermediate naming (workaround for bad default)
* TFGraphTestAllHelper: check intermediates in execution order
* Add missing debug listener
* [WIP] lstmBlock fix + other changes (#13)
- fixes lstmBlock issue
- changes NDArray method reshape(), permute(), transpose() by making them return instance instead of pointer
- CheckNumerics op
- fixes for ReduceBool IsInfOrNan & IsFinite
* Small test fix
* CheckNumerics op wrapper
* Fix some issues on master (#17)
* Fix DataVec test issue
* Fix issue with dl4j SameDiff output layer
* Dtype fix for lambda layers
* #7912 BertIterator dtype fix (use float32 not global default)
* [WIP] Next set of CUDA stuff (#7)
New CUDA implementations and improvements
* bad file
* Dev branch master merge (#23)
* SameDiff: convertDataType and gradient check util improvements (#12)
* GradCheck util improvements
* StopGradient constructor + test
* SameDiff: Add datatype conversion
* Javadoc and add DataType.isNumerical()
* Small fix
* Fix SameDiff TF import test cases intermediate naming (workaround for bad default)
* TFGraphTestAllHelper: check intermediates in execution order
* Add missing debug listener
* [WIP] lstmBlock fix + other changes (#13)
- fixes lstmBlock issue
- changes NDArray method reshape(), permute(), transpose() by making them return instance instead of pointer
- CheckNumerics op
- fixes for ReduceBool IsInfOrNan & IsFinite
* Small test fix
* CheckNumerics op wrapper
* Compatibility of deserialization (#18)
Signed-off-by: Alexander Stoyakin <alexander.stoyakin@gmail.com>
* SameDiff: add activation gradient checking support for debugging (#19)
* SameDiff gradient checker: first pass on activation gradient checks
* Fixes + tests for activation gradient checking
* Javadoc
* [WIP] Some nd4j data type corrections (#20)
* Adjust data type
* Set correct Data type.
* Size of proper data type.
* fix averaged cpu load (#22)
* SameDiff ops, TF import and fixes (#24)
* CheckNumerics tests + fixes + misc fixes
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fake quant
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fixes
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* FakeQuantWithMinMaxArgs
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* CheckNumerics fix
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fix libnd4j ALL_INTS and ALL_FLOATS declaration (uint and bfloat types)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Small fix
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Javadoc
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Exception tweak
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* fix
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fix for out of scope stack allocated var use
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Ignores
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Ignore for known failing test (already logged issue)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Merge upstream to fork (#25)
* Add thousand-separator commas to TotalParams (#7915)
* Add thousand-separator commas to TotalParams
The number of parameters can be quite large, and it would help the reading of the summary printout to have the TotalParams column & values at the bottom have thousand-separator-commas in them.
* Add thousand-separator commas to MultiLayerNetwork
Corresponding change to MultiLayerNetwork
Signed-off-by: Jxtps Jxtps <jxtps435@gmail.com>
* Update contributing and issue/PR templates (#7934)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fix link to AdaDelta paper (#7942)
Fix link to AdaDelta paper hosted on matthewzeiler.com
Signed-off-by: Jxtps
* Fixes, and ignores for known/logged failing issues (#7943)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* SameDiff + DL4J/SameDiff: Multiple fixes (#28)
* #7919 HDF5 attribute buffer length fix
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* #7909 Arbiter constructor exception ux improvements
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* #7925 RNN output layer length checks
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* #7939 Add listener for validating inputs are not incorrectly modified
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* #7939 Integrate NonInplaceValidationListener into tests
* #7844 DL4J SameDiff fixes for variable minibatch size
* DL4J SameDiff fixes - ensure gradient for input placeholder is available
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Tweaks to ExternalErrorsFunction - use placeholders, make more robust
* Another fix
* More fixes
* More SameDiff/DL4J fixes
* Scope out scalar array creation in BaseScalarOp
* Remove debug code
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* [WIP] Final dev branch merge (#29)
* SameDiff: convertDataType and gradient check util improvements (#12)
* GradCheck util improvements
* StopGradient constructor + test
* SameDiff: Add datatype conversion
* Javadoc and add DataType.isNumerical()
* Small fix
* Fix SameDiff TF import test cases intermediate naming (workaround for bad default)
* TFGraphTestAllHelper: check intermediates in execution order
* Add missing debug listener
* [WIP] lstmBlock fix + other changes (#13)
- fixes lstmBlock issue
- changes NDArray method reshape(), permute(), transpose() by making them return instance instead of pointer
- CheckNumerics op
- fixes for ReduceBool IsInfOrNan & IsFinite
* Small test fix
* CheckNumerics op wrapper
* Compatibility of deserialization (#18)
Signed-off-by: Alexander Stoyakin <alexander.stoyakin@gmail.com>
* SameDiff: add activation gradient checking support for debugging (#19)
* SameDiff gradient checker: first pass on activation gradient checks
* Fixes + tests for activation gradient checking
* Javadoc
* [WIP] Some nd4j data type corrections (#20)
* Adjust data type
* Set correct Data type.
* Size of proper data type.
* fix averaged cpu load (#22)
* [WIP] Multiple dataset iterators (#27)
* Splitting dataset into arbitrary number
* Fixes
* Multiple split of iterator
* Test
* Test
* Some fixes
* signature change
* one more tweak
Signed-off-by: raver119 <raver119@gmail.com>
* one more test for sequential use of DataSetIteratorSplitter
Signed-off-by: raver119 <raver119@gmail.com>
* Fixes
* Fixes
* one more test for Alexander
Signed-off-by: raver119 <raver119@gmail.com>
* Some fixes
* Some fixes
* one more test for Alexander
Signed-off-by: raver119 <raver119@gmail.com>
* minor test fix
Signed-off-by: raver119 <raver119@gmail.com>
* Some fixes
* Some fixes
* couple of assertions tweaked
Signed-off-by: raver119 <raver119@gmail.com>
* MDS splitter test :/
Signed-off-by: raver119 <raver119@gmail.com>
* Minor refactoring
* Multi dataset
* Some fixes
* More tests
* Small number of test fixes/improvements (failures on CI) (#31)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* [WIP] More CUDA stuff (#26)
* initial commit
Signed-off-by: raver119 <raver119@gmail.com>
* LRN BP CUDA
Signed-off-by: raver119 <raver119@gmail.com>
* less memory
Signed-off-by: raver119 <raver119@gmail.com>
* Fixed bug with crop_and_resize op helper.
* get rid of unnecessary index-calculation dunction
Signed-off-by: Yurii <yurii@skymind.io>
* Fixed sort with nth_element cuda-based helper.
* Refactored nth_element.
* Refactored nth_element op and tests.
* Modified usage of dim array with sortTad routine.
* Refactored main routine of helper for non_max_image_suppression op.
* non_max_image_suppression op helper with cuda kernel implementation. Initial revision.
* fix vol2col cuda kernel
* meh
Signed-off-by: raver119 <raver119@gmail.com>
* topK concept
Signed-off-by: raver119 <raver119@gmail.com>
* unsorted topK with scanWitdh of 1
Signed-off-by: raver119 <raver119@gmail.com>
* correct vol2col tests
* sorted/unsorted topK
Signed-off-by: raver119 <raver119@gmail.com>
* implementation and fixing col2im/col2vol
* Corrected usage flags with input/output with reverse op.
* dup is const now
Signed-off-by: raver119 <raver119@gmail.com>
* percentile op
Signed-off-by: raver119 <raver119@gmail.com>
* group tests for mapool2d
Signed-off-by: Yurii <yurii@skymind.io>
* special test for george
Signed-off-by: raver119 <raver119@gmail.com>
* less threads for sortTad
Signed-off-by: raver119 <raver119@gmail.com>
* provide conv2d for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* remove auther in sort tad kernel code
Signed-off-by: Yurii <yurii@skymind.io>
* provide depthwise_conv2d for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* - max_pooling_with_argmax
- null check for special use
Signed-off-by: raver119 <raver119@gmail.com>
* dts cuda
Signed-off-by: raver119 <raver119@gmail.com>
* provide sconv2d for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* std cuda
Signed-off-by: raver119 <raver119@gmail.com>
* Refactored non_max_suppression op to conform TF implementation.
* Improved suppression helper.
* provide pooling3d for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* minor lstm rearrangements
Signed-off-by: raver119 <raver119@gmail.com>
* more of minor lstm rearrangements
Signed-off-by: raver119 <raver119@gmail.com>
* (bi)dynamic_rnn
Signed-off-by: raver119 <raver119@gmail.com>
* templates init order
Signed-off-by: raver119 <raver119@gmail.com>
* Refactored non_max_suppression op.
* Added cuda kernel for non_max_suppression.
* CPU sort by key/value
Signed-off-by: raver119 <raver119@gmail.com>
* CPU sort TAD by key/value
Signed-off-by: raver119 <raver119@gmail.com>
* CPU sort TAD by key/value tests
Signed-off-by: raver119 <raver119@gmail.com>
* Eliminate compiler error with cuda implementation.
* - repaired gradCheck in cuda
- provide conv2d_bp for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* missed signature
Signed-off-by: raver119 <raver119@gmail.com>
* provide depthwise_conv2d_bp for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* Implementation of lup helper with cuda kernel. Initial commit.
* further work on backprops for convolutions
Signed-off-by: Yurii <yurii@skymind.io>
* CUDA linear sort by key/val
Signed-off-by: raver119 <raver119@gmail.com>
* CUDA tad sort by key/val
Signed-off-by: raver119 <raver119@gmail.com>
* start providing of backprop for pooling2d/3d
Signed-off-by: Yurii <yurii@skymind.io>
* Added atomicAdd for bool datatype.
* dynamic partition concept
Signed-off-by: raver119 <raver119@gmail.com>
* dynamic partition concept
Signed-off-by: raver119 <raver119@gmail.com>
* dynamic partition scalar CUDA
Signed-off-by: raver119 <raver119@gmail.com>
* important comment
Signed-off-by: raver119 <raver119@gmail.com>
* fix pooling2d/3d backprop helpers
Signed-off-by: Yurii <yurii@skymind.io>
* Added non-linear test with dynamic_partition.
* Improved test for dynamic_partition.
* dynamic_partition TAD concept
Signed-off-by: raver119 <raver119@gmail.com>
* - dynamic_partition TAD CUDA impl
- dynamic_partition TAD CPU fix
Signed-off-by: raver119 <raver119@gmail.com>
* - rewrite cpu code for usampling2d/3d
- write cuda code for usampling2d/3d
Signed-off-by: Yurii <yurii@skymind.io>
* dynamic_stitch CUDA vector case
Signed-off-by: raver119 <raver119@gmail.com>
* dynamic_stitch CUDA TAD case concept
Signed-off-by: raver119 <raver119@gmail.com>
* dynamic_stitch CUDA TAD case impl
Signed-off-by: raver119 <raver119@gmail.com>
* Added tests for dynamic_stitch 3D-4D cases.
* minor tests tweaks
Signed-off-by: raver119 <raver119@gmail.com>
* Fixed type check for dynamic stitch.
* min/max bp
Signed-off-by: raver119 <raver119@gmail.com>
* rewrite code for upsampling2d/3d cpu
Signed-off-by: Yurii <yurii@skymind.io>
* reduce min/max/norm_max bp
Signed-off-by: raver119 <raver119@gmail.com>
* lup implementation. Additional enhancements.
* provide code for upsamling2d/3d backprop
Signed-off-by: Yurii <yurii@skymind.io>
* weightedCrossEntropyWithLogits
Signed-off-by: raver119 <raver119@gmail.com>
* Fixed template math atomicMul for 64bit ints.
* Refactored dynamic_partition_bp op.
* inverseBroadcast fix
Signed-off-by: raver119 <raver119@gmail.com>
* DynamicPartitionBP test datatype fixed.
* - nd4j_atomicMul Windows fix
- cpu/NDArrayLambda.hpp excluded from CUDA
Signed-off-by: raver119 <raver119@gmail.com>
2019-06-27 17:37:04 +02:00
|
|
|
return opOutput == static_cast<X>(0) && old == static_cast<X>(0) ? static_cast<Z>(0) : static_cast<Z>(1);
|
2019-06-06 14:21:15 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z postProcess(X reduction, Nd4jLong n, X *extraParams) {
|
Merge master to upstream (#7945)
* Shugeo strided slice zeros (#14)
* Modified strided_slice op to properly work with empty-like shapes.
* Fixed test for reduce_mean with empty-like input.
* [WIP] Last merge (#15)
* correct logsoftmax looss (#2)
* Small SameDiff listener fix (#4)
* Various fixes (#6)
* #7839 Fix for asXMatrix and tests
* #7866 EmbeddingSequenceLayer dtype fix + test
* #7856 SameDiff save/load stream methods
* #7859 RegressionEvaluation rank 4 fix + tests + axis configuration
* EvaluationBinary 3d/4d
* More evaluation 3d/4d tests
* #7847 Evaluation empty checks
* Small test ifx
* #7848 Fix median edge case
* Improve DL4J samediff layer tests
* [WIP] FastText wrapper implemented (#8)
* FastText implemented
* Some fixes
* Fix shapes for wordsNearest
* Validation of input vectors
* Fixes
* Fixed test
* Thread tagged
* Some tweaks
* setContextClassLoader for DeallocatorServiceThread
* Numpy format tests (#1)
* Various fixes (#11)
* #7852 SameDiff gather fix
* #7892 SameDiff placeholder to constant conversion
* #7890 validate input rank for MLN/CG init methods
* Fix broken permute shape calculation
* Permute and gather fixes
* Tests
* #7850 LogSumExp fix + test
* Handful of test fixes
* Empty arrays with non-scalar shapes (#10)
* minor rearrangements for lambdas
* empty tensors with non-scalar shapes
* numpy empty tensors with non-scalar shapes
* few more empty tweaks
* Small fixes
* conv3d signature update
* micro fix in batchnorm mkldnn
* Import fixes
* Fix
* MKL-DNN update
* Small fill fix
* fill with empty input + test
* Fixes
* Small error improvement
* Fix
* one special test
* couple of fixes for lstm
* Rewrite TFGraphMapper.getNDArrayFromTensor to be maintainable and less error prone
* Fixes
* FP16
* Unsigned
* BFloat16
* Fill op - empty tweaks
* - couple of fixes for empty arrays construction
- stack updated
* strided slice fix
* one transform test
* provide method for reducing shapeInfo in case of input array is empty
* Fixed reduceAlongDimensions to use empty input properly.
* couple of broadcast tests
* couple of tests broadcast tests + tweak to make them pass
* add check of non-empty to methods producing sub-arrays
* Fixed reshapeC with zeros in shape.
* complete empty check in reduce_... legacy ops
* Concat and cumsum/prod
* Tweak to empty shape inference on import
* add empty check to the rest of reduce legacy ops
* one more test
* correct typo in evalReduceShapeInfoEmpty
* Added tests for reduce_* ops to tests with zero shapes.
* few more tests for empty reductions
* Fixed strided_slice op with empty case and tests.
* one more empty reduction test
* Fixed strided_slice test.
* add empty check to NDArray::reshapei
* infOrMax
* empty min/max with infinity tests
* made unstack working correctly with empty arrays
* few IndexReduce tests + tweaks for empty shapes
* add test for empty concat
* few tests fixed
* Validation fix for reductions on empty shapes
* Reverse fix
* Reduction shape calc fixes
* SameDiff.generateOutputVariable: don't use shape function to determine number of outputs
* Range fix
* - NDArray constructor updated for scalars/empty arrays
- few tests fixed
* More fixes
* Empty creator fixes
* concat fix
* concat fix
* TF import tests: allow 'both all NaN' and 'both all inf' to pass
* Slice, zero fraction, and reshape fixes
* transpose, gather
* Zero fraction
* scalar cast fix
* Empty reduction axis support
* few more tests fixed
* Fixed input checks conforming with TF for concat op and tests.
* few tests fixed
* matmul scalar shape fix
* Fixed checkout for data type and scalarity with concat to allow non-empty scalars with vector concats.
* broadcast bool fix
* few more tests
* few more tests
* correct evalReduceShapeInfoEmpty
* argmax/argmin + tests
* one more empty edge case + one more test
* argmax/argmin/realdiv_bp tweaks
* empty reshape test + fix
* Helper fixes
* Small fixes
* Gather test fix
* Gather test fix
* Small fixes
* reduce scalar zero values
* scalar mean workaround
* Remove debug code
* along dim mean workaround
* one more test
* - equalsTo() tweak for empty arrays
- one more test
* broadcast tweaks
* [WIP] Fixing outstanding issues for NLP (#9)
* Avoid using not-inited objects
* Test fixed.
* Redundant method avoided for models like FastText
* KMeans++ implementation
* KMeans++ implementation
* Disable parallel execution
* KMeans++
* Tests
* Dev branch merge (#16)
* SameDiff: convertDataType and gradient check util improvements (#12)
* GradCheck util improvements
* StopGradient constructor + test
* SameDiff: Add datatype conversion
* Javadoc and add DataType.isNumerical()
* Small fix
* Fix SameDiff TF import test cases intermediate naming (workaround for bad default)
* TFGraphTestAllHelper: check intermediates in execution order
* Add missing debug listener
* [WIP] lstmBlock fix + other changes (#13)
- fixes lstmBlock issue
- changes NDArray method reshape(), permute(), transpose() by making them return instance instead of pointer
- CheckNumerics op
- fixes for ReduceBool IsInfOrNan & IsFinite
* Small test fix
* CheckNumerics op wrapper
* Fix some issues on master (#17)
* Fix DataVec test issue
* Fix issue with dl4j SameDiff output layer
* Dtype fix for lambda layers
* #7912 BertIterator dtype fix (use float32 not global default)
* [WIP] Next set of CUDA stuff (#7)
New CUDA implementations and improvements
* bad file
* Dev branch master merge (#23)
* SameDiff: convertDataType and gradient check util improvements (#12)
* GradCheck util improvements
* StopGradient constructor + test
* SameDiff: Add datatype conversion
* Javadoc and add DataType.isNumerical()
* Small fix
* Fix SameDiff TF import test cases intermediate naming (workaround for bad default)
* TFGraphTestAllHelper: check intermediates in execution order
* Add missing debug listener
* [WIP] lstmBlock fix + other changes (#13)
- fixes lstmBlock issue
- changes NDArray method reshape(), permute(), transpose() by making them return instance instead of pointer
- CheckNumerics op
- fixes for ReduceBool IsInfOrNan & IsFinite
* Small test fix
* CheckNumerics op wrapper
* Compatibility of deserialization (#18)
Signed-off-by: Alexander Stoyakin <alexander.stoyakin@gmail.com>
* SameDiff: add activation gradient checking support for debugging (#19)
* SameDiff gradient checker: first pass on activation gradient checks
* Fixes + tests for activation gradient checking
* Javadoc
* [WIP] Some nd4j data type corrections (#20)
* Adjust data type
* Set correct Data type.
* Size of proper data type.
* fix averaged cpu load (#22)
* SameDiff ops, TF import and fixes (#24)
* CheckNumerics tests + fixes + misc fixes
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fake quant
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fixes
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* FakeQuantWithMinMaxArgs
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* CheckNumerics fix
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fix libnd4j ALL_INTS and ALL_FLOATS declaration (uint and bfloat types)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Small fix
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Javadoc
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Exception tweak
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* fix
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fix for out of scope stack allocated var use
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Ignores
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Ignore for known failing test (already logged issue)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Merge upstream to fork (#25)
* Add thousand-separator commas to TotalParams (#7915)
* Add thousand-separator commas to TotalParams
The number of parameters can be quite large, and it would help the reading of the summary printout to have the TotalParams column & values at the bottom have thousand-separator-commas in them.
* Add thousand-separator commas to MultiLayerNetwork
Corresponding change to MultiLayerNetwork
Signed-off-by: Jxtps Jxtps <jxtps435@gmail.com>
* Update contributing and issue/PR templates (#7934)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fix link to AdaDelta paper (#7942)
Fix link to AdaDelta paper hosted on matthewzeiler.com
Signed-off-by: Jxtps
* Fixes, and ignores for known/logged failing issues (#7943)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* SameDiff + DL4J/SameDiff: Multiple fixes (#28)
* #7919 HDF5 attribute buffer length fix
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* #7909 Arbiter constructor exception ux improvements
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* #7925 RNN output layer length checks
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* #7939 Add listener for validating inputs are not incorrectly modified
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* #7939 Integrate NonInplaceValidationListener into tests
* #7844 DL4J SameDiff fixes for variable minibatch size
* DL4J SameDiff fixes - ensure gradient for input placeholder is available
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Tweaks to ExternalErrorsFunction - use placeholders, make more robust
* Another fix
* More fixes
* More SameDiff/DL4J fixes
* Scope out scalar array creation in BaseScalarOp
* Remove debug code
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* [WIP] Final dev branch merge (#29)
* SameDiff: convertDataType and gradient check util improvements (#12)
* GradCheck util improvements
* StopGradient constructor + test
* SameDiff: Add datatype conversion
* Javadoc and add DataType.isNumerical()
* Small fix
* Fix SameDiff TF import test cases intermediate naming (workaround for bad default)
* TFGraphTestAllHelper: check intermediates in execution order
* Add missing debug listener
* [WIP] lstmBlock fix + other changes (#13)
- fixes lstmBlock issue
- changes NDArray method reshape(), permute(), transpose() by making them return instance instead of pointer
- CheckNumerics op
- fixes for ReduceBool IsInfOrNan & IsFinite
* Small test fix
* CheckNumerics op wrapper
* Compatibility of deserialization (#18)
Signed-off-by: Alexander Stoyakin <alexander.stoyakin@gmail.com>
* SameDiff: add activation gradient checking support for debugging (#19)
* SameDiff gradient checker: first pass on activation gradient checks
* Fixes + tests for activation gradient checking
* Javadoc
* [WIP] Some nd4j data type corrections (#20)
* Adjust data type
* Set correct Data type.
* Size of proper data type.
* fix averaged cpu load (#22)
* [WIP] Multiple dataset iterators (#27)
* Splitting dataset into arbitrary number
* Fixes
* Multiple split of iterator
* Test
* Test
* Some fixes
* signature change
* one more tweak
Signed-off-by: raver119 <raver119@gmail.com>
* one more test for sequential use of DataSetIteratorSplitter
Signed-off-by: raver119 <raver119@gmail.com>
* Fixes
* Fixes
* one more test for Alexander
Signed-off-by: raver119 <raver119@gmail.com>
* Some fixes
* Some fixes
* one more test for Alexander
Signed-off-by: raver119 <raver119@gmail.com>
* minor test fix
Signed-off-by: raver119 <raver119@gmail.com>
* Some fixes
* Some fixes
* couple of assertions tweaked
Signed-off-by: raver119 <raver119@gmail.com>
* MDS splitter test :/
Signed-off-by: raver119 <raver119@gmail.com>
* Minor refactoring
* Multi dataset
* Some fixes
* More tests
* Small number of test fixes/improvements (failures on CI) (#31)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* [WIP] More CUDA stuff (#26)
* initial commit
Signed-off-by: raver119 <raver119@gmail.com>
* LRN BP CUDA
Signed-off-by: raver119 <raver119@gmail.com>
* less memory
Signed-off-by: raver119 <raver119@gmail.com>
* Fixed bug with crop_and_resize op helper.
* get rid of unnecessary index-calculation dunction
Signed-off-by: Yurii <yurii@skymind.io>
* Fixed sort with nth_element cuda-based helper.
* Refactored nth_element.
* Refactored nth_element op and tests.
* Modified usage of dim array with sortTad routine.
* Refactored main routine of helper for non_max_image_suppression op.
* non_max_image_suppression op helper with cuda kernel implementation. Initial revision.
* fix vol2col cuda kernel
* meh
Signed-off-by: raver119 <raver119@gmail.com>
* topK concept
Signed-off-by: raver119 <raver119@gmail.com>
* unsorted topK with scanWitdh of 1
Signed-off-by: raver119 <raver119@gmail.com>
* correct vol2col tests
* sorted/unsorted topK
Signed-off-by: raver119 <raver119@gmail.com>
* implementation and fixing col2im/col2vol
* Corrected usage flags with input/output with reverse op.
* dup is const now
Signed-off-by: raver119 <raver119@gmail.com>
* percentile op
Signed-off-by: raver119 <raver119@gmail.com>
* group tests for mapool2d
Signed-off-by: Yurii <yurii@skymind.io>
* special test for george
Signed-off-by: raver119 <raver119@gmail.com>
* less threads for sortTad
Signed-off-by: raver119 <raver119@gmail.com>
* provide conv2d for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* remove auther in sort tad kernel code
Signed-off-by: Yurii <yurii@skymind.io>
* provide depthwise_conv2d for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* - max_pooling_with_argmax
- null check for special use
Signed-off-by: raver119 <raver119@gmail.com>
* dts cuda
Signed-off-by: raver119 <raver119@gmail.com>
* provide sconv2d for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* std cuda
Signed-off-by: raver119 <raver119@gmail.com>
* Refactored non_max_suppression op to conform TF implementation.
* Improved suppression helper.
* provide pooling3d for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* minor lstm rearrangements
Signed-off-by: raver119 <raver119@gmail.com>
* more of minor lstm rearrangements
Signed-off-by: raver119 <raver119@gmail.com>
* (bi)dynamic_rnn
Signed-off-by: raver119 <raver119@gmail.com>
* templates init order
Signed-off-by: raver119 <raver119@gmail.com>
* Refactored non_max_suppression op.
* Added cuda kernel for non_max_suppression.
* CPU sort by key/value
Signed-off-by: raver119 <raver119@gmail.com>
* CPU sort TAD by key/value
Signed-off-by: raver119 <raver119@gmail.com>
* CPU sort TAD by key/value tests
Signed-off-by: raver119 <raver119@gmail.com>
* Eliminate compiler error with cuda implementation.
* - repaired gradCheck in cuda
- provide conv2d_bp for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* missed signature
Signed-off-by: raver119 <raver119@gmail.com>
* provide depthwise_conv2d_bp for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* Implementation of lup helper with cuda kernel. Initial commit.
* further work on backprops for convolutions
Signed-off-by: Yurii <yurii@skymind.io>
* CUDA linear sort by key/val
Signed-off-by: raver119 <raver119@gmail.com>
* CUDA tad sort by key/val
Signed-off-by: raver119 <raver119@gmail.com>
* start providing of backprop for pooling2d/3d
Signed-off-by: Yurii <yurii@skymind.io>
* Added atomicAdd for bool datatype.
* dynamic partition concept
Signed-off-by: raver119 <raver119@gmail.com>
* dynamic partition concept
Signed-off-by: raver119 <raver119@gmail.com>
* dynamic partition scalar CUDA
Signed-off-by: raver119 <raver119@gmail.com>
* important comment
Signed-off-by: raver119 <raver119@gmail.com>
* fix pooling2d/3d backprop helpers
Signed-off-by: Yurii <yurii@skymind.io>
* Added non-linear test with dynamic_partition.
* Improved test for dynamic_partition.
* dynamic_partition TAD concept
Signed-off-by: raver119 <raver119@gmail.com>
* - dynamic_partition TAD CUDA impl
- dynamic_partition TAD CPU fix
Signed-off-by: raver119 <raver119@gmail.com>
* - rewrite cpu code for usampling2d/3d
- write cuda code for usampling2d/3d
Signed-off-by: Yurii <yurii@skymind.io>
* dynamic_stitch CUDA vector case
Signed-off-by: raver119 <raver119@gmail.com>
* dynamic_stitch CUDA TAD case concept
Signed-off-by: raver119 <raver119@gmail.com>
* dynamic_stitch CUDA TAD case impl
Signed-off-by: raver119 <raver119@gmail.com>
* Added tests for dynamic_stitch 3D-4D cases.
* minor tests tweaks
Signed-off-by: raver119 <raver119@gmail.com>
* Fixed type check for dynamic stitch.
* min/max bp
Signed-off-by: raver119 <raver119@gmail.com>
* rewrite code for upsampling2d/3d cpu
Signed-off-by: Yurii <yurii@skymind.io>
* reduce min/max/norm_max bp
Signed-off-by: raver119 <raver119@gmail.com>
* lup implementation. Additional enhancements.
* provide code for upsamling2d/3d backprop
Signed-off-by: Yurii <yurii@skymind.io>
* weightedCrossEntropyWithLogits
Signed-off-by: raver119 <raver119@gmail.com>
* Fixed template math atomicMul for 64bit ints.
* Refactored dynamic_partition_bp op.
* inverseBroadcast fix
Signed-off-by: raver119 <raver119@gmail.com>
* DynamicPartitionBP test datatype fixed.
* - nd4j_atomicMul Windows fix
- cpu/NDArrayLambda.hpp excluded from CUDA
Signed-off-by: raver119 <raver119@gmail.com>
2019-06-27 17:37:04 +02:00
|
|
|
return reduction != static_cast<X>(0);
|
2019-06-06 14:21:15 +02:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class IsFinite {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_bool
|
|
|
|
no_op_exec_special_bool_cuda
|
|
|
|
|
|
|
|
no_op_exec_special_accumulation
|
|
|
|
no_op_exec_special_accumulation_cuda
|
|
|
|
|
|
|
|
op_def static Z op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_isfin<X>(d1) ? static_cast<Z>(1) : static_cast<Z>(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X startingValue(const X *input) {
|
Merge master to upstream (#7945)
* Shugeo strided slice zeros (#14)
* Modified strided_slice op to properly work with empty-like shapes.
* Fixed test for reduce_mean with empty-like input.
* [WIP] Last merge (#15)
* correct logsoftmax looss (#2)
* Small SameDiff listener fix (#4)
* Various fixes (#6)
* #7839 Fix for asXMatrix and tests
* #7866 EmbeddingSequenceLayer dtype fix + test
* #7856 SameDiff save/load stream methods
* #7859 RegressionEvaluation rank 4 fix + tests + axis configuration
* EvaluationBinary 3d/4d
* More evaluation 3d/4d tests
* #7847 Evaluation empty checks
* Small test ifx
* #7848 Fix median edge case
* Improve DL4J samediff layer tests
* [WIP] FastText wrapper implemented (#8)
* FastText implemented
* Some fixes
* Fix shapes for wordsNearest
* Validation of input vectors
* Fixes
* Fixed test
* Thread tagged
* Some tweaks
* setContextClassLoader for DeallocatorServiceThread
* Numpy format tests (#1)
* Various fixes (#11)
* #7852 SameDiff gather fix
* #7892 SameDiff placeholder to constant conversion
* #7890 validate input rank for MLN/CG init methods
* Fix broken permute shape calculation
* Permute and gather fixes
* Tests
* #7850 LogSumExp fix + test
* Handful of test fixes
* Empty arrays with non-scalar shapes (#10)
* minor rearrangements for lambdas
* empty tensors with non-scalar shapes
* numpy empty tensors with non-scalar shapes
* few more empty tweaks
* Small fixes
* conv3d signature update
* micro fix in batchnorm mkldnn
* Import fixes
* Fix
* MKL-DNN update
* Small fill fix
* fill with empty input + test
* Fixes
* Small error improvement
* Fix
* one special test
* couple of fixes for lstm
* Rewrite TFGraphMapper.getNDArrayFromTensor to be maintainable and less error prone
* Fixes
* FP16
* Unsigned
* BFloat16
* Fill op - empty tweaks
* - couple of fixes for empty arrays construction
- stack updated
* strided slice fix
* one transform test
* provide method for reducing shapeInfo in case of input array is empty
* Fixed reduceAlongDimensions to use empty input properly.
* couple of broadcast tests
* couple of tests broadcast tests + tweak to make them pass
* add check of non-empty to methods producing sub-arrays
* Fixed reshapeC with zeros in shape.
* complete empty check in reduce_... legacy ops
* Concat and cumsum/prod
* Tweak to empty shape inference on import
* add empty check to the rest of reduce legacy ops
* one more test
* correct typo in evalReduceShapeInfoEmpty
* Added tests for reduce_* ops to tests with zero shapes.
* few more tests for empty reductions
* Fixed strided_slice op with empty case and tests.
* one more empty reduction test
* Fixed strided_slice test.
* add empty check to NDArray::reshapei
* infOrMax
* empty min/max with infinity tests
* made unstack working correctly with empty arrays
* few IndexReduce tests + tweaks for empty shapes
* add test for empty concat
* few tests fixed
* Validation fix for reductions on empty shapes
* Reverse fix
* Reduction shape calc fixes
* SameDiff.generateOutputVariable: don't use shape function to determine number of outputs
* Range fix
* - NDArray constructor updated for scalars/empty arrays
- few tests fixed
* More fixes
* Empty creator fixes
* concat fix
* concat fix
* TF import tests: allow 'both all NaN' and 'both all inf' to pass
* Slice, zero fraction, and reshape fixes
* transpose, gather
* Zero fraction
* scalar cast fix
* Empty reduction axis support
* few more tests fixed
* Fixed input checks conforming with TF for concat op and tests.
* few tests fixed
* matmul scalar shape fix
* Fixed checkout for data type and scalarity with concat to allow non-empty scalars with vector concats.
* broadcast bool fix
* few more tests
* few more tests
* correct evalReduceShapeInfoEmpty
* argmax/argmin + tests
* one more empty edge case + one more test
* argmax/argmin/realdiv_bp tweaks
* empty reshape test + fix
* Helper fixes
* Small fixes
* Gather test fix
* Gather test fix
* Small fixes
* reduce scalar zero values
* scalar mean workaround
* Remove debug code
* along dim mean workaround
* one more test
* - equalsTo() tweak for empty arrays
- one more test
* broadcast tweaks
* [WIP] Fixing outstanding issues for NLP (#9)
* Avoid using not-inited objects
* Test fixed.
* Redundant method avoided for models like FastText
* KMeans++ implementation
* KMeans++ implementation
* Disable parallel execution
* KMeans++
* Tests
* Dev branch merge (#16)
* SameDiff: convertDataType and gradient check util improvements (#12)
* GradCheck util improvements
* StopGradient constructor + test
* SameDiff: Add datatype conversion
* Javadoc and add DataType.isNumerical()
* Small fix
* Fix SameDiff TF import test cases intermediate naming (workaround for bad default)
* TFGraphTestAllHelper: check intermediates in execution order
* Add missing debug listener
* [WIP] lstmBlock fix + other changes (#13)
- fixes lstmBlock issue
- changes NDArray method reshape(), permute(), transpose() by making them return instance instead of pointer
- CheckNumerics op
- fixes for ReduceBool IsInfOrNan & IsFinite
* Small test fix
* CheckNumerics op wrapper
* Fix some issues on master (#17)
* Fix DataVec test issue
* Fix issue with dl4j SameDiff output layer
* Dtype fix for lambda layers
* #7912 BertIterator dtype fix (use float32 not global default)
* [WIP] Next set of CUDA stuff (#7)
New CUDA implementations and improvements
* bad file
* Dev branch master merge (#23)
* SameDiff: convertDataType and gradient check util improvements (#12)
* GradCheck util improvements
* StopGradient constructor + test
* SameDiff: Add datatype conversion
* Javadoc and add DataType.isNumerical()
* Small fix
* Fix SameDiff TF import test cases intermediate naming (workaround for bad default)
* TFGraphTestAllHelper: check intermediates in execution order
* Add missing debug listener
* [WIP] lstmBlock fix + other changes (#13)
- fixes lstmBlock issue
- changes NDArray method reshape(), permute(), transpose() by making them return instance instead of pointer
- CheckNumerics op
- fixes for ReduceBool IsInfOrNan & IsFinite
* Small test fix
* CheckNumerics op wrapper
* Compatibility of deserialization (#18)
Signed-off-by: Alexander Stoyakin <alexander.stoyakin@gmail.com>
* SameDiff: add activation gradient checking support for debugging (#19)
* SameDiff gradient checker: first pass on activation gradient checks
* Fixes + tests for activation gradient checking
* Javadoc
* [WIP] Some nd4j data type corrections (#20)
* Adjust data type
* Set correct Data type.
* Size of proper data type.
* fix averaged cpu load (#22)
* SameDiff ops, TF import and fixes (#24)
* CheckNumerics tests + fixes + misc fixes
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fake quant
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fixes
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* FakeQuantWithMinMaxArgs
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* CheckNumerics fix
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fix libnd4j ALL_INTS and ALL_FLOATS declaration (uint and bfloat types)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Small fix
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Javadoc
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Exception tweak
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* fix
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fix for out of scope stack allocated var use
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Ignores
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Ignore for known failing test (already logged issue)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Merge upstream to fork (#25)
* Add thousand-separator commas to TotalParams (#7915)
* Add thousand-separator commas to TotalParams
The number of parameters can be quite large, and it would help the reading of the summary printout to have the TotalParams column & values at the bottom have thousand-separator-commas in them.
* Add thousand-separator commas to MultiLayerNetwork
Corresponding change to MultiLayerNetwork
Signed-off-by: Jxtps Jxtps <jxtps435@gmail.com>
* Update contributing and issue/PR templates (#7934)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fix link to AdaDelta paper (#7942)
Fix link to AdaDelta paper hosted on matthewzeiler.com
Signed-off-by: Jxtps
* Fixes, and ignores for known/logged failing issues (#7943)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* SameDiff + DL4J/SameDiff: Multiple fixes (#28)
* #7919 HDF5 attribute buffer length fix
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* #7909 Arbiter constructor exception ux improvements
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* #7925 RNN output layer length checks
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* #7939 Add listener for validating inputs are not incorrectly modified
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* #7939 Integrate NonInplaceValidationListener into tests
* #7844 DL4J SameDiff fixes for variable minibatch size
* DL4J SameDiff fixes - ensure gradient for input placeholder is available
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Tweaks to ExternalErrorsFunction - use placeholders, make more robust
* Another fix
* More fixes
* More SameDiff/DL4J fixes
* Scope out scalar array creation in BaseScalarOp
* Remove debug code
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* [WIP] Final dev branch merge (#29)
* SameDiff: convertDataType and gradient check util improvements (#12)
* GradCheck util improvements
* StopGradient constructor + test
* SameDiff: Add datatype conversion
* Javadoc and add DataType.isNumerical()
* Small fix
* Fix SameDiff TF import test cases intermediate naming (workaround for bad default)
* TFGraphTestAllHelper: check intermediates in execution order
* Add missing debug listener
* [WIP] lstmBlock fix + other changes (#13)
- fixes lstmBlock issue
- changes NDArray method reshape(), permute(), transpose() by making them return instance instead of pointer
- CheckNumerics op
- fixes for ReduceBool IsInfOrNan & IsFinite
* Small test fix
* CheckNumerics op wrapper
* Compatibility of deserialization (#18)
Signed-off-by: Alexander Stoyakin <alexander.stoyakin@gmail.com>
* SameDiff: add activation gradient checking support for debugging (#19)
* SameDiff gradient checker: first pass on activation gradient checks
* Fixes + tests for activation gradient checking
* Javadoc
* [WIP] Some nd4j data type corrections (#20)
* Adjust data type
* Set correct Data type.
* Size of proper data type.
* fix averaged cpu load (#22)
* [WIP] Multiple dataset iterators (#27)
* Splitting dataset into arbitrary number
* Fixes
* Multiple split of iterator
* Test
* Test
* Some fixes
* signature change
* one more tweak
Signed-off-by: raver119 <raver119@gmail.com>
* one more test for sequential use of DataSetIteratorSplitter
Signed-off-by: raver119 <raver119@gmail.com>
* Fixes
* Fixes
* one more test for Alexander
Signed-off-by: raver119 <raver119@gmail.com>
* Some fixes
* Some fixes
* one more test for Alexander
Signed-off-by: raver119 <raver119@gmail.com>
* minor test fix
Signed-off-by: raver119 <raver119@gmail.com>
* Some fixes
* Some fixes
* couple of assertions tweaked
Signed-off-by: raver119 <raver119@gmail.com>
* MDS splitter test :/
Signed-off-by: raver119 <raver119@gmail.com>
* Minor refactoring
* Multi dataset
* Some fixes
* More tests
* Small number of test fixes/improvements (failures on CI) (#31)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* [WIP] More CUDA stuff (#26)
* initial commit
Signed-off-by: raver119 <raver119@gmail.com>
* LRN BP CUDA
Signed-off-by: raver119 <raver119@gmail.com>
* less memory
Signed-off-by: raver119 <raver119@gmail.com>
* Fixed bug with crop_and_resize op helper.
* get rid of unnecessary index-calculation dunction
Signed-off-by: Yurii <yurii@skymind.io>
* Fixed sort with nth_element cuda-based helper.
* Refactored nth_element.
* Refactored nth_element op and tests.
* Modified usage of dim array with sortTad routine.
* Refactored main routine of helper for non_max_image_suppression op.
* non_max_image_suppression op helper with cuda kernel implementation. Initial revision.
* fix vol2col cuda kernel
* meh
Signed-off-by: raver119 <raver119@gmail.com>
* topK concept
Signed-off-by: raver119 <raver119@gmail.com>
* unsorted topK with scanWitdh of 1
Signed-off-by: raver119 <raver119@gmail.com>
* correct vol2col tests
* sorted/unsorted topK
Signed-off-by: raver119 <raver119@gmail.com>
* implementation and fixing col2im/col2vol
* Corrected usage flags with input/output with reverse op.
* dup is const now
Signed-off-by: raver119 <raver119@gmail.com>
* percentile op
Signed-off-by: raver119 <raver119@gmail.com>
* group tests for mapool2d
Signed-off-by: Yurii <yurii@skymind.io>
* special test for george
Signed-off-by: raver119 <raver119@gmail.com>
* less threads for sortTad
Signed-off-by: raver119 <raver119@gmail.com>
* provide conv2d for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* remove auther in sort tad kernel code
Signed-off-by: Yurii <yurii@skymind.io>
* provide depthwise_conv2d for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* - max_pooling_with_argmax
- null check for special use
Signed-off-by: raver119 <raver119@gmail.com>
* dts cuda
Signed-off-by: raver119 <raver119@gmail.com>
* provide sconv2d for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* std cuda
Signed-off-by: raver119 <raver119@gmail.com>
* Refactored non_max_suppression op to conform TF implementation.
* Improved suppression helper.
* provide pooling3d for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* minor lstm rearrangements
Signed-off-by: raver119 <raver119@gmail.com>
* more of minor lstm rearrangements
Signed-off-by: raver119 <raver119@gmail.com>
* (bi)dynamic_rnn
Signed-off-by: raver119 <raver119@gmail.com>
* templates init order
Signed-off-by: raver119 <raver119@gmail.com>
* Refactored non_max_suppression op.
* Added cuda kernel for non_max_suppression.
* CPU sort by key/value
Signed-off-by: raver119 <raver119@gmail.com>
* CPU sort TAD by key/value
Signed-off-by: raver119 <raver119@gmail.com>
* CPU sort TAD by key/value tests
Signed-off-by: raver119 <raver119@gmail.com>
* Eliminate compiler error with cuda implementation.
* - repaired gradCheck in cuda
- provide conv2d_bp for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* missed signature
Signed-off-by: raver119 <raver119@gmail.com>
* provide depthwise_conv2d_bp for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* Implementation of lup helper with cuda kernel. Initial commit.
* further work on backprops for convolutions
Signed-off-by: Yurii <yurii@skymind.io>
* CUDA linear sort by key/val
Signed-off-by: raver119 <raver119@gmail.com>
* CUDA tad sort by key/val
Signed-off-by: raver119 <raver119@gmail.com>
* start providing of backprop for pooling2d/3d
Signed-off-by: Yurii <yurii@skymind.io>
* Added atomicAdd for bool datatype.
* dynamic partition concept
Signed-off-by: raver119 <raver119@gmail.com>
* dynamic partition concept
Signed-off-by: raver119 <raver119@gmail.com>
* dynamic partition scalar CUDA
Signed-off-by: raver119 <raver119@gmail.com>
* important comment
Signed-off-by: raver119 <raver119@gmail.com>
* fix pooling2d/3d backprop helpers
Signed-off-by: Yurii <yurii@skymind.io>
* Added non-linear test with dynamic_partition.
* Improved test for dynamic_partition.
* dynamic_partition TAD concept
Signed-off-by: raver119 <raver119@gmail.com>
* - dynamic_partition TAD CUDA impl
- dynamic_partition TAD CPU fix
Signed-off-by: raver119 <raver119@gmail.com>
* - rewrite cpu code for usampling2d/3d
- write cuda code for usampling2d/3d
Signed-off-by: Yurii <yurii@skymind.io>
* dynamic_stitch CUDA vector case
Signed-off-by: raver119 <raver119@gmail.com>
* dynamic_stitch CUDA TAD case concept
Signed-off-by: raver119 <raver119@gmail.com>
* dynamic_stitch CUDA TAD case impl
Signed-off-by: raver119 <raver119@gmail.com>
* Added tests for dynamic_stitch 3D-4D cases.
* minor tests tweaks
Signed-off-by: raver119 <raver119@gmail.com>
* Fixed type check for dynamic stitch.
* min/max bp
Signed-off-by: raver119 <raver119@gmail.com>
* rewrite code for upsampling2d/3d cpu
Signed-off-by: Yurii <yurii@skymind.io>
* reduce min/max/norm_max bp
Signed-off-by: raver119 <raver119@gmail.com>
* lup implementation. Additional enhancements.
* provide code for upsamling2d/3d backprop
Signed-off-by: Yurii <yurii@skymind.io>
* weightedCrossEntropyWithLogits
Signed-off-by: raver119 <raver119@gmail.com>
* Fixed template math atomicMul for 64bit ints.
* Refactored dynamic_partition_bp op.
* inverseBroadcast fix
Signed-off-by: raver119 <raver119@gmail.com>
* DynamicPartitionBP test datatype fixed.
* - nd4j_atomicMul Windows fix
- cpu/NDArrayLambda.hpp excluded from CUDA
Signed-off-by: raver119 <raver119@gmail.com>
2019-06-27 17:37:04 +02:00
|
|
|
return static_cast<X>(1);
|
2019-06-06 14:21:15 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z merge(X old, X opOutput, X *extraParams) {
|
Merge master to upstream (#7945)
* Shugeo strided slice zeros (#14)
* Modified strided_slice op to properly work with empty-like shapes.
* Fixed test for reduce_mean with empty-like input.
* [WIP] Last merge (#15)
* correct logsoftmax looss (#2)
* Small SameDiff listener fix (#4)
* Various fixes (#6)
* #7839 Fix for asXMatrix and tests
* #7866 EmbeddingSequenceLayer dtype fix + test
* #7856 SameDiff save/load stream methods
* #7859 RegressionEvaluation rank 4 fix + tests + axis configuration
* EvaluationBinary 3d/4d
* More evaluation 3d/4d tests
* #7847 Evaluation empty checks
* Small test ifx
* #7848 Fix median edge case
* Improve DL4J samediff layer tests
* [WIP] FastText wrapper implemented (#8)
* FastText implemented
* Some fixes
* Fix shapes for wordsNearest
* Validation of input vectors
* Fixes
* Fixed test
* Thread tagged
* Some tweaks
* setContextClassLoader for DeallocatorServiceThread
* Numpy format tests (#1)
* Various fixes (#11)
* #7852 SameDiff gather fix
* #7892 SameDiff placeholder to constant conversion
* #7890 validate input rank for MLN/CG init methods
* Fix broken permute shape calculation
* Permute and gather fixes
* Tests
* #7850 LogSumExp fix + test
* Handful of test fixes
* Empty arrays with non-scalar shapes (#10)
* minor rearrangements for lambdas
* empty tensors with non-scalar shapes
* numpy empty tensors with non-scalar shapes
* few more empty tweaks
* Small fixes
* conv3d signature update
* micro fix in batchnorm mkldnn
* Import fixes
* Fix
* MKL-DNN update
* Small fill fix
* fill with empty input + test
* Fixes
* Small error improvement
* Fix
* one special test
* couple of fixes for lstm
* Rewrite TFGraphMapper.getNDArrayFromTensor to be maintainable and less error prone
* Fixes
* FP16
* Unsigned
* BFloat16
* Fill op - empty tweaks
* - couple of fixes for empty arrays construction
- stack updated
* strided slice fix
* one transform test
* provide method for reducing shapeInfo in case of input array is empty
* Fixed reduceAlongDimensions to use empty input properly.
* couple of broadcast tests
* couple of tests broadcast tests + tweak to make them pass
* add check of non-empty to methods producing sub-arrays
* Fixed reshapeC with zeros in shape.
* complete empty check in reduce_... legacy ops
* Concat and cumsum/prod
* Tweak to empty shape inference on import
* add empty check to the rest of reduce legacy ops
* one more test
* correct typo in evalReduceShapeInfoEmpty
* Added tests for reduce_* ops to tests with zero shapes.
* few more tests for empty reductions
* Fixed strided_slice op with empty case and tests.
* one more empty reduction test
* Fixed strided_slice test.
* add empty check to NDArray::reshapei
* infOrMax
* empty min/max with infinity tests
* made unstack working correctly with empty arrays
* few IndexReduce tests + tweaks for empty shapes
* add test for empty concat
* few tests fixed
* Validation fix for reductions on empty shapes
* Reverse fix
* Reduction shape calc fixes
* SameDiff.generateOutputVariable: don't use shape function to determine number of outputs
* Range fix
* - NDArray constructor updated for scalars/empty arrays
- few tests fixed
* More fixes
* Empty creator fixes
* concat fix
* concat fix
* TF import tests: allow 'both all NaN' and 'both all inf' to pass
* Slice, zero fraction, and reshape fixes
* transpose, gather
* Zero fraction
* scalar cast fix
* Empty reduction axis support
* few more tests fixed
* Fixed input checks conforming with TF for concat op and tests.
* few tests fixed
* matmul scalar shape fix
* Fixed checkout for data type and scalarity with concat to allow non-empty scalars with vector concats.
* broadcast bool fix
* few more tests
* few more tests
* correct evalReduceShapeInfoEmpty
* argmax/argmin + tests
* one more empty edge case + one more test
* argmax/argmin/realdiv_bp tweaks
* empty reshape test + fix
* Helper fixes
* Small fixes
* Gather test fix
* Gather test fix
* Small fixes
* reduce scalar zero values
* scalar mean workaround
* Remove debug code
* along dim mean workaround
* one more test
* - equalsTo() tweak for empty arrays
- one more test
* broadcast tweaks
* [WIP] Fixing outstanding issues for NLP (#9)
* Avoid using not-inited objects
* Test fixed.
* Redundant method avoided for models like FastText
* KMeans++ implementation
* KMeans++ implementation
* Disable parallel execution
* KMeans++
* Tests
* Dev branch merge (#16)
* SameDiff: convertDataType and gradient check util improvements (#12)
* GradCheck util improvements
* StopGradient constructor + test
* SameDiff: Add datatype conversion
* Javadoc and add DataType.isNumerical()
* Small fix
* Fix SameDiff TF import test cases intermediate naming (workaround for bad default)
* TFGraphTestAllHelper: check intermediates in execution order
* Add missing debug listener
* [WIP] lstmBlock fix + other changes (#13)
- fixes lstmBlock issue
- changes NDArray method reshape(), permute(), transpose() by making them return instance instead of pointer
- CheckNumerics op
- fixes for ReduceBool IsInfOrNan & IsFinite
* Small test fix
* CheckNumerics op wrapper
* Fix some issues on master (#17)
* Fix DataVec test issue
* Fix issue with dl4j SameDiff output layer
* Dtype fix for lambda layers
* #7912 BertIterator dtype fix (use float32 not global default)
* [WIP] Next set of CUDA stuff (#7)
New CUDA implementations and improvements
* bad file
* Dev branch master merge (#23)
* SameDiff: convertDataType and gradient check util improvements (#12)
* GradCheck util improvements
* StopGradient constructor + test
* SameDiff: Add datatype conversion
* Javadoc and add DataType.isNumerical()
* Small fix
* Fix SameDiff TF import test cases intermediate naming (workaround for bad default)
* TFGraphTestAllHelper: check intermediates in execution order
* Add missing debug listener
* [WIP] lstmBlock fix + other changes (#13)
- fixes lstmBlock issue
- changes NDArray method reshape(), permute(), transpose() by making them return instance instead of pointer
- CheckNumerics op
- fixes for ReduceBool IsInfOrNan & IsFinite
* Small test fix
* CheckNumerics op wrapper
* Compatibility of deserialization (#18)
Signed-off-by: Alexander Stoyakin <alexander.stoyakin@gmail.com>
* SameDiff: add activation gradient checking support for debugging (#19)
* SameDiff gradient checker: first pass on activation gradient checks
* Fixes + tests for activation gradient checking
* Javadoc
* [WIP] Some nd4j data type corrections (#20)
* Adjust data type
* Set correct Data type.
* Size of proper data type.
* fix averaged cpu load (#22)
* SameDiff ops, TF import and fixes (#24)
* CheckNumerics tests + fixes + misc fixes
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fake quant
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fixes
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* FakeQuantWithMinMaxArgs
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* CheckNumerics fix
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fix libnd4j ALL_INTS and ALL_FLOATS declaration (uint and bfloat types)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Small fix
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Javadoc
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Exception tweak
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* fix
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fix for out of scope stack allocated var use
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Ignores
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Ignore for known failing test (already logged issue)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Merge upstream to fork (#25)
* Add thousand-separator commas to TotalParams (#7915)
* Add thousand-separator commas to TotalParams
The number of parameters can be quite large, and it would help the reading of the summary printout to have the TotalParams column & values at the bottom have thousand-separator-commas in them.
* Add thousand-separator commas to MultiLayerNetwork
Corresponding change to MultiLayerNetwork
Signed-off-by: Jxtps Jxtps <jxtps435@gmail.com>
* Update contributing and issue/PR templates (#7934)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fix link to AdaDelta paper (#7942)
Fix link to AdaDelta paper hosted on matthewzeiler.com
Signed-off-by: Jxtps
* Fixes, and ignores for known/logged failing issues (#7943)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* SameDiff + DL4J/SameDiff: Multiple fixes (#28)
* #7919 HDF5 attribute buffer length fix
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* #7909 Arbiter constructor exception ux improvements
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* #7925 RNN output layer length checks
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* #7939 Add listener for validating inputs are not incorrectly modified
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* #7939 Integrate NonInplaceValidationListener into tests
* #7844 DL4J SameDiff fixes for variable minibatch size
* DL4J SameDiff fixes - ensure gradient for input placeholder is available
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Tweaks to ExternalErrorsFunction - use placeholders, make more robust
* Another fix
* More fixes
* More SameDiff/DL4J fixes
* Scope out scalar array creation in BaseScalarOp
* Remove debug code
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* [WIP] Final dev branch merge (#29)
* SameDiff: convertDataType and gradient check util improvements (#12)
* GradCheck util improvements
* StopGradient constructor + test
* SameDiff: Add datatype conversion
* Javadoc and add DataType.isNumerical()
* Small fix
* Fix SameDiff TF import test cases intermediate naming (workaround for bad default)
* TFGraphTestAllHelper: check intermediates in execution order
* Add missing debug listener
* [WIP] lstmBlock fix + other changes (#13)
- fixes lstmBlock issue
- changes NDArray method reshape(), permute(), transpose() by making them return instance instead of pointer
- CheckNumerics op
- fixes for ReduceBool IsInfOrNan & IsFinite
* Small test fix
* CheckNumerics op wrapper
* Compatibility of deserialization (#18)
Signed-off-by: Alexander Stoyakin <alexander.stoyakin@gmail.com>
* SameDiff: add activation gradient checking support for debugging (#19)
* SameDiff gradient checker: first pass on activation gradient checks
* Fixes + tests for activation gradient checking
* Javadoc
* [WIP] Some nd4j data type corrections (#20)
* Adjust data type
* Set correct Data type.
* Size of proper data type.
* fix averaged cpu load (#22)
* [WIP] Multiple dataset iterators (#27)
* Splitting dataset into arbitrary number
* Fixes
* Multiple split of iterator
* Test
* Test
* Some fixes
* signature change
* one more tweak
Signed-off-by: raver119 <raver119@gmail.com>
* one more test for sequential use of DataSetIteratorSplitter
Signed-off-by: raver119 <raver119@gmail.com>
* Fixes
* Fixes
* one more test for Alexander
Signed-off-by: raver119 <raver119@gmail.com>
* Some fixes
* Some fixes
* one more test for Alexander
Signed-off-by: raver119 <raver119@gmail.com>
* minor test fix
Signed-off-by: raver119 <raver119@gmail.com>
* Some fixes
* Some fixes
* couple of assertions tweaked
Signed-off-by: raver119 <raver119@gmail.com>
* MDS splitter test :/
Signed-off-by: raver119 <raver119@gmail.com>
* Minor refactoring
* Multi dataset
* Some fixes
* More tests
* Small number of test fixes/improvements (failures on CI) (#31)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* [WIP] More CUDA stuff (#26)
* initial commit
Signed-off-by: raver119 <raver119@gmail.com>
* LRN BP CUDA
Signed-off-by: raver119 <raver119@gmail.com>
* less memory
Signed-off-by: raver119 <raver119@gmail.com>
* Fixed bug with crop_and_resize op helper.
* get rid of unnecessary index-calculation dunction
Signed-off-by: Yurii <yurii@skymind.io>
* Fixed sort with nth_element cuda-based helper.
* Refactored nth_element.
* Refactored nth_element op and tests.
* Modified usage of dim array with sortTad routine.
* Refactored main routine of helper for non_max_image_suppression op.
* non_max_image_suppression op helper with cuda kernel implementation. Initial revision.
* fix vol2col cuda kernel
* meh
Signed-off-by: raver119 <raver119@gmail.com>
* topK concept
Signed-off-by: raver119 <raver119@gmail.com>
* unsorted topK with scanWitdh of 1
Signed-off-by: raver119 <raver119@gmail.com>
* correct vol2col tests
* sorted/unsorted topK
Signed-off-by: raver119 <raver119@gmail.com>
* implementation and fixing col2im/col2vol
* Corrected usage flags with input/output with reverse op.
* dup is const now
Signed-off-by: raver119 <raver119@gmail.com>
* percentile op
Signed-off-by: raver119 <raver119@gmail.com>
* group tests for mapool2d
Signed-off-by: Yurii <yurii@skymind.io>
* special test for george
Signed-off-by: raver119 <raver119@gmail.com>
* less threads for sortTad
Signed-off-by: raver119 <raver119@gmail.com>
* provide conv2d for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* remove auther in sort tad kernel code
Signed-off-by: Yurii <yurii@skymind.io>
* provide depthwise_conv2d for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* - max_pooling_with_argmax
- null check for special use
Signed-off-by: raver119 <raver119@gmail.com>
* dts cuda
Signed-off-by: raver119 <raver119@gmail.com>
* provide sconv2d for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* std cuda
Signed-off-by: raver119 <raver119@gmail.com>
* Refactored non_max_suppression op to conform TF implementation.
* Improved suppression helper.
* provide pooling3d for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* minor lstm rearrangements
Signed-off-by: raver119 <raver119@gmail.com>
* more of minor lstm rearrangements
Signed-off-by: raver119 <raver119@gmail.com>
* (bi)dynamic_rnn
Signed-off-by: raver119 <raver119@gmail.com>
* templates init order
Signed-off-by: raver119 <raver119@gmail.com>
* Refactored non_max_suppression op.
* Added cuda kernel for non_max_suppression.
* CPU sort by key/value
Signed-off-by: raver119 <raver119@gmail.com>
* CPU sort TAD by key/value
Signed-off-by: raver119 <raver119@gmail.com>
* CPU sort TAD by key/value tests
Signed-off-by: raver119 <raver119@gmail.com>
* Eliminate compiler error with cuda implementation.
* - repaired gradCheck in cuda
- provide conv2d_bp for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* missed signature
Signed-off-by: raver119 <raver119@gmail.com>
* provide depthwise_conv2d_bp for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* Implementation of lup helper with cuda kernel. Initial commit.
* further work on backprops for convolutions
Signed-off-by: Yurii <yurii@skymind.io>
* CUDA linear sort by key/val
Signed-off-by: raver119 <raver119@gmail.com>
* CUDA tad sort by key/val
Signed-off-by: raver119 <raver119@gmail.com>
* start providing of backprop for pooling2d/3d
Signed-off-by: Yurii <yurii@skymind.io>
* Added atomicAdd for bool datatype.
* dynamic partition concept
Signed-off-by: raver119 <raver119@gmail.com>
* dynamic partition concept
Signed-off-by: raver119 <raver119@gmail.com>
* dynamic partition scalar CUDA
Signed-off-by: raver119 <raver119@gmail.com>
* important comment
Signed-off-by: raver119 <raver119@gmail.com>
* fix pooling2d/3d backprop helpers
Signed-off-by: Yurii <yurii@skymind.io>
* Added non-linear test with dynamic_partition.
* Improved test for dynamic_partition.
* dynamic_partition TAD concept
Signed-off-by: raver119 <raver119@gmail.com>
* - dynamic_partition TAD CUDA impl
- dynamic_partition TAD CPU fix
Signed-off-by: raver119 <raver119@gmail.com>
* - rewrite cpu code for usampling2d/3d
- write cuda code for usampling2d/3d
Signed-off-by: Yurii <yurii@skymind.io>
* dynamic_stitch CUDA vector case
Signed-off-by: raver119 <raver119@gmail.com>
* dynamic_stitch CUDA TAD case concept
Signed-off-by: raver119 <raver119@gmail.com>
* dynamic_stitch CUDA TAD case impl
Signed-off-by: raver119 <raver119@gmail.com>
* Added tests for dynamic_stitch 3D-4D cases.
* minor tests tweaks
Signed-off-by: raver119 <raver119@gmail.com>
* Fixed type check for dynamic stitch.
* min/max bp
Signed-off-by: raver119 <raver119@gmail.com>
* rewrite code for upsampling2d/3d cpu
Signed-off-by: Yurii <yurii@skymind.io>
* reduce min/max/norm_max bp
Signed-off-by: raver119 <raver119@gmail.com>
* lup implementation. Additional enhancements.
* provide code for upsamling2d/3d backprop
Signed-off-by: Yurii <yurii@skymind.io>
* weightedCrossEntropyWithLogits
Signed-off-by: raver119 <raver119@gmail.com>
* Fixed template math atomicMul for 64bit ints.
* Refactored dynamic_partition_bp op.
* inverseBroadcast fix
Signed-off-by: raver119 <raver119@gmail.com>
* DynamicPartitionBP test datatype fixed.
* - nd4j_atomicMul Windows fix
- cpu/NDArrayLambda.hpp excluded from CUDA
Signed-off-by: raver119 <raver119@gmail.com>
2019-06-27 17:37:04 +02:00
|
|
|
return opOutput == static_cast<X>(0) || old == static_cast<X>(0) ? static_cast<Z>(0) : static_cast<Z>(1);
|
2019-06-06 14:21:15 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z update(X old, X opOutput, X *extraParams) {
|
Merge master to upstream (#7945)
* Shugeo strided slice zeros (#14)
* Modified strided_slice op to properly work with empty-like shapes.
* Fixed test for reduce_mean with empty-like input.
* [WIP] Last merge (#15)
* correct logsoftmax looss (#2)
* Small SameDiff listener fix (#4)
* Various fixes (#6)
* #7839 Fix for asXMatrix and tests
* #7866 EmbeddingSequenceLayer dtype fix + test
* #7856 SameDiff save/load stream methods
* #7859 RegressionEvaluation rank 4 fix + tests + axis configuration
* EvaluationBinary 3d/4d
* More evaluation 3d/4d tests
* #7847 Evaluation empty checks
* Small test ifx
* #7848 Fix median edge case
* Improve DL4J samediff layer tests
* [WIP] FastText wrapper implemented (#8)
* FastText implemented
* Some fixes
* Fix shapes for wordsNearest
* Validation of input vectors
* Fixes
* Fixed test
* Thread tagged
* Some tweaks
* setContextClassLoader for DeallocatorServiceThread
* Numpy format tests (#1)
* Various fixes (#11)
* #7852 SameDiff gather fix
* #7892 SameDiff placeholder to constant conversion
* #7890 validate input rank for MLN/CG init methods
* Fix broken permute shape calculation
* Permute and gather fixes
* Tests
* #7850 LogSumExp fix + test
* Handful of test fixes
* Empty arrays with non-scalar shapes (#10)
* minor rearrangements for lambdas
* empty tensors with non-scalar shapes
* numpy empty tensors with non-scalar shapes
* few more empty tweaks
* Small fixes
* conv3d signature update
* micro fix in batchnorm mkldnn
* Import fixes
* Fix
* MKL-DNN update
* Small fill fix
* fill with empty input + test
* Fixes
* Small error improvement
* Fix
* one special test
* couple of fixes for lstm
* Rewrite TFGraphMapper.getNDArrayFromTensor to be maintainable and less error prone
* Fixes
* FP16
* Unsigned
* BFloat16
* Fill op - empty tweaks
* - couple of fixes for empty arrays construction
- stack updated
* strided slice fix
* one transform test
* provide method for reducing shapeInfo in case of input array is empty
* Fixed reduceAlongDimensions to use empty input properly.
* couple of broadcast tests
* couple of tests broadcast tests + tweak to make them pass
* add check of non-empty to methods producing sub-arrays
* Fixed reshapeC with zeros in shape.
* complete empty check in reduce_... legacy ops
* Concat and cumsum/prod
* Tweak to empty shape inference on import
* add empty check to the rest of reduce legacy ops
* one more test
* correct typo in evalReduceShapeInfoEmpty
* Added tests for reduce_* ops to tests with zero shapes.
* few more tests for empty reductions
* Fixed strided_slice op with empty case and tests.
* one more empty reduction test
* Fixed strided_slice test.
* add empty check to NDArray::reshapei
* infOrMax
* empty min/max with infinity tests
* made unstack working correctly with empty arrays
* few IndexReduce tests + tweaks for empty shapes
* add test for empty concat
* few tests fixed
* Validation fix for reductions on empty shapes
* Reverse fix
* Reduction shape calc fixes
* SameDiff.generateOutputVariable: don't use shape function to determine number of outputs
* Range fix
* - NDArray constructor updated for scalars/empty arrays
- few tests fixed
* More fixes
* Empty creator fixes
* concat fix
* concat fix
* TF import tests: allow 'both all NaN' and 'both all inf' to pass
* Slice, zero fraction, and reshape fixes
* transpose, gather
* Zero fraction
* scalar cast fix
* Empty reduction axis support
* few more tests fixed
* Fixed input checks conforming with TF for concat op and tests.
* few tests fixed
* matmul scalar shape fix
* Fixed checkout for data type and scalarity with concat to allow non-empty scalars with vector concats.
* broadcast bool fix
* few more tests
* few more tests
* correct evalReduceShapeInfoEmpty
* argmax/argmin + tests
* one more empty edge case + one more test
* argmax/argmin/realdiv_bp tweaks
* empty reshape test + fix
* Helper fixes
* Small fixes
* Gather test fix
* Gather test fix
* Small fixes
* reduce scalar zero values
* scalar mean workaround
* Remove debug code
* along dim mean workaround
* one more test
* - equalsTo() tweak for empty arrays
- one more test
* broadcast tweaks
* [WIP] Fixing outstanding issues for NLP (#9)
* Avoid using not-inited objects
* Test fixed.
* Redundant method avoided for models like FastText
* KMeans++ implementation
* KMeans++ implementation
* Disable parallel execution
* KMeans++
* Tests
* Dev branch merge (#16)
* SameDiff: convertDataType and gradient check util improvements (#12)
* GradCheck util improvements
* StopGradient constructor + test
* SameDiff: Add datatype conversion
* Javadoc and add DataType.isNumerical()
* Small fix
* Fix SameDiff TF import test cases intermediate naming (workaround for bad default)
* TFGraphTestAllHelper: check intermediates in execution order
* Add missing debug listener
* [WIP] lstmBlock fix + other changes (#13)
- fixes lstmBlock issue
- changes NDArray method reshape(), permute(), transpose() by making them return instance instead of pointer
- CheckNumerics op
- fixes for ReduceBool IsInfOrNan & IsFinite
* Small test fix
* CheckNumerics op wrapper
* Fix some issues on master (#17)
* Fix DataVec test issue
* Fix issue with dl4j SameDiff output layer
* Dtype fix for lambda layers
* #7912 BertIterator dtype fix (use float32 not global default)
* [WIP] Next set of CUDA stuff (#7)
New CUDA implementations and improvements
* bad file
* Dev branch master merge (#23)
* SameDiff: convertDataType and gradient check util improvements (#12)
* GradCheck util improvements
* StopGradient constructor + test
* SameDiff: Add datatype conversion
* Javadoc and add DataType.isNumerical()
* Small fix
* Fix SameDiff TF import test cases intermediate naming (workaround for bad default)
* TFGraphTestAllHelper: check intermediates in execution order
* Add missing debug listener
* [WIP] lstmBlock fix + other changes (#13)
- fixes lstmBlock issue
- changes NDArray method reshape(), permute(), transpose() by making them return instance instead of pointer
- CheckNumerics op
- fixes for ReduceBool IsInfOrNan & IsFinite
* Small test fix
* CheckNumerics op wrapper
* Compatibility of deserialization (#18)
Signed-off-by: Alexander Stoyakin <alexander.stoyakin@gmail.com>
* SameDiff: add activation gradient checking support for debugging (#19)
* SameDiff gradient checker: first pass on activation gradient checks
* Fixes + tests for activation gradient checking
* Javadoc
* [WIP] Some nd4j data type corrections (#20)
* Adjust data type
* Set correct Data type.
* Size of proper data type.
* fix averaged cpu load (#22)
* SameDiff ops, TF import and fixes (#24)
* CheckNumerics tests + fixes + misc fixes
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fake quant
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fixes
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* FakeQuantWithMinMaxArgs
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* CheckNumerics fix
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fix libnd4j ALL_INTS and ALL_FLOATS declaration (uint and bfloat types)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Small fix
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Javadoc
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Exception tweak
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* fix
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fix for out of scope stack allocated var use
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Ignores
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Ignore for known failing test (already logged issue)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Merge upstream to fork (#25)
* Add thousand-separator commas to TotalParams (#7915)
* Add thousand-separator commas to TotalParams
The number of parameters can be quite large, and it would help the reading of the summary printout to have the TotalParams column & values at the bottom have thousand-separator-commas in them.
* Add thousand-separator commas to MultiLayerNetwork
Corresponding change to MultiLayerNetwork
Signed-off-by: Jxtps Jxtps <jxtps435@gmail.com>
* Update contributing and issue/PR templates (#7934)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fix link to AdaDelta paper (#7942)
Fix link to AdaDelta paper hosted on matthewzeiler.com
Signed-off-by: Jxtps
* Fixes, and ignores for known/logged failing issues (#7943)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* SameDiff + DL4J/SameDiff: Multiple fixes (#28)
* #7919 HDF5 attribute buffer length fix
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* #7909 Arbiter constructor exception ux improvements
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* #7925 RNN output layer length checks
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* #7939 Add listener for validating inputs are not incorrectly modified
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* #7939 Integrate NonInplaceValidationListener into tests
* #7844 DL4J SameDiff fixes for variable minibatch size
* DL4J SameDiff fixes - ensure gradient for input placeholder is available
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Tweaks to ExternalErrorsFunction - use placeholders, make more robust
* Another fix
* More fixes
* More SameDiff/DL4J fixes
* Scope out scalar array creation in BaseScalarOp
* Remove debug code
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* [WIP] Final dev branch merge (#29)
* SameDiff: convertDataType and gradient check util improvements (#12)
* GradCheck util improvements
* StopGradient constructor + test
* SameDiff: Add datatype conversion
* Javadoc and add DataType.isNumerical()
* Small fix
* Fix SameDiff TF import test cases intermediate naming (workaround for bad default)
* TFGraphTestAllHelper: check intermediates in execution order
* Add missing debug listener
* [WIP] lstmBlock fix + other changes (#13)
- fixes lstmBlock issue
- changes NDArray method reshape(), permute(), transpose() by making them return instance instead of pointer
- CheckNumerics op
- fixes for ReduceBool IsInfOrNan & IsFinite
* Small test fix
* CheckNumerics op wrapper
* Compatibility of deserialization (#18)
Signed-off-by: Alexander Stoyakin <alexander.stoyakin@gmail.com>
* SameDiff: add activation gradient checking support for debugging (#19)
* SameDiff gradient checker: first pass on activation gradient checks
* Fixes + tests for activation gradient checking
* Javadoc
* [WIP] Some nd4j data type corrections (#20)
* Adjust data type
* Set correct Data type.
* Size of proper data type.
* fix averaged cpu load (#22)
* [WIP] Multiple dataset iterators (#27)
* Splitting dataset into arbitrary number
* Fixes
* Multiple split of iterator
* Test
* Test
* Some fixes
* signature change
* one more tweak
Signed-off-by: raver119 <raver119@gmail.com>
* one more test for sequential use of DataSetIteratorSplitter
Signed-off-by: raver119 <raver119@gmail.com>
* Fixes
* Fixes
* one more test for Alexander
Signed-off-by: raver119 <raver119@gmail.com>
* Some fixes
* Some fixes
* one more test for Alexander
Signed-off-by: raver119 <raver119@gmail.com>
* minor test fix
Signed-off-by: raver119 <raver119@gmail.com>
* Some fixes
* Some fixes
* couple of assertions tweaked
Signed-off-by: raver119 <raver119@gmail.com>
* MDS splitter test :/
Signed-off-by: raver119 <raver119@gmail.com>
* Minor refactoring
* Multi dataset
* Some fixes
* More tests
* Small number of test fixes/improvements (failures on CI) (#31)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* [WIP] More CUDA stuff (#26)
* initial commit
Signed-off-by: raver119 <raver119@gmail.com>
* LRN BP CUDA
Signed-off-by: raver119 <raver119@gmail.com>
* less memory
Signed-off-by: raver119 <raver119@gmail.com>
* Fixed bug with crop_and_resize op helper.
* get rid of unnecessary index-calculation dunction
Signed-off-by: Yurii <yurii@skymind.io>
* Fixed sort with nth_element cuda-based helper.
* Refactored nth_element.
* Refactored nth_element op and tests.
* Modified usage of dim array with sortTad routine.
* Refactored main routine of helper for non_max_image_suppression op.
* non_max_image_suppression op helper with cuda kernel implementation. Initial revision.
* fix vol2col cuda kernel
* meh
Signed-off-by: raver119 <raver119@gmail.com>
* topK concept
Signed-off-by: raver119 <raver119@gmail.com>
* unsorted topK with scanWitdh of 1
Signed-off-by: raver119 <raver119@gmail.com>
* correct vol2col tests
* sorted/unsorted topK
Signed-off-by: raver119 <raver119@gmail.com>
* implementation and fixing col2im/col2vol
* Corrected usage flags with input/output with reverse op.
* dup is const now
Signed-off-by: raver119 <raver119@gmail.com>
* percentile op
Signed-off-by: raver119 <raver119@gmail.com>
* group tests for mapool2d
Signed-off-by: Yurii <yurii@skymind.io>
* special test for george
Signed-off-by: raver119 <raver119@gmail.com>
* less threads for sortTad
Signed-off-by: raver119 <raver119@gmail.com>
* provide conv2d for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* remove auther in sort tad kernel code
Signed-off-by: Yurii <yurii@skymind.io>
* provide depthwise_conv2d for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* - max_pooling_with_argmax
- null check for special use
Signed-off-by: raver119 <raver119@gmail.com>
* dts cuda
Signed-off-by: raver119 <raver119@gmail.com>
* provide sconv2d for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* std cuda
Signed-off-by: raver119 <raver119@gmail.com>
* Refactored non_max_suppression op to conform TF implementation.
* Improved suppression helper.
* provide pooling3d for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* minor lstm rearrangements
Signed-off-by: raver119 <raver119@gmail.com>
* more of minor lstm rearrangements
Signed-off-by: raver119 <raver119@gmail.com>
* (bi)dynamic_rnn
Signed-off-by: raver119 <raver119@gmail.com>
* templates init order
Signed-off-by: raver119 <raver119@gmail.com>
* Refactored non_max_suppression op.
* Added cuda kernel for non_max_suppression.
* CPU sort by key/value
Signed-off-by: raver119 <raver119@gmail.com>
* CPU sort TAD by key/value
Signed-off-by: raver119 <raver119@gmail.com>
* CPU sort TAD by key/value tests
Signed-off-by: raver119 <raver119@gmail.com>
* Eliminate compiler error with cuda implementation.
* - repaired gradCheck in cuda
- provide conv2d_bp for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* missed signature
Signed-off-by: raver119 <raver119@gmail.com>
* provide depthwise_conv2d_bp for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* Implementation of lup helper with cuda kernel. Initial commit.
* further work on backprops for convolutions
Signed-off-by: Yurii <yurii@skymind.io>
* CUDA linear sort by key/val
Signed-off-by: raver119 <raver119@gmail.com>
* CUDA tad sort by key/val
Signed-off-by: raver119 <raver119@gmail.com>
* start providing of backprop for pooling2d/3d
Signed-off-by: Yurii <yurii@skymind.io>
* Added atomicAdd for bool datatype.
* dynamic partition concept
Signed-off-by: raver119 <raver119@gmail.com>
* dynamic partition concept
Signed-off-by: raver119 <raver119@gmail.com>
* dynamic partition scalar CUDA
Signed-off-by: raver119 <raver119@gmail.com>
* important comment
Signed-off-by: raver119 <raver119@gmail.com>
* fix pooling2d/3d backprop helpers
Signed-off-by: Yurii <yurii@skymind.io>
* Added non-linear test with dynamic_partition.
* Improved test for dynamic_partition.
* dynamic_partition TAD concept
Signed-off-by: raver119 <raver119@gmail.com>
* - dynamic_partition TAD CUDA impl
- dynamic_partition TAD CPU fix
Signed-off-by: raver119 <raver119@gmail.com>
* - rewrite cpu code for usampling2d/3d
- write cuda code for usampling2d/3d
Signed-off-by: Yurii <yurii@skymind.io>
* dynamic_stitch CUDA vector case
Signed-off-by: raver119 <raver119@gmail.com>
* dynamic_stitch CUDA TAD case concept
Signed-off-by: raver119 <raver119@gmail.com>
* dynamic_stitch CUDA TAD case impl
Signed-off-by: raver119 <raver119@gmail.com>
* Added tests for dynamic_stitch 3D-4D cases.
* minor tests tweaks
Signed-off-by: raver119 <raver119@gmail.com>
* Fixed type check for dynamic stitch.
* min/max bp
Signed-off-by: raver119 <raver119@gmail.com>
* rewrite code for upsampling2d/3d cpu
Signed-off-by: Yurii <yurii@skymind.io>
* reduce min/max/norm_max bp
Signed-off-by: raver119 <raver119@gmail.com>
* lup implementation. Additional enhancements.
* provide code for upsamling2d/3d backprop
Signed-off-by: Yurii <yurii@skymind.io>
* weightedCrossEntropyWithLogits
Signed-off-by: raver119 <raver119@gmail.com>
* Fixed template math atomicMul for 64bit ints.
* Refactored dynamic_partition_bp op.
* inverseBroadcast fix
Signed-off-by: raver119 <raver119@gmail.com>
* DynamicPartitionBP test datatype fixed.
* - nd4j_atomicMul Windows fix
- cpu/NDArrayLambda.hpp excluded from CUDA
Signed-off-by: raver119 <raver119@gmail.com>
2019-06-27 17:37:04 +02:00
|
|
|
return opOutput == static_cast<X>(0) || old == static_cast<X>(0) ? static_cast<Z>(0) : static_cast<Z>(1);
|
2019-06-06 14:21:15 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z postProcess(X reduction, Nd4jLong n, X *extraParams) {
|
Merge master to upstream (#7945)
* Shugeo strided slice zeros (#14)
* Modified strided_slice op to properly work with empty-like shapes.
* Fixed test for reduce_mean with empty-like input.
* [WIP] Last merge (#15)
* correct logsoftmax looss (#2)
* Small SameDiff listener fix (#4)
* Various fixes (#6)
* #7839 Fix for asXMatrix and tests
* #7866 EmbeddingSequenceLayer dtype fix + test
* #7856 SameDiff save/load stream methods
* #7859 RegressionEvaluation rank 4 fix + tests + axis configuration
* EvaluationBinary 3d/4d
* More evaluation 3d/4d tests
* #7847 Evaluation empty checks
* Small test ifx
* #7848 Fix median edge case
* Improve DL4J samediff layer tests
* [WIP] FastText wrapper implemented (#8)
* FastText implemented
* Some fixes
* Fix shapes for wordsNearest
* Validation of input vectors
* Fixes
* Fixed test
* Thread tagged
* Some tweaks
* setContextClassLoader for DeallocatorServiceThread
* Numpy format tests (#1)
* Various fixes (#11)
* #7852 SameDiff gather fix
* #7892 SameDiff placeholder to constant conversion
* #7890 validate input rank for MLN/CG init methods
* Fix broken permute shape calculation
* Permute and gather fixes
* Tests
* #7850 LogSumExp fix + test
* Handful of test fixes
* Empty arrays with non-scalar shapes (#10)
* minor rearrangements for lambdas
* empty tensors with non-scalar shapes
* numpy empty tensors with non-scalar shapes
* few more empty tweaks
* Small fixes
* conv3d signature update
* micro fix in batchnorm mkldnn
* Import fixes
* Fix
* MKL-DNN update
* Small fill fix
* fill with empty input + test
* Fixes
* Small error improvement
* Fix
* one special test
* couple of fixes for lstm
* Rewrite TFGraphMapper.getNDArrayFromTensor to be maintainable and less error prone
* Fixes
* FP16
* Unsigned
* BFloat16
* Fill op - empty tweaks
* - couple of fixes for empty arrays construction
- stack updated
* strided slice fix
* one transform test
* provide method for reducing shapeInfo in case of input array is empty
* Fixed reduceAlongDimensions to use empty input properly.
* couple of broadcast tests
* couple of tests broadcast tests + tweak to make them pass
* add check of non-empty to methods producing sub-arrays
* Fixed reshapeC with zeros in shape.
* complete empty check in reduce_... legacy ops
* Concat and cumsum/prod
* Tweak to empty shape inference on import
* add empty check to the rest of reduce legacy ops
* one more test
* correct typo in evalReduceShapeInfoEmpty
* Added tests for reduce_* ops to tests with zero shapes.
* few more tests for empty reductions
* Fixed strided_slice op with empty case and tests.
* one more empty reduction test
* Fixed strided_slice test.
* add empty check to NDArray::reshapei
* infOrMax
* empty min/max with infinity tests
* made unstack working correctly with empty arrays
* few IndexReduce tests + tweaks for empty shapes
* add test for empty concat
* few tests fixed
* Validation fix for reductions on empty shapes
* Reverse fix
* Reduction shape calc fixes
* SameDiff.generateOutputVariable: don't use shape function to determine number of outputs
* Range fix
* - NDArray constructor updated for scalars/empty arrays
- few tests fixed
* More fixes
* Empty creator fixes
* concat fix
* concat fix
* TF import tests: allow 'both all NaN' and 'both all inf' to pass
* Slice, zero fraction, and reshape fixes
* transpose, gather
* Zero fraction
* scalar cast fix
* Empty reduction axis support
* few more tests fixed
* Fixed input checks conforming with TF for concat op and tests.
* few tests fixed
* matmul scalar shape fix
* Fixed checkout for data type and scalarity with concat to allow non-empty scalars with vector concats.
* broadcast bool fix
* few more tests
* few more tests
* correct evalReduceShapeInfoEmpty
* argmax/argmin + tests
* one more empty edge case + one more test
* argmax/argmin/realdiv_bp tweaks
* empty reshape test + fix
* Helper fixes
* Small fixes
* Gather test fix
* Gather test fix
* Small fixes
* reduce scalar zero values
* scalar mean workaround
* Remove debug code
* along dim mean workaround
* one more test
* - equalsTo() tweak for empty arrays
- one more test
* broadcast tweaks
* [WIP] Fixing outstanding issues for NLP (#9)
* Avoid using not-inited objects
* Test fixed.
* Redundant method avoided for models like FastText
* KMeans++ implementation
* KMeans++ implementation
* Disable parallel execution
* KMeans++
* Tests
* Dev branch merge (#16)
* SameDiff: convertDataType and gradient check util improvements (#12)
* GradCheck util improvements
* StopGradient constructor + test
* SameDiff: Add datatype conversion
* Javadoc and add DataType.isNumerical()
* Small fix
* Fix SameDiff TF import test cases intermediate naming (workaround for bad default)
* TFGraphTestAllHelper: check intermediates in execution order
* Add missing debug listener
* [WIP] lstmBlock fix + other changes (#13)
- fixes lstmBlock issue
- changes NDArray method reshape(), permute(), transpose() by making them return instance instead of pointer
- CheckNumerics op
- fixes for ReduceBool IsInfOrNan & IsFinite
* Small test fix
* CheckNumerics op wrapper
* Fix some issues on master (#17)
* Fix DataVec test issue
* Fix issue with dl4j SameDiff output layer
* Dtype fix for lambda layers
* #7912 BertIterator dtype fix (use float32 not global default)
* [WIP] Next set of CUDA stuff (#7)
New CUDA implementations and improvements
* bad file
* Dev branch master merge (#23)
* SameDiff: convertDataType and gradient check util improvements (#12)
* GradCheck util improvements
* StopGradient constructor + test
* SameDiff: Add datatype conversion
* Javadoc and add DataType.isNumerical()
* Small fix
* Fix SameDiff TF import test cases intermediate naming (workaround for bad default)
* TFGraphTestAllHelper: check intermediates in execution order
* Add missing debug listener
* [WIP] lstmBlock fix + other changes (#13)
- fixes lstmBlock issue
- changes NDArray method reshape(), permute(), transpose() by making them return instance instead of pointer
- CheckNumerics op
- fixes for ReduceBool IsInfOrNan & IsFinite
* Small test fix
* CheckNumerics op wrapper
* Compatibility of deserialization (#18)
Signed-off-by: Alexander Stoyakin <alexander.stoyakin@gmail.com>
* SameDiff: add activation gradient checking support for debugging (#19)
* SameDiff gradient checker: first pass on activation gradient checks
* Fixes + tests for activation gradient checking
* Javadoc
* [WIP] Some nd4j data type corrections (#20)
* Adjust data type
* Set correct Data type.
* Size of proper data type.
* fix averaged cpu load (#22)
* SameDiff ops, TF import and fixes (#24)
* CheckNumerics tests + fixes + misc fixes
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fake quant
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fixes
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* FakeQuantWithMinMaxArgs
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* CheckNumerics fix
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fix libnd4j ALL_INTS and ALL_FLOATS declaration (uint and bfloat types)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Small fix
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Javadoc
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Exception tweak
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* fix
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fix for out of scope stack allocated var use
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Ignores
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Ignore for known failing test (already logged issue)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Merge upstream to fork (#25)
* Add thousand-separator commas to TotalParams (#7915)
* Add thousand-separator commas to TotalParams
The number of parameters can be quite large, and it would help the reading of the summary printout to have the TotalParams column & values at the bottom have thousand-separator-commas in them.
* Add thousand-separator commas to MultiLayerNetwork
Corresponding change to MultiLayerNetwork
Signed-off-by: Jxtps Jxtps <jxtps435@gmail.com>
* Update contributing and issue/PR templates (#7934)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fix link to AdaDelta paper (#7942)
Fix link to AdaDelta paper hosted on matthewzeiler.com
Signed-off-by: Jxtps
* Fixes, and ignores for known/logged failing issues (#7943)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* SameDiff + DL4J/SameDiff: Multiple fixes (#28)
* #7919 HDF5 attribute buffer length fix
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* #7909 Arbiter constructor exception ux improvements
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* #7925 RNN output layer length checks
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* #7939 Add listener for validating inputs are not incorrectly modified
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* #7939 Integrate NonInplaceValidationListener into tests
* #7844 DL4J SameDiff fixes for variable minibatch size
* DL4J SameDiff fixes - ensure gradient for input placeholder is available
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Tweaks to ExternalErrorsFunction - use placeholders, make more robust
* Another fix
* More fixes
* More SameDiff/DL4J fixes
* Scope out scalar array creation in BaseScalarOp
* Remove debug code
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* [WIP] Final dev branch merge (#29)
* SameDiff: convertDataType and gradient check util improvements (#12)
* GradCheck util improvements
* StopGradient constructor + test
* SameDiff: Add datatype conversion
* Javadoc and add DataType.isNumerical()
* Small fix
* Fix SameDiff TF import test cases intermediate naming (workaround for bad default)
* TFGraphTestAllHelper: check intermediates in execution order
* Add missing debug listener
* [WIP] lstmBlock fix + other changes (#13)
- fixes lstmBlock issue
- changes NDArray method reshape(), permute(), transpose() by making them return instance instead of pointer
- CheckNumerics op
- fixes for ReduceBool IsInfOrNan & IsFinite
* Small test fix
* CheckNumerics op wrapper
* Compatibility of deserialization (#18)
Signed-off-by: Alexander Stoyakin <alexander.stoyakin@gmail.com>
* SameDiff: add activation gradient checking support for debugging (#19)
* SameDiff gradient checker: first pass on activation gradient checks
* Fixes + tests for activation gradient checking
* Javadoc
* [WIP] Some nd4j data type corrections (#20)
* Adjust data type
* Set correct Data type.
* Size of proper data type.
* fix averaged cpu load (#22)
* [WIP] Multiple dataset iterators (#27)
* Splitting dataset into arbitrary number
* Fixes
* Multiple split of iterator
* Test
* Test
* Some fixes
* signature change
* one more tweak
Signed-off-by: raver119 <raver119@gmail.com>
* one more test for sequential use of DataSetIteratorSplitter
Signed-off-by: raver119 <raver119@gmail.com>
* Fixes
* Fixes
* one more test for Alexander
Signed-off-by: raver119 <raver119@gmail.com>
* Some fixes
* Some fixes
* one more test for Alexander
Signed-off-by: raver119 <raver119@gmail.com>
* minor test fix
Signed-off-by: raver119 <raver119@gmail.com>
* Some fixes
* Some fixes
* couple of assertions tweaked
Signed-off-by: raver119 <raver119@gmail.com>
* MDS splitter test :/
Signed-off-by: raver119 <raver119@gmail.com>
* Minor refactoring
* Multi dataset
* Some fixes
* More tests
* Small number of test fixes/improvements (failures on CI) (#31)
Signed-off-by: AlexDBlack <blacka101@gmail.com>
* [WIP] More CUDA stuff (#26)
* initial commit
Signed-off-by: raver119 <raver119@gmail.com>
* LRN BP CUDA
Signed-off-by: raver119 <raver119@gmail.com>
* less memory
Signed-off-by: raver119 <raver119@gmail.com>
* Fixed bug with crop_and_resize op helper.
* get rid of unnecessary index-calculation dunction
Signed-off-by: Yurii <yurii@skymind.io>
* Fixed sort with nth_element cuda-based helper.
* Refactored nth_element.
* Refactored nth_element op and tests.
* Modified usage of dim array with sortTad routine.
* Refactored main routine of helper for non_max_image_suppression op.
* non_max_image_suppression op helper with cuda kernel implementation. Initial revision.
* fix vol2col cuda kernel
* meh
Signed-off-by: raver119 <raver119@gmail.com>
* topK concept
Signed-off-by: raver119 <raver119@gmail.com>
* unsorted topK with scanWitdh of 1
Signed-off-by: raver119 <raver119@gmail.com>
* correct vol2col tests
* sorted/unsorted topK
Signed-off-by: raver119 <raver119@gmail.com>
* implementation and fixing col2im/col2vol
* Corrected usage flags with input/output with reverse op.
* dup is const now
Signed-off-by: raver119 <raver119@gmail.com>
* percentile op
Signed-off-by: raver119 <raver119@gmail.com>
* group tests for mapool2d
Signed-off-by: Yurii <yurii@skymind.io>
* special test for george
Signed-off-by: raver119 <raver119@gmail.com>
* less threads for sortTad
Signed-off-by: raver119 <raver119@gmail.com>
* provide conv2d for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* remove auther in sort tad kernel code
Signed-off-by: Yurii <yurii@skymind.io>
* provide depthwise_conv2d for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* - max_pooling_with_argmax
- null check for special use
Signed-off-by: raver119 <raver119@gmail.com>
* dts cuda
Signed-off-by: raver119 <raver119@gmail.com>
* provide sconv2d for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* std cuda
Signed-off-by: raver119 <raver119@gmail.com>
* Refactored non_max_suppression op to conform TF implementation.
* Improved suppression helper.
* provide pooling3d for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* minor lstm rearrangements
Signed-off-by: raver119 <raver119@gmail.com>
* more of minor lstm rearrangements
Signed-off-by: raver119 <raver119@gmail.com>
* (bi)dynamic_rnn
Signed-off-by: raver119 <raver119@gmail.com>
* templates init order
Signed-off-by: raver119 <raver119@gmail.com>
* Refactored non_max_suppression op.
* Added cuda kernel for non_max_suppression.
* CPU sort by key/value
Signed-off-by: raver119 <raver119@gmail.com>
* CPU sort TAD by key/value
Signed-off-by: raver119 <raver119@gmail.com>
* CPU sort TAD by key/value tests
Signed-off-by: raver119 <raver119@gmail.com>
* Eliminate compiler error with cuda implementation.
* - repaired gradCheck in cuda
- provide conv2d_bp for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* missed signature
Signed-off-by: raver119 <raver119@gmail.com>
* provide depthwise_conv2d_bp for cuda
Signed-off-by: Yurii <yurii@skymind.io>
* Implementation of lup helper with cuda kernel. Initial commit.
* further work on backprops for convolutions
Signed-off-by: Yurii <yurii@skymind.io>
* CUDA linear sort by key/val
Signed-off-by: raver119 <raver119@gmail.com>
* CUDA tad sort by key/val
Signed-off-by: raver119 <raver119@gmail.com>
* start providing of backprop for pooling2d/3d
Signed-off-by: Yurii <yurii@skymind.io>
* Added atomicAdd for bool datatype.
* dynamic partition concept
Signed-off-by: raver119 <raver119@gmail.com>
* dynamic partition concept
Signed-off-by: raver119 <raver119@gmail.com>
* dynamic partition scalar CUDA
Signed-off-by: raver119 <raver119@gmail.com>
* important comment
Signed-off-by: raver119 <raver119@gmail.com>
* fix pooling2d/3d backprop helpers
Signed-off-by: Yurii <yurii@skymind.io>
* Added non-linear test with dynamic_partition.
* Improved test for dynamic_partition.
* dynamic_partition TAD concept
Signed-off-by: raver119 <raver119@gmail.com>
* - dynamic_partition TAD CUDA impl
- dynamic_partition TAD CPU fix
Signed-off-by: raver119 <raver119@gmail.com>
* - rewrite cpu code for usampling2d/3d
- write cuda code for usampling2d/3d
Signed-off-by: Yurii <yurii@skymind.io>
* dynamic_stitch CUDA vector case
Signed-off-by: raver119 <raver119@gmail.com>
* dynamic_stitch CUDA TAD case concept
Signed-off-by: raver119 <raver119@gmail.com>
* dynamic_stitch CUDA TAD case impl
Signed-off-by: raver119 <raver119@gmail.com>
* Added tests for dynamic_stitch 3D-4D cases.
* minor tests tweaks
Signed-off-by: raver119 <raver119@gmail.com>
* Fixed type check for dynamic stitch.
* min/max bp
Signed-off-by: raver119 <raver119@gmail.com>
* rewrite code for upsampling2d/3d cpu
Signed-off-by: Yurii <yurii@skymind.io>
* reduce min/max/norm_max bp
Signed-off-by: raver119 <raver119@gmail.com>
* lup implementation. Additional enhancements.
* provide code for upsamling2d/3d backprop
Signed-off-by: Yurii <yurii@skymind.io>
* weightedCrossEntropyWithLogits
Signed-off-by: raver119 <raver119@gmail.com>
* Fixed template math atomicMul for 64bit ints.
* Refactored dynamic_partition_bp op.
* inverseBroadcast fix
Signed-off-by: raver119 <raver119@gmail.com>
* DynamicPartitionBP test datatype fixed.
* - nd4j_atomicMul Windows fix
- cpu/NDArrayLambda.hpp excluded from CUDA
Signed-off-by: raver119 <raver119@gmail.com>
2019-06-27 17:37:04 +02:00
|
|
|
return reduction != static_cast<X>(0);
|
2019-06-06 14:21:15 +02:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class ClipByValue {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
if (d1 > params[1])
|
|
|
|
return params[1];
|
|
|
|
if (d1 < params[0])
|
|
|
|
return params[0];
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class LstmClip {
|
|
|
|
public:
|
|
|
|
no_op_exec_special
|
|
|
|
no_op_exec_special_cuda
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
X _v = (X) d2;
|
|
|
|
if (d1 > _v)
|
|
|
|
return _v;
|
|
|
|
else if (d1 < -_v)
|
|
|
|
return -_v;
|
|
|
|
else return d1;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class Swish {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return d1 * nd4j::math::nd4j_sigmoid<X,X>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class GELU {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return d1 * nd4j::math::nd4j_sigmoid<X,X>(static_cast<X>(1.702f) * d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class PreciseGELU {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
auto sp = nd4j::math::nd4j_sqrt<X, X>(static_cast<X>(2) / static_cast<X>(M_PI));
|
|
|
|
auto xp = d1 + nd4j::math::nd4j_pow<X, X, X>(static_cast<X>(0.044715) * d1, static_cast<X>(3));
|
|
|
|
return (d1 / static_cast<X>(2)) * (static_cast<X>(1) + nd4j::math::nd4j_tanh<X, X>(sp * xp));
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class GELUDerivative {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
auto x17 = static_cast<X>(1.702f) * d1;
|
|
|
|
auto ep = nd4j::math::nd4j_pow<X,X,X>(static_cast<X>(M_E), x17);
|
|
|
|
// (E^(1.702 x) (1. + E^(1.702 x) + 1.702 x))/(1. + E^(1.702 x))^2
|
|
|
|
return (ep * (static_cast<X>(1.f) + ep + x17)) / nd4j::math::nd4j_pow<X, int, X>((static_cast<X>(1.f) + ep), 2);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class PreciseGELUDerivative {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
auto x79 = static_cast<X>(0.797885) * d1;
|
|
|
|
auto x03 = nd4j::math::nd4j_pow<X, int, X>(static_cast<X>(0.0356774) * d1, 3);
|
|
|
|
auto x39 = static_cast<X>(0.398942) * d1;
|
|
|
|
auto x05 = nd4j::math::nd4j_pow<X, int, X>(static_cast<X>(0.0535161) * d1, 3);
|
|
|
|
auto scz = nd4j::math::nd4j_sech<X, X>(x79 + x03);
|
|
|
|
// 0.5 + (0.398942 x + 0.0535161 x^3) Sech[0.797885 x + 0.0356774 x^3]^2 + 0.5 Tanh[0.797885 x + 0.0356774 x^3]
|
|
|
|
return static_cast<X>(0.5) + (x39 + x05) * (scz * scz) + static_cast<X>(0.5) * nd4j::math::nd4j_tanh<X, X>(x79 + x03);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class SwishDerivative {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
X ex = nd4j::math::nd4j_pow<X, X, X>(static_cast<X>(M_E), d1);
|
|
|
|
return (ex * (d1 + ex + static_cast<X>(1.f))) / nd4j::math::nd4j_pow<X, X, X>((ex + static_cast<X>(1.f)) , static_cast<X>(2.f));
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class LogSigmoid {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_log<X, X>(nd4j::math::nd4j_sigmoid<X, X>(d1));
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class LogSigmoidDerivative {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
X ex = nd4j::math::nd4j_pow<X, X, X>(M_E, d1);
|
|
|
|
return static_cast<X>(1.f) / (ex + static_cast<X>(1.f));
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class Sigmoid {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_sigmoid<X, X>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class SigmoidDerivative {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_sigmoidderivative<X, X>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class HardSigmoid {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_min<X>(static_cast<X>(1), nd4j::math::nd4j_max<X>(static_cast<X>(0), (static_cast<X>(0.2f)) * d1 + static_cast<X>(0.5f)));
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class HardSigmoidDerivative {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return d1 < static_cast<X>(-2.5f) || d1 > static_cast<X>(2.5f) ? static_cast<X>(0.f) : static_cast<X>(0.2f);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Scale to be between a min and max
|
|
|
|
*/
|
|
|
|
template <typename X>
|
|
|
|
class SetRange {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
auto min = params[0];
|
|
|
|
auto max = params[1];
|
|
|
|
if (static_cast<X>(d1) >= min && static_cast<X>(d1) <= max)
|
|
|
|
return d1;
|
|
|
|
if (min == static_cast<X>(0) && max == static_cast<X>(1)) {
|
|
|
|
auto val = static_cast<X>(1) / (static_cast<X>(1) + nd4j::math::nd4j_exp<X, X>(-d1));
|
|
|
|
return (nd4j::math::nd4j_floor<X,X>(val * (max - min)) + min);
|
|
|
|
}
|
|
|
|
|
|
|
|
return (nd4j::math::nd4j_floor<X,X>(d1 * (max - min)) + min);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class Sin {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_sin<X,X>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class Square {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return d1 * d1;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class Sqrt {
|
|
|
|
public:
|
|
|
|
no_op_exec_special
|
|
|
|
no_op_exec_special_cuda
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Z *params) {
|
|
|
|
return nd4j::math::nd4j_sqrt<X, Z>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class RSqrt {
|
|
|
|
public:
|
|
|
|
no_op_exec_special
|
|
|
|
no_op_exec_special_cuda
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Z *params) {
|
|
|
|
return static_cast<Z>(1) / nd4j::math::nd4j_sqrt<X, Z>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class Rint {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_rint<X,X>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class SoftPlus {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return nd4j::math::softplus<X, X>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class Sign {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return (d1 > static_cast<X>(0)) - (d1 < static_cast<X>(0));
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class TimesOneMinus {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return d1 * (static_cast<X>(1) - d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class RationalTanh {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
// keep 2/3 as runtime variable, to match precision
|
|
|
|
auto dis = (static_cast<X>(2) / static_cast<X>(3)) * d1;
|
|
|
|
|
|
|
|
auto tanh = nd4j::math::nd4j_sgn<X,X>(dis) * (static_cast<X>(1) - (static_cast<X>(1) / (static_cast<X>(1) + static_cast<X>(nd4j::math::nd4j_abs<X>(dis)) + nd4j::math::nd4j_pow<X, X, X>(dis, static_cast<X>(2)) + static_cast<X>(1.41645f) * nd4j::math::nd4j_pow<X, X, X>(dis, static_cast<X>(4)) )));
|
|
|
|
return static_cast<X>(1.7159f) * tanh;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class RationalTanhDerivative {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
auto dis = (static_cast<X>(2.f) / static_cast<X>(3.f)) * d1;
|
|
|
|
|
|
|
|
auto a = static_cast<X>(1.f) + nd4j::math::nd4j_abs<X>(dis) + nd4j::math::nd4j_pow<X, X, X>(dis, static_cast<X>(2.f)) + static_cast<X>(1.41645f) * nd4j::math::nd4j_pow<X, X, X>(dis, static_cast<X>(4));
|
|
|
|
|
|
|
|
auto tDeriv = (static_cast<X>(1.f) + nd4j::math::nd4j_sign<X,X>(dis) * (static_cast<X>(2.f) * dis + static_cast<X>(4.f) * static_cast<X>(1.41645f) * nd4j::math::nd4j_pow<X, X, X>(dis, static_cast<X>(3)))) / (a * a);
|
|
|
|
|
|
|
|
return static_cast<X>(1.7159f) * (static_cast<X>(2.f) / static_cast<X>(3.f)) * tDeriv;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class Tanh {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_tanh<X, X>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class RectifiedTanh {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_max<X>(static_cast<X>(0), nd4j::math::nd4j_tanh<X,X>(d1));
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class RectifiedTanhDerivative {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return d1 > static_cast<X>(0.f) ? nd4j::math::nd4j_tanhderivative<X,X>(d1) : static_cast<X>(0.f);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class ATanh {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_atanh<X,X>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class TanhDerivative {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_tanhderivative<X,X>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class Cube {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return d1 * d1 * d1;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class CubeDerivative {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return static_cast<X>(3) * d1 * d1;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class ACos {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_acos<X, X>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class ASinh {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_asinh<X, X>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class ASinhDerivative {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return static_cast<X>(1.f) / (nd4j::math::nd4j_sqrt<X, X>(nd4j::math::nd4j_pow<X, X, X>(d1, static_cast<X>(2.f)) + static_cast<X>(1.f)));
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class ACosh {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_acosh<X, X>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class ACoshDerivative {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return static_cast<X>(1.f) / (nd4j::math::nd4j_sqrt<X, X>(d1 - static_cast<X>(1.f)) * nd4j::math::nd4j_sqrt<X, X>(d1 + static_cast<X>(1.f)));
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class Ones {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return static_cast<X>(1.0f);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class SoftSign {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_softsign<X, X>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class SoftSignDerivative {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_softsignderivative<X,X>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class MatchConditionBool {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_bool
|
|
|
|
no_op_exec_special_bool_cuda
|
|
|
|
|
|
|
|
// this op return 1.0 if condition met, 0.0 otherwise
|
|
|
|
op_def static Z op(X d1, X *extraParams) {
|
|
|
|
X compare = extraParams[0];
|
|
|
|
X eps = extraParams[1];
|
|
|
|
|
|
|
|
auto mode = static_cast<int>(extraParams[2]);
|
|
|
|
//nd4j_printf("value: %f; comp: %f; eps: %f; mode: %i;\n", d1, compare, eps, mode);
|
|
|
|
|
|
|
|
switch (mode) {
|
|
|
|
case 0: // equals
|
|
|
|
return nd4j::math::nd4j_abs<X>(d1 - compare) <= eps ? true : false;
|
|
|
|
case 1: // not equals
|
|
|
|
return nd4j::math::nd4j_abs<X>(d1 - compare) > eps ? true : false;
|
|
|
|
case 2: // less_than
|
|
|
|
return d1 < compare ? true : false;
|
|
|
|
case 3: // greater_than
|
|
|
|
return d1 > compare ? true : false;
|
|
|
|
case 4: // less_or_equals_than
|
|
|
|
return d1 <= compare ? true : false;
|
|
|
|
case 5: // greater_or_equals_than
|
|
|
|
return d1 >= compare ? true : false;
|
|
|
|
case 6: // abs_less_than
|
|
|
|
return nd4j::math::nd4j_abs<X>(d1) < compare ? true : false;
|
|
|
|
case 7: // abs_greater_than
|
|
|
|
return nd4j::math::nd4j_abs<X>(d1) > compare ? true : false;
|
|
|
|
case 8: // is inf
|
|
|
|
return nd4j::math::nd4j_isinf(d1) ? true : false;
|
|
|
|
case 9: // is nan
|
|
|
|
return nd4j::math::nd4j_isnan(d1) ? true : false;
|
|
|
|
case 10:
|
|
|
|
return (d1 == compare) ? true : false;
|
|
|
|
case 11:
|
|
|
|
return (d1 != compare) ? true : false;
|
|
|
|
case 12: // abs_greater_or_equals_than
|
|
|
|
return nd4j::math::nd4j_abs<X>(d1) >= compare ? true : false;
|
|
|
|
case 13: // abs_less_or_equals_than
|
|
|
|
return nd4j::math::nd4j_abs<X>(d1) <= compare ? true : false;
|
|
|
|
case 14:
|
|
|
|
// isFinite
|
|
|
|
return !(nd4j::math::nd4j_isinf(d1) || nd4j::math::nd4j_isnan(d1));
|
|
|
|
case 15:
|
|
|
|
// isInfinite
|
|
|
|
return nd4j::math::nd4j_isinf(d1) || nd4j::math::nd4j_isnan(d1);
|
|
|
|
default:
|
|
|
|
printf("Undefined match condition: [%i]\n", mode);
|
|
|
|
}
|
|
|
|
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class MatchCondition {
|
|
|
|
public:
|
|
|
|
no_op_exec_special
|
|
|
|
no_op_exec_special_cuda
|
|
|
|
|
|
|
|
no_op_exec_special_accumulation_long
|
|
|
|
no_op_exec_special_accumulation_cuda
|
|
|
|
|
|
|
|
op_def static Z startingValue(const X *input) {
|
|
|
|
return static_cast<Z>(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z merge(Z old, Z opOutput, X *extraParams) {
|
|
|
|
return old + opOutput;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z update(Z old, Z opOutput, X *extraParams) {
|
|
|
|
return old + opOutput;
|
|
|
|
}
|
|
|
|
|
|
|
|
// this op return 1.0 if condition met, 0.0 otherwise
|
|
|
|
op_def static Z op(X d1, X *extraParams) {
|
|
|
|
X compare = extraParams[0];
|
|
|
|
X eps = extraParams[1];
|
|
|
|
|
|
|
|
auto mode = static_cast<int>(extraParams[2]);
|
|
|
|
//printf("value: %f; comp: %f; eps: %f; mode: %i;\n", (float) d1, (float) compare, (float) eps, mode);
|
|
|
|
|
|
|
|
switch (mode) {
|
|
|
|
case 0: // equals
|
|
|
|
return nd4j::math::nd4j_abs<X>(d1 - compare) <= eps ? 1 : 0;
|
|
|
|
case 1: // not equals
|
|
|
|
return nd4j::math::nd4j_abs<X>(d1 - compare) > eps ? 1 : 0;
|
|
|
|
case 2: // less_than
|
|
|
|
return d1 < compare ? 1 : 0;
|
|
|
|
case 3: // greater_than
|
|
|
|
return d1 > compare ? 1 : 0;
|
|
|
|
case 4: // less_or_equals_than
|
|
|
|
return d1 <= compare ? 1 : 0;
|
|
|
|
case 5: // greater_or_equals_than
|
|
|
|
return d1 >= compare ? 1 : 0;
|
|
|
|
case 6: // abs_less_than
|
|
|
|
return nd4j::math::nd4j_abs<X>(d1) < compare ? 1 : 0;
|
|
|
|
case 7: // abs_greater_than
|
|
|
|
return nd4j::math::nd4j_abs<X>(d1) > compare ? 1 : 0;
|
|
|
|
case 8: // is inf
|
|
|
|
return nd4j::math::nd4j_isinf(d1) ? 1 : 0;
|
|
|
|
case 9: // is nan
|
|
|
|
return nd4j::math::nd4j_isnan(d1) ? 1 : 0;
|
|
|
|
case 10:
|
|
|
|
return (d1 == compare) ? 1 : 0;
|
|
|
|
case 11:
|
|
|
|
return (d1 != compare) ? 1 : 0;
|
|
|
|
case 12: // abs_greater_or_equals_than
|
|
|
|
return nd4j::math::nd4j_abs<X>(d1) >= compare ? 1 : 0;
|
|
|
|
case 13: // abs_less_or_equals_than
|
|
|
|
return nd4j::math::nd4j_abs<X>(d1) <= compare ? 1 : 0;
|
|
|
|
case 14:
|
|
|
|
// isFinite
|
|
|
|
return !(nd4j::math::nd4j_isinf(d1) || nd4j::math::nd4j_isnan(d1)) ? 1 : 0;
|
|
|
|
case 15:
|
|
|
|
// isInfinite
|
|
|
|
return nd4j::math::nd4j_isinf(d1) || nd4j::math::nd4j_isnan(d1) ? 1 : 0;
|
|
|
|
default:
|
|
|
|
printf("Undefined match condition: [%i]\n", mode);
|
|
|
|
}
|
|
|
|
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z postProcess(Z reduction, Nd4jLong n, X *extraParams) {
|
|
|
|
return reduction;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2019-08-31 19:57:39 +02:00
|
|
|
template <typename X, typename Y, typename Z>
|
2019-06-06 14:21:15 +02:00
|
|
|
class ELU {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
2019-08-31 19:57:39 +02:00
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
return nd4j::math::nd4j_elu<X,Z>(d1, static_cast<X>(d2));
|
2019-06-06 14:21:15 +02:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2019-08-31 19:57:39 +02:00
|
|
|
template <typename X, typename Y, typename Z>
|
2019-06-06 14:21:15 +02:00
|
|
|
class ELUDerivative {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
2019-08-31 19:57:39 +02:00
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
return nd4j::math::nd4j_eluderivative<X,Z>(d1, static_cast<X>(d2));
|
2019-06-06 14:21:15 +02:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class RELU {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
auto xt = static_cast<Z>(d1);
|
|
|
|
auto xf = static_cast<Z>(d2);
|
|
|
|
return xt < xf ? xf : xt;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class SXELogitsSmoother {
|
|
|
|
public:
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
return d1 * ((X)1.f - (X) d2) + (X)(0.5f) * (X) d2;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class RELU6 {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
auto relu = simdOps::RELU<X,Y,Z>::op(d1, d2, params);
|
|
|
|
return relu < static_cast<Z>(6) ? relu : static_cast<Z>(6);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class LeakyRELU {
|
|
|
|
public:
|
|
|
|
no_op_exec_special
|
|
|
|
no_op_exec_special_cuda
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
auto val = static_cast<Z>(d1);
|
|
|
|
auto alpha = static_cast<Z>(d2);
|
|
|
|
return val < 0.0f ? alpha * val : val;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class SELU {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return d1 > static_cast<X>(0.0f) ? static_cast<X>(SELU_LAMBDA) * static_cast<X>(d1) : static_cast<X>(SELU_LAMBDA) * (static_cast<X>(SELU_ALPHA) * nd4j::math::nd4j_exp<X, X>(d1) - static_cast<X>(SELU_ALPHA));
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class SELUDerivative {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return d1 > static_cast<X>(0.f) ? static_cast<X>(SELU_LAMBDA) : static_cast<X>(SELU_ALPHA) * static_cast<X>(SELU_LAMBDA) * nd4j::math::nd4j_exp<X, X>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class LeakyRELUDerivative {
|
|
|
|
public:
|
|
|
|
no_op_exec_special
|
|
|
|
no_op_exec_special_cuda
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
if (d1 >= static_cast<X>(0))
|
|
|
|
return static_cast<Z>(1);
|
|
|
|
else
|
|
|
|
return static_cast<Z>(d2);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class ASin {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_asin<X,X>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class Sinh {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_sinh<X,X>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class SinhDerivative {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_cosh<X, X>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class Cosh {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_cosh<X,X>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class Tan {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_tan<X,X>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class TanDerivative {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return static_cast<X>(1.f) / nd4j::math::nd4j_pow<X, X, X>(nd4j::math::nd4j_cos<X, X>(d1), static_cast<X>(2.0f));
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class ATan {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return nd4j::math::nd4j_atan<X, X>(d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class Atan2 {
|
|
|
|
public:
|
|
|
|
no_op_exec_special
|
|
|
|
no_op_exec_special_cuda
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2) {
|
|
|
|
return nd4j::math::nd4j_atan2<X, Z>(d2, d1);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
return op(d1, d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
// op for MetaOps
|
|
|
|
op_def static Z op(X d1, Y *params) {
|
|
|
|
return op(d1, params[0]);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class Identity {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class Stabilize {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
X k = params[0];
|
|
|
|
if (d1 * k > static_cast<X>(- MIN_CUTFOFF))
|
|
|
|
return static_cast<X>(- MIN_CUTFOFF) / k;
|
|
|
|
else if (d1 * k < static_cast<X>(MIN_CUTFOFF))
|
|
|
|
return static_cast<X>(MIN_CUTFOFF) / k;
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class Step {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
return (d1 > static_cast<X>(d2) ? static_cast<Z>(1) : static_cast<Z>(0));
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class OneMinus {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
return static_cast<X>(1) - d1;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class Sum {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_accumulation_same
|
|
|
|
no_op_exec_special_accumulation_same_cuda
|
|
|
|
|
|
|
|
op_def static X startingValue(const X *input) {
|
|
|
|
return static_cast<X>(0.0f);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X merge(X old, X opOutput, X *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X update(X old, X opOutput, X *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *extraParams) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X postProcess(X reduction, Nd4jLong n, X *extraParams) {
|
|
|
|
return reduction;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class ReduceSameBenchmarkOp {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_accumulation_same
|
|
|
|
no_op_exec_special_accumulation_same_cuda
|
|
|
|
|
|
|
|
const static functions::ReduceType reduceType = functions::ReduceType::SUM;
|
|
|
|
|
|
|
|
op_def static X startingValue(const X *input) {
|
|
|
|
return static_cast<X>(0.0f);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X merge(X old, X opOutput, X *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X update(X old, X opOutput, X *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *extraParams) {
|
|
|
|
auto f1 = static_cast<float>(d1);
|
|
|
|
return static_cast<X>(nd4j::math::nd4j_pow<float,float,float>(f1, 3)
|
|
|
|
+ nd4j::math::nd4j_log<float,float>(f1) * nd4j::math::nd4j_sin<float,float>(f1)
|
|
|
|
/ nd4j::math::nd4j_tanh<float,float>(static_cast<float>(M_E) * static_cast<float>(M_PI) * f1)
|
|
|
|
* nd4j::math::nd4j_sqrt<float,float>(static_cast<float>(M_PI) / f1)
|
|
|
|
- nd4j::math::nd4j_atan<float,float>(static_cast<float>(M_E) / f1));
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X postProcess(X reduction, Nd4jLong n, X *extraParams) {
|
|
|
|
return reduction;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class ShannonEntropy {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_accumulation
|
|
|
|
no_op_exec_special_accumulation_cuda
|
|
|
|
|
|
|
|
const static functions::ReduceType reduceType = functions::ReduceType::SUM;
|
|
|
|
|
|
|
|
op_def static X startingValue(const X *input) {
|
|
|
|
return static_cast<X>(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z merge(Z old, Z opOutput, Z *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z update(Z old, Z opOutput, Z *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Z *extraParams) {
|
|
|
|
auto p = d1 * d1;
|
|
|
|
return static_cast<Z>(p) * nd4j::math::nd4j_log<X, Z>(p);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParams) {
|
|
|
|
return -reduction;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class LogEntropy {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_accumulation
|
|
|
|
no_op_exec_special_accumulation_cuda
|
|
|
|
|
|
|
|
const static functions::ReduceType reduceType = functions::ReduceType::SUM;
|
|
|
|
|
|
|
|
op_def static X startingValue(const X *input) {
|
|
|
|
return static_cast<X>(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z merge(Z old, Z opOutput, Z *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z update(Z old, Z opOutput, Z *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Z *extraParams) {
|
|
|
|
return static_cast<Z>(d1) * nd4j::math::nd4j_log<X, Z>(d1);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParams) {
|
|
|
|
//entropy is -sum(p(x) * log(p(x))); log entropy is log of this
|
|
|
|
return nd4j::math::nd4j_log<Z, Z>(-reduction);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class Entropy {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_accumulation
|
|
|
|
no_op_exec_special_accumulation_cuda
|
|
|
|
|
|
|
|
const static functions::ReduceType reduceType = functions::ReduceType::SUM;
|
|
|
|
|
|
|
|
op_def static X startingValue(const X *input) {
|
|
|
|
return static_cast<X>(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z merge(Z old, Z opOutput, Z *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z update(Z old, Z opOutput, Z *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Z *extraParams) {
|
|
|
|
return static_cast<Z>(d1) * nd4j::math::nd4j_log<X, Z>(d1);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParams) {
|
|
|
|
return static_cast<Z>(-reduction); //entropy is -sum(p(x) * log(p(x)))
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class ASum {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_accumulation_same
|
|
|
|
no_op_exec_special_accumulation_same_cuda
|
|
|
|
|
|
|
|
const static functions::ReduceType reduceType = functions::ReduceType::ASUM;
|
|
|
|
|
|
|
|
op_def static X startingValue(const X *input) {
|
|
|
|
return static_cast<X>(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X merge(X old, X opOutput, X *extraParams) {
|
|
|
|
return nd4j::math::nd4j_abs<X>(opOutput) + nd4j::math::nd4j_abs<X>(old);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X update(X old, X opOutput, X *extraParams) {
|
|
|
|
return nd4j::math::nd4j_abs<X>(opOutput) + nd4j::math::nd4j_abs<X>(old);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *extraParams) {
|
|
|
|
return nd4j::math::nd4j_abs<X>(d1);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X postProcess(X reduction, Nd4jLong n, X *extraParams) {
|
|
|
|
return nd4j::math::nd4j_abs<X>(reduction);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class CountNonZero {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_accumulation_long
|
|
|
|
no_op_exec_special_accumulation_cuda
|
|
|
|
|
|
|
|
const static functions::ReduceType reduceType = functions::ReduceType::ASUM;
|
|
|
|
|
|
|
|
op_def static Z startingValue(const X *input) {
|
|
|
|
return static_cast<Z>(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z merge(Z old, Z opOutput, X *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z update(Z old, Z opOutput, X *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, X *extraParams) {
|
|
|
|
return d1 == static_cast<X>(0.0f) ? static_cast<Z>(0.0f) : static_cast<Z>(1.0f);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z postProcess(Z reduction, Nd4jLong n, X *extraParams) {
|
|
|
|
return reduction;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class CountZero {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_accumulation_long
|
|
|
|
no_op_exec_special_accumulation_cuda
|
|
|
|
|
|
|
|
const static functions::ReduceType reduceType = functions::ReduceType::SUM;
|
|
|
|
|
|
|
|
op_def static Z startingValue(const X *input) {
|
|
|
|
return static_cast<Z>(0.0f);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z merge(Z old, Z opOutput, X *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z update(Z old, Z opOutput, X *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, X *extraParams) {
|
|
|
|
return d1 == static_cast<X>(0) ? static_cast<X>(1) : static_cast<X>(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z postProcess(X reduction, Nd4jLong n, X *extraParams) {
|
|
|
|
return static_cast<Z>(reduction);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class Prod {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_accumulation_same
|
|
|
|
no_op_exec_special_accumulation_same_cuda
|
|
|
|
|
|
|
|
const static functions::ReduceType reduceType = functions::ReduceType::PRODUCT;
|
|
|
|
|
|
|
|
op_def static X startingValue(const X *input) {
|
|
|
|
return static_cast<X>(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X merge(X old, X opOutput, X *extraParams) {
|
|
|
|
return opOutput * old;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X update(X old, X opOutput, X *extraParams) {
|
|
|
|
return opOutput * old;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X op(X d1, X *extraParams) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X postProcess(X reduction, Nd4jLong n, X *extraParams) {
|
|
|
|
return reduction;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class Any {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_accumulation
|
|
|
|
no_op_exec_special_accumulation_cuda
|
|
|
|
|
|
|
|
const static functions::ReduceType reduceType = functions::ReduceType::SUM;
|
|
|
|
|
|
|
|
op_def static X startingValue(const X *input) {
|
|
|
|
return static_cast<X>(0.0f);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z merge(X old, X opOutput, X *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z update(X old, X opOutput, X *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, X *extraParams) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z postProcess(X reduction, Nd4jLong n, X *extraParams) {
|
|
|
|
return reduction > static_cast<X>(0) ? static_cast<Z>(1) : static_cast<Z>(0) ;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class All {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_accumulation
|
|
|
|
no_op_exec_special_accumulation_cuda
|
|
|
|
|
|
|
|
const static functions::ReduceType reduceType = functions::ReduceType::PRODUCT;
|
|
|
|
|
|
|
|
op_def static X startingValue(const X *input) {
|
|
|
|
return static_cast<X>(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z merge(X old, X opOutput, X *extraParams) {
|
|
|
|
return opOutput * old;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z update(X old, X opOutput, X *extraParams) {
|
|
|
|
return opOutput * old;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, X *extraParams) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z postProcess(X reduction, Nd4jLong n, X *extraParams) {
|
|
|
|
return reduction > static_cast<X>(0) ? static_cast<Z>(1) : static_cast<Z>(0);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class Mean {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_accumulation
|
|
|
|
no_op_exec_special_accumulation_cuda
|
|
|
|
|
|
|
|
const static functions::ReduceType reduceType = functions::ReduceType::SUM;
|
|
|
|
|
|
|
|
op_def static X startingValue(const X *input) {
|
|
|
|
return static_cast<X>(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z merge(Z old, Z opOutput, Z *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z update(Z old, Z opOutput, Z *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Z *extraParams) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParams) {
|
|
|
|
return reduction / (Z) n;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class ReduceFloatBenchmarkOp {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_accumulation
|
|
|
|
no_op_exec_special_accumulation_cuda
|
|
|
|
|
|
|
|
const static functions::ReduceType reduceType = functions::ReduceType::SUM;
|
|
|
|
|
|
|
|
op_def static X startingValue(const X *input) {
|
|
|
|
return static_cast<X>(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z merge(Z old, Z opOutput, Z *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z update(Z old, Z opOutput, Z *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Z *extraParams) {
|
|
|
|
auto f1 = static_cast<float>(d1);
|
|
|
|
return static_cast<Z>(nd4j::math::nd4j_pow<float,float,float>(f1, 3)
|
|
|
|
+ nd4j::math::nd4j_log<float,float>(f1) * nd4j::math::nd4j_sin<float,float>(f1)
|
|
|
|
/ nd4j::math::nd4j_tanh<float,float>(static_cast<float>(M_E) * static_cast<float>(M_PI) * f1)
|
|
|
|
* nd4j::math::nd4j_sqrt<float,float>(static_cast<float>(M_PI) / f1)
|
|
|
|
- nd4j::math::nd4j_atan<float,float>(static_cast<float>(M_E) / f1));
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParams) {
|
|
|
|
return (Z) reduction / (Z) n;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class AMean {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_accumulation
|
|
|
|
no_op_exec_special_accumulation_cuda
|
|
|
|
|
|
|
|
const static functions::ReduceType reduceType = functions::ReduceType::SUM;
|
|
|
|
|
|
|
|
op_def static X startingValue(const X *input) {
|
|
|
|
return static_cast<X>(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z merge(Z old, Z opOutput, Z *extraParams) {
|
|
|
|
return nd4j::math::nd4j_abs<X>(opOutput) + nd4j::math::nd4j_abs<X>(old);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z update(Z old, Z opOutput, Z *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Z *extraParams) {
|
|
|
|
return nd4j::math::nd4j_abs<X>(d1);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParams) {
|
|
|
|
return nd4j::math::nd4j_abs<Z>(reduction) / static_cast<Z>(n);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class Max {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_accumulation_same
|
|
|
|
no_op_exec_special_accumulation_same_cuda
|
|
|
|
|
|
|
|
const static functions::ReduceType reduceType = functions::ReduceType::MAX;
|
|
|
|
|
|
|
|
op_def static X startingValue(const X *input) {
|
2019-06-15 13:34:34 +02:00
|
|
|
return -nd4j::DataTypeUtils::infOrMax<X>();
|
2019-06-06 14:21:15 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X merge(X old, X opOutput, X *extraParams) {
|
|
|
|
return nd4j::math::nd4j_max<X>(old, opOutput);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X update(X old, X opOutput, X *extraParams) {
|
|
|
|
return nd4j::math::nd4j_max<X>(opOutput, old);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X op(X d1, X d2, X *params) {
|
|
|
|
return nd4j::math::nd4j_max<X>(d1, d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X op(X d1, X d2) {
|
|
|
|
return nd4j::math::nd4j_max<X>(d1, d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
// FIXME: this signature overlaps with MetaOp
|
|
|
|
op_def static X op(X d1, X *extraParams) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X postProcess(X reduction, Nd4jLong n, X *extraParams) {
|
|
|
|
return reduction;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class AMaxPairwise {
|
|
|
|
public:
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
return op(d1, d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2) {
|
|
|
|
auto z1 = static_cast<Z>(d1);
|
|
|
|
auto z2 = static_cast<Z>(d2);
|
|
|
|
|
|
|
|
if (nd4j::math::nd4j_abs<Z>(z1) > nd4j::math::nd4j_abs<Z>(z2))
|
|
|
|
return z1;
|
|
|
|
else
|
|
|
|
return z2;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class AMinPairwise {
|
|
|
|
public:
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
return op(d1, d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2) {
|
|
|
|
auto z1 = static_cast<Z>(d1);
|
|
|
|
auto z2 = static_cast<Z>(d2);
|
|
|
|
|
|
|
|
if (nd4j::math::nd4j_abs<Z>(z1) < nd4j::math::nd4j_abs<Z>(z2))
|
|
|
|
return z1;
|
|
|
|
else
|
|
|
|
return z2;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class MaxPairwise {
|
|
|
|
public:
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
return nd4j::math::nd4j_max<Z>(static_cast<Z>(d1), static_cast<Z>(d2));
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2) {
|
|
|
|
return nd4j::math::nd4j_max<Z>(static_cast<Z>(d1), static_cast<Z>(d2));
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class MinPairwise {
|
|
|
|
public:
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
return nd4j::math::nd4j_min<Z>(static_cast<Z>(d1), static_cast<Z>(d2));
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2) {
|
|
|
|
return nd4j::math::nd4j_min<Z>(static_cast<Z>(d1), static_cast<Z>(d2));
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class AMax {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_accumulation_same
|
|
|
|
no_op_exec_special_accumulation_same_cuda
|
|
|
|
|
|
|
|
const static functions::ReduceType reduceType = functions::ReduceType::AMAX;
|
|
|
|
|
|
|
|
op_def static X startingValue(const X *input) {
|
|
|
|
return input[0];
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X merge(X old, X opOutput, X *extraParams) {
|
|
|
|
return nd4j::math::nd4j_max<X>(nd4j::math::nd4j_abs<X>(old), nd4j::math::nd4j_abs<X>(opOutput));
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X update(X old, X opOutput, X *extraParams) {
|
|
|
|
return nd4j::math::nd4j_max<X>(nd4j::math::nd4j_abs<X>(opOutput), nd4j::math::nd4j_abs<X>(old));
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X op(X d1, X d2, X *params) {
|
|
|
|
return nd4j::math::nd4j_max<X>(nd4j::math::nd4j_abs<X>(d1), nd4j::math::nd4j_abs<X>(d2));
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X op(X d1, X d2) {
|
|
|
|
return nd4j::math::nd4j_abs<X>(d1) > nd4j::math::nd4j_abs<X>(d2) ? d1 : d2;
|
|
|
|
}
|
|
|
|
|
|
|
|
// FIXME: this signature overlaps with MetaOp
|
|
|
|
op_def static X op(X d1, X *extraParams) {
|
|
|
|
return nd4j::math::nd4j_abs<X>(d1);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X postProcess(X reduction, Nd4jLong n, X *extraParams) {
|
|
|
|
return nd4j::math::nd4j_abs<X>(reduction);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class AMin {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_accumulation_same
|
|
|
|
no_op_exec_special_accumulation_same_cuda
|
|
|
|
|
|
|
|
const static functions::ReduceType reduceType = functions::ReduceType::AMIN;
|
|
|
|
|
|
|
|
op_def static X startingValue(const X *input) {
|
|
|
|
return input[0];
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X merge(X old, X opOutput, X *extraParams) {
|
|
|
|
return nd4j::math::nd4j_min<X>(nd4j::math::nd4j_abs<X>(old), nd4j::math::nd4j_abs<X>(opOutput));
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X update(X old, X opOutput, X *extraParams) {
|
|
|
|
return nd4j::math::nd4j_min<X>(nd4j::math::nd4j_abs<X>(opOutput), nd4j::math::nd4j_abs<X>(old));
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X op(X d1, X d2, X *params) {
|
|
|
|
return nd4j::math::nd4j_min<X>(nd4j::math::nd4j_abs<X>(d1), nd4j::math::nd4j_abs<X>(d2));
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X op(X d1, X d2) {
|
|
|
|
return nd4j::math::nd4j_min<X>(nd4j::math::nd4j_abs<X>(d1), nd4j::math::nd4j_abs<X>(d2));
|
|
|
|
}
|
|
|
|
|
|
|
|
// FIXME: this signature overlaps with MetaOp
|
|
|
|
op_def static X op(X d1, X *extraParams) {
|
|
|
|
return nd4j::math::nd4j_abs<X>(d1);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X postProcess(X reduction, Nd4jLong n, X *extraParams) {
|
|
|
|
return nd4j::math::nd4j_abs<X>(reduction);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class Min {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_accumulation_same
|
|
|
|
no_op_exec_special_accumulation_same_cuda
|
|
|
|
|
|
|
|
const static functions::ReduceType reduceType = functions::ReduceType::MIN;
|
|
|
|
|
|
|
|
op_def static X startingValue(const X *input) {
|
2019-06-15 13:34:34 +02:00
|
|
|
return nd4j::DataTypeUtils::infOrMax<X>();
|
2019-06-06 14:21:15 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X merge(X old, X opOutput, X *extraParams) {
|
|
|
|
return nd4j::math::nd4j_min<X>(old, opOutput);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X update(X old, X opOutput, X *extraParams) {
|
|
|
|
return nd4j::math::nd4j_min<X>(opOutput, old);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X op(X d1, X d2, X *params) {
|
|
|
|
return nd4j::math::nd4j_min<X>(d1, d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X op(X d1, X d2) {
|
|
|
|
return nd4j::math::nd4j_min<X>(d1, d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
// FIXME: this signature overlaps with MetaOp
|
|
|
|
op_def static X op(X d1, X *extraParams) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X postProcess(X reduction, Nd4jLong n, X *extraParams) {
|
|
|
|
return reduction;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class Norm1 {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_accumulation
|
|
|
|
no_op_exec_special_accumulation_cuda
|
|
|
|
|
|
|
|
const static functions::ReduceType reduceType = functions::ReduceType::SUM;
|
|
|
|
|
|
|
|
op_def static X startingValue(const X *input) {
|
|
|
|
return static_cast<X>(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z merge(Z old, Z opOutput, Z *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z update(Z old, Z opOutput, Z *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Z *extraParams) {
|
|
|
|
return static_cast<Z>(nd4j::math::nd4j_abs<X>(d1));
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParams) {
|
|
|
|
return reduction;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class Norm2 {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_accumulation
|
|
|
|
no_op_exec_special_accumulation_cuda
|
|
|
|
|
|
|
|
const static functions::ReduceType reduceType = functions::ReduceType::SUM;
|
|
|
|
|
|
|
|
op_def static X startingValue(const X *input) {
|
|
|
|
return static_cast<X>(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z merge(Z old, Z opOutput, Z *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
op_def static Z update(Z old, Z opOutput, Z *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParams) {
|
|
|
|
return nd4j::math::nd4j_sqrt<Z, Z>(reduction);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Z *extraParams) {
|
|
|
|
return static_cast<Z>(d1 * d1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class SquaredNorm {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_accumulation
|
|
|
|
no_op_exec_special_accumulation_cuda
|
|
|
|
|
|
|
|
const static functions::ReduceType reduceType = functions::ReduceType::SUM;
|
|
|
|
|
|
|
|
op_def static X startingValue(const X *input) {
|
|
|
|
return static_cast<X>(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z merge(Z old, Z opOutput, Z *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
op_def static Z update(Z old, Z opOutput, Z *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Z *extraParams) {
|
|
|
|
return static_cast<Z>(d1 * d1);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParams) {
|
|
|
|
return reduction;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class NormFrobenius {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_accumulation
|
|
|
|
no_op_exec_special_accumulation_cuda
|
|
|
|
|
|
|
|
const static functions::ReduceType reduceType = functions::ReduceType::SUM;
|
|
|
|
|
|
|
|
op_def static X startingValue(const X *input) {
|
|
|
|
return static_cast<X>(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z merge(Z old, Z opOutput, Z *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
op_def static Z update(Z old, Z opOutput, Z *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Z *extraParams) {
|
|
|
|
X v = nd4j::math::nd4j_abs<X>(d1);
|
|
|
|
return static_cast<Z>(v * v);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParams) {
|
|
|
|
return nd4j::math::nd4j_sqrt<Z, Z>(reduction);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class NormP {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_accumulation
|
|
|
|
no_op_exec_special_accumulation_cuda
|
|
|
|
|
|
|
|
const static functions::ReduceType reduceType = functions::ReduceType::SUM;
|
|
|
|
|
|
|
|
op_def static X startingValue(const X *input) {
|
|
|
|
return static_cast<X>(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z merge(Z old, Z opOutput, Z *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z update(Z old, Z opOutput, Z *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Z *extraParams) {
|
|
|
|
return nd4j::math::nd4j_pow<X, Z, Z>(nd4j::math::nd4j_abs<X>(d1), extraParams[0]);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParams) {
|
|
|
|
return nd4j::math::nd4j_pow<Z, Z, Z>(reduction, static_cast<Z>(1.0f) / extraParams[0]);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class NormMax {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_accumulation
|
|
|
|
no_op_exec_special_accumulation_cuda
|
|
|
|
|
|
|
|
const static functions::ReduceType reduceType = functions::ReduceType::SUM;
|
|
|
|
|
|
|
|
op_def static X startingValue(const X *input) {
|
|
|
|
return static_cast<X>(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z merge(Z old, Z opOutput, Z *extraParams) {
|
|
|
|
return opOutput + old;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z update(Z old, Z opOutput, Z *extraParams) {
|
|
|
|
return nd4j::math::nd4j_max<Z>(nd4j::math::nd4j_abs<Z>(old),
|
|
|
|
nd4j::math::nd4j_abs<Z>(opOutput));
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Z *extraParams) {
|
|
|
|
return static_cast<Z>(d1);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParams) {
|
|
|
|
return nd4j::math::nd4j_max<Z>(nd4j::math::nd4j_abs<Z>(reduction), nd4j::math::nd4j_abs<Z>(reduction));
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class Variance {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_accumulation
|
|
|
|
no_op_exec_special_accumulation_cuda
|
|
|
|
|
|
|
|
const static functions::ReduceType reduceType = functions::ReduceType::SUM;
|
|
|
|
|
|
|
|
op_def static X startingValue(const X *input) {
|
|
|
|
return static_cast<X>(0.0f);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z merge(X old, X opOutput, Z *extraParams) {
|
|
|
|
return old + opOutput;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z update(X old, X opOutput, Z *extraParams) {
|
|
|
|
return old + opOutput;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static X op(X d1, Z *extraParams) {
|
|
|
|
X mean = static_cast<X>(extraParams[0]);
|
|
|
|
X ret = d1 - mean;
|
|
|
|
return ret * ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z postProcess(X reduction, Nd4jLong n, Z *extraParams) {
|
|
|
|
// T bias = extraParams[1];
|
|
|
|
// return (reduction - (nd4j::math::nd4j_pow<T>(bias, static_cast<T>(2.0f)) / static_cast<T>(n))) / (n - 1)
|
|
|
|
return static_cast<Z>(reduction) / static_cast<Z>(n - 1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Standard deviation of a buffer
|
|
|
|
*/
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class StandardDeviation {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_accumulation
|
|
|
|
no_op_exec_special_accumulation_cuda
|
|
|
|
|
|
|
|
const static functions::ReduceType reduceType = functions::ReduceType::SUM;
|
|
|
|
|
|
|
|
op_def static X startingValue(const X *input) {
|
|
|
|
return static_cast<X>(0.0f);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z merge(X old, X opOutput, Z *extraParams) {
|
|
|
|
return old + opOutput;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z update(X old, X opOutput, Z *extraParams) {
|
|
|
|
return old + opOutput;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Z *extraParams) {
|
|
|
|
X mean = extraParams[0];
|
|
|
|
X ret = d1 - mean;
|
|
|
|
return ret * ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z postProcess(X reduction, Nd4jLong n, Z *extraParams) {
|
|
|
|
Z ret = Variance<X,Z>::postProcess(reduction, n, extraParams);
|
|
|
|
Z sqrtRet = nd4j::math::nd4j_sqrt<X, Z>(ret);
|
|
|
|
return sqrtRet;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y>
|
|
|
|
class CosineSimilarity {
|
|
|
|
public:
|
|
|
|
static const int extraParamsLen = 2;
|
|
|
|
|
|
|
|
op_def static X *generateExtraParams() {
|
|
|
|
//T *extraParams = new T[2];
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static void finalizeExtraParams(X *extraParams) {
|
|
|
|
//delete[] extraParams;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Y startingValue(const X *input) {
|
|
|
|
return static_cast<Y>(0.0f);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Y postProcess(Y reduction, Nd4jLong n, Y *extraParams) {
|
|
|
|
return reduction / (nd4j::math::nd4j_sqrt<Y, Y>(extraParams[0]) * nd4j::math::nd4j_sqrt<Y, Y>(extraParams[1]));
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Y op(X d1, X d2, Y *extraParams) {
|
|
|
|
extraParams[0] += static_cast<Y>(d1 * d1);
|
|
|
|
extraParams[1] += static_cast<Y>(d2 * d2);
|
|
|
|
return static_cast<Y>(d1 * d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static void aggregateExtraParams(Y *extraParamsTotal, Y *extraParamsLocal) {
|
|
|
|
extraParamsTotal[0] += extraParamsLocal[0];
|
|
|
|
extraParamsTotal[1] += extraParamsLocal[1];
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef __CUDACC__
|
|
|
|
static _CUDA_D inline Y opAtomic(X d1, X d2, Y *extraParams) {
|
|
|
|
nd4j::math::atomics::nd4j_atomicAdd(&extraParams[0],static_cast<Y>(d1 * d1));
|
|
|
|
nd4j::math::atomics::nd4j_atomicAdd(&extraParams[1],static_cast<Y>(d2 * d2));
|
|
|
|
|
|
|
|
return static_cast<Y>(d1 * d2);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
op_def static Y update(Y old, Y opOutput, Y *extraParams) {
|
|
|
|
return old + opOutput;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
op_def static Y merge(Y old, Y opOutput, Y *extraParams) {
|
|
|
|
return update(old, opOutput, extraParams);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X, typename Y>
|
|
|
|
class JaccardDistance {
|
|
|
|
public:
|
|
|
|
static const int extraParamsLen = 2;
|
|
|
|
|
|
|
|
op_def static X *generateExtraParams() {
|
|
|
|
//T *extraParams = new T[2];
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static void finalizeExtraParams(X *extraParams) {
|
|
|
|
//delete[] extraParams;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Y startingValue(const X *input) {
|
|
|
|
return static_cast<X>(0.0f);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Y postProcess(Y reduction, Nd4jLong n, Y *extraParams) {
|
|
|
|
// num / denom
|
|
|
|
return (static_cast<Y>(1.0f)) - (extraParams[0] / extraParams[1]);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Y num(X d1, X d2) {
|
|
|
|
return nd4j::math::nd4j_min<X>(d1, d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Y denom(X d1, X d2) {
|
|
|
|
return nd4j::math::nd4j_max<X>(d1, d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Y op(X d1, X d2, Y *extraParams) {
|
|
|
|
extraParams[0] += static_cast<Y>(num(d1, d2));
|
|
|
|
extraParams[1] += static_cast<Y>(denom(d1, d2));
|
|
|
|
return static_cast<Y>(0.0f);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static void aggregateExtraParams(Y *extraParamsTotal, Y *extraParamsLocal) {
|
|
|
|
extraParamsTotal[0] += extraParamsLocal[0];
|
|
|
|
extraParamsTotal[1] += extraParamsLocal[1];
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef __CUDACC__
|
|
|
|
__device__
|
|
|
|
static inline Y opAtomic(X d1, X d2, Y *extraParams) {
|
|
|
|
nd4j::math::atomics::nd4j_atomicAdd(&extraParams[0],num(d1, d2));
|
|
|
|
nd4j::math::atomics::nd4j_atomicAdd(&extraParams[1], denom(d1, d2));
|
|
|
|
|
|
|
|
return static_cast<Y>(0.0f);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
op_def static Y update(Y old, Y opOutput, Y *extraParams) {
|
|
|
|
return old + opOutput;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
op_def static Y merge(Y old, Y opOutput, Y *extraParams) {
|
|
|
|
return update(old, opOutput, extraParams);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X, typename Y>
|
|
|
|
class SimpleHammingDistance {
|
|
|
|
public:
|
|
|
|
static const int extraParamsLen = 0;
|
|
|
|
|
|
|
|
op_def static X *generateExtraParams() {
|
|
|
|
//T *extraParams = new T[2];
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static void finalizeExtraParams(X *extraParams) {
|
|
|
|
//delete[] extraParams;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Y startingValue(const X *input) {
|
|
|
|
return static_cast<Y>(0.0f);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Y postProcess(Y reduction, Nd4jLong n, Y *extraParams) {
|
|
|
|
return static_cast<Y>(reduction / n);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Y op(X d1, X d2, Y *extraParams) {
|
|
|
|
return (d1 == d2) ? static_cast<Y>(0.0f) : static_cast<Y>(1.0f);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static void aggregateExtraParams(Y *extraParamsTotal, Y *extraParamsLocal) {
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef __CUDACC__
|
|
|
|
__device__
|
|
|
|
static inline Y opAtomic(X d1, X d2, Y *extraParams) {
|
|
|
|
return op(d1, d2, extraParams);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
op_def static Y update(Y old, Y opOutput, Y *extraParams) {
|
|
|
|
return old + opOutput;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
op_def static Y merge(Y old, Y opOutput, Y *extraParams) {
|
|
|
|
return update(old, opOutput, extraParams);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y>
|
|
|
|
class CosineDistance {
|
|
|
|
public:
|
|
|
|
static const int extraParamsLen = 2;
|
|
|
|
|
|
|
|
op_def static X *generateExtraParams() {
|
|
|
|
//T *extraParams = new T[2];
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static void finalizeExtraParams(X *extraParams) {
|
|
|
|
//delete[] extraParams;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Y startingValue(const X *input) {
|
|
|
|
return static_cast<Y>(0.0f);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Y postProcess(Y reduction, Nd4jLong n, Y *extraParams) {
|
|
|
|
return (static_cast<Y>(1.0f)) - (reduction / (nd4j::math::nd4j_sqrt<Y, Y>(extraParams[0]) * nd4j::math::nd4j_sqrt<Y, Y>(extraParams[1])));
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Y op(X d1, X d2, Y *extraParams) {
|
|
|
|
extraParams[0] += static_cast<Y>(nd4j::math::nd4j_abs<X>(d1) * nd4j::math::nd4j_abs<X>(d1));
|
|
|
|
extraParams[1] += static_cast<Y>(nd4j::math::nd4j_abs<X>(d2) * nd4j::math::nd4j_abs<X>(d2));
|
|
|
|
return (d1 * d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static void aggregateExtraParams(Y *extraParamsTotal, Y *extraParamsLocal) {
|
|
|
|
extraParamsTotal[0] += extraParamsLocal[0];
|
|
|
|
extraParamsTotal[1] += extraParamsLocal[1];
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef __CUDACC__
|
|
|
|
static _CUDA_D inline Y opAtomic(X d1, X d2, Y *extraParams) {
|
|
|
|
nd4j::math::atomics::nd4j_atomicAdd(&extraParams[0], nd4j::math::nd4j_abs<Y>(d1) * nd4j::math::nd4j_abs<Y>(d1));
|
|
|
|
nd4j::math::atomics::nd4j_atomicAdd(&extraParams[1], nd4j::math::nd4j_abs<Y>(d2) * nd4j::math::nd4j_abs<Y>(d2));
|
|
|
|
|
|
|
|
return (d1 * d2);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
op_def static Y update(Y old, Y opOutput, Y *extraParams) {
|
|
|
|
return old + opOutput;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
op_def static Y merge(Y old, Y opOutput, Y *extraParams) {
|
|
|
|
return update(old, opOutput, extraParams);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Dot product between 2 arrays
|
|
|
|
*/
|
|
|
|
template <typename X, typename Y>
|
|
|
|
class Dot {
|
|
|
|
public:
|
|
|
|
static const int extraParamsLen = 0;
|
|
|
|
|
|
|
|
op_def static X * generateExtraParams() {
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static void finalizeExtraParams(X *extraParamsRef) {
|
|
|
|
//no-op
|
|
|
|
//delete[] * extraParamsRef;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Y startingValue(const X *input) {
|
|
|
|
return static_cast<Y>(0.0f);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Y postProcess(Y reduction, Nd4jLong n, Y *extraParamsRef) {
|
|
|
|
return reduction;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Y op(X d1, X d2, Y *extraParamsRef) {
|
|
|
|
return static_cast<Y>(d1 * d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef __CUDACC__
|
|
|
|
__device__
|
|
|
|
static inline Y opAtomic(X d1, X d2, Y *extraParamsRef) {
|
|
|
|
return op(d1, d2, extraParamsRef);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
op_def static Y update(Y old, Y opOutput, Y *extraParamsRef) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Y merge(Y old, Y opOutput, Y *extraParamsRef) {
|
|
|
|
return update(old, opOutput, extraParamsRef);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static void aggregateExtraParams(Y *extraParamsTotal, Y *extraParamsLocal) {}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Op to check equality within arrays
|
|
|
|
*/
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class EqualsWithEps {
|
|
|
|
public:
|
|
|
|
static const int extraParamsLen = 0;
|
|
|
|
|
|
|
|
op_def static X * generateExtraParams() {
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static void finalizeExtraParams(X *extraParamsRef) {
|
|
|
|
//no-op
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z startingValue(const X *input) {
|
|
|
|
return static_cast<Z>(0.0f);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParamsRef) {
|
|
|
|
return reduction;
|
|
|
|
}
|
|
|
|
|
2019-08-31 19:57:39 +02:00
|
|
|
op_def static Z op(X d1, X d2, Z *extraParamsRef) {
|
2019-06-06 14:21:15 +02:00
|
|
|
double eps = nd4j::math::nd4j_abs<double>(extraParamsRef[2]);
|
|
|
|
return static_cast<Z>(!nd4j::math::nd4j_eq<X>(d1, d2, eps));
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef __CUDACC__
|
|
|
|
__device__
|
|
|
|
static inline Z opAtomic(X d1, X d2, Z *extraParamsRef) {
|
|
|
|
return op(d1, d2, extraParamsRef);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
op_def static Z update(Z old, Z opOutput, Z *extraParamsRef) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Z merge(X old, Z opOutput, Z *extraParamsRef) {
|
|
|
|
return update(old, opOutput, extraParamsRef);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static void aggregateExtraParams(Z *extraParamsTotal, Z *extraParamsLocal) {}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X, typename Y>
|
|
|
|
class EuclideanDistance {
|
|
|
|
public:
|
|
|
|
static const int extraParamsLen = 0;
|
|
|
|
|
|
|
|
op_def static X * generateExtraParams() {
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static void finalizeExtraParams(X *extraParamsRef) {
|
|
|
|
//no-op
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Y startingValue(const X *input) {
|
|
|
|
return static_cast<Y>(0.0f);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Y postProcess(Y reduction, Nd4jLong n, Y *extraParamsRef) {
|
|
|
|
return nd4j::math::nd4j_sqrt<Y, Y>(reduction);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Y op(X d1, X d2, Y *extraParamsRef) {
|
|
|
|
X ret = d1 - d2;
|
|
|
|
return static_cast<Y>(ret * ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef __CUDACC__
|
|
|
|
__device__
|
|
|
|
static inline Y opAtomic(X d1, X d2, Y *extraParamsRef) {
|
|
|
|
return op(d1, d2, extraParamsRef);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
op_def static Y update(Y old, Y opOutput, Y *extraParamsRef) {
|
|
|
|
return opOutput + old;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Y merge(Y old, Y opOutput, Y *extraParamsRef) {
|
|
|
|
return update(old, opOutput, extraParamsRef);
|
|
|
|
}
|
|
|
|
op_def static void aggregateExtraParams(Y *extraParamsTotal, Y *extraParamsLocal) {}
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X, typename Y>
|
|
|
|
class ManhattanDistance {
|
|
|
|
public:
|
|
|
|
static const int extraParamsLen = 0;
|
|
|
|
|
|
|
|
op_def static X * generateExtraParams() {
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static void finalizeExtraParams(X *extraParamsRef) {
|
|
|
|
//no-op
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Y startingValue(const X *input) {
|
|
|
|
return static_cast<Y>(0.0f);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Y postProcess(Y reduction, Nd4jLong n, Y *extraParamsRef) {
|
|
|
|
return reduction;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Y op(X d1, X d2, Y *extraParamsRef) {
|
|
|
|
return nd4j::math::nd4j_abs<X>(d1 - d2);
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static Y update(Y old, Y opOutput, Y *extraParamsRef) {
|
|
|
|
return old + opOutput;
|
|
|
|
}
|
|
|
|
|
|
|
|
op_def static void aggregateExtraParams(Y *extraParamsTotal, Y *extraParamsLocal) {
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef __CUDACC__
|
|
|
|
__device__
|
|
|
|
static inline Y opAtomic(X d1, X d2, Y *extraParamsRef) {
|
|
|
|
return op(d1, d2, extraParamsRef);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef __clang__
|
|
|
|
#pragma omp declare simd uniform(extraParamsRef)
|
|
|
|
#endif
|
|
|
|
op_def static Y merge(X old, X opOutput, X *extraParamsRef) {
|
|
|
|
return update(old, opOutput, extraParamsRef);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2019-08-27 09:37:10 +02:00
|
|
|
template <typename X, typename Z>
|
2019-06-06 14:21:15 +02:00
|
|
|
class IndexAbsoluteMax {
|
|
|
|
public:
|
|
|
|
static _CUDA_HD inline functions::indexreduce::IndexValue<X> op(functions::indexreduce::IndexValue<X> val, X *extraParams) {
|
|
|
|
return nd4j::math::nd4j_abs<X>(val);
|
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD inline functions::indexreduce::IndexValue<X> update(functions::indexreduce::IndexValue<X> &old, functions::indexreduce::IndexValue<X> &opOutput, X *extraParams) {
|
|
|
|
opOutput.value = nd4j::math::nd4j_abs<X>(opOutput.value);
|
|
|
|
old.value = nd4j::math::nd4j_abs<X>(old.value);
|
|
|
|
if (opOutput.value > old.value)
|
|
|
|
return opOutput;
|
|
|
|
#ifdef __CUDACC__
|
|
|
|
// workaround for cuda race condition at merge phase
|
|
|
|
else if (opOutput.value == old.value && opOutput.index < old.index)
|
|
|
|
return opOutput;
|
|
|
|
#elif defined(__GNUC__)
|
|
|
|
|
|
|
|
#endif
|
|
|
|
return old;
|
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD inline functions::indexreduce::IndexValue<X> merge(
|
|
|
|
functions::indexreduce::IndexValue<X> f1,
|
|
|
|
functions::indexreduce::IndexValue<X> f2, X *extraParams) {
|
|
|
|
if (nd4j::math::nd4j_abs<X>(f1.value) > nd4j::math::nd4j_abs<X>(f2.value))
|
|
|
|
return f2;
|
|
|
|
return f1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD inline functions::indexreduce::IndexValue<X> postProcess(
|
|
|
|
functions::indexreduce::IndexValue<X> reduction, int n, int xOffset,
|
|
|
|
X *dx, int incx, X *extraParams, X *result) {
|
|
|
|
return reduction;
|
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD inline X startingValue(const X *input) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD inline functions::indexreduce::IndexValue<X> startingIndexValue(X *input) {
|
|
|
|
functions::indexreduce::IndexValue<X> local;
|
|
|
|
local.value = startingValue(input);
|
|
|
|
local.index = 0;
|
|
|
|
return local;
|
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD inline functions::indexreduce::IndexValue<X> op(functions::indexreduce::IndexValue<X> d1,
|
|
|
|
functions::indexreduce::IndexValue<X> d2, X *extraParams) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2019-08-27 09:37:10 +02:00
|
|
|
template <typename X, typename Z>
|
2019-06-06 14:21:15 +02:00
|
|
|
class FirstIndex {
|
|
|
|
public:
|
|
|
|
static _CUDA_HD inline functions::indexreduce::IndexValue<X> op(functions::indexreduce::IndexValue<X> val, X *extraParams) {
|
|
|
|
return val;
|
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD functions::indexreduce::IndexValue<X> update(functions::indexreduce::IndexValue<X> &old, functions::indexreduce::IndexValue<X> &opOutput, X *extraParams) {
|
|
|
|
|
|
|
|
#ifdef __CUDACC__
|
|
|
|
if (opOutput.index < 0)
|
|
|
|
return old;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
auto res = simdOps::MatchCondition<X,X>::op(opOutput.value, extraParams);
|
|
|
|
|
|
|
|
//printf("res: %f; oldIdx: %i; newIdx: %i\n", res, old.index, opOutput.index);
|
|
|
|
|
|
|
|
if (res == static_cast<X>(0))
|
|
|
|
return old;
|
|
|
|
|
|
|
|
if (old.index < 0)
|
|
|
|
return opOutput;
|
|
|
|
|
|
|
|
if (old.index > opOutput.index)
|
|
|
|
return opOutput;
|
|
|
|
|
|
|
|
return old;
|
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD inline X startingValue(const X *input) {
|
2019-06-15 13:34:34 +02:00
|
|
|
return -nd4j::DataTypeUtils::infOrMax<X>();
|
2019-06-06 14:21:15 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD inline functions::indexreduce::IndexValue<X> startingIndexValue(X *input) {
|
|
|
|
functions::indexreduce::IndexValue<X> local;
|
|
|
|
local.value = startingValue(input);
|
|
|
|
local.index = -1;
|
|
|
|
return local;
|
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD inline functions::indexreduce::IndexValue<X> op(functions::indexreduce::IndexValue<X> d1,
|
|
|
|
functions::indexreduce::IndexValue<X> d2, X *extraParams) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD inline functions::indexreduce::IndexValue<X> merge(
|
|
|
|
functions::indexreduce::IndexValue<X> f1,
|
|
|
|
functions::indexreduce::IndexValue<X> f2, X *extraParams) {
|
|
|
|
if (f1.index > f2.index)
|
|
|
|
return f2;
|
|
|
|
return f1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD inline functions::indexreduce::IndexValue<X> postProcess(
|
|
|
|
functions::indexreduce::IndexValue<X> reduction, int n, int xOffset,
|
|
|
|
X *dx, int incx, X *extraParams, X *result) {
|
|
|
|
return reduction;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2019-08-27 09:37:10 +02:00
|
|
|
template <typename X, typename Z>
|
2019-06-06 14:21:15 +02:00
|
|
|
class LastIndex {
|
|
|
|
public:
|
|
|
|
static _CUDA_HD inline functions::indexreduce::IndexValue<X> op(functions::indexreduce::IndexValue<X> val, X *extraParams) {
|
|
|
|
return val;
|
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD functions::indexreduce::IndexValue<X> update(functions::indexreduce::IndexValue<X> &old, functions::indexreduce::IndexValue<X> &opOutput, X *extraParams) {
|
|
|
|
#ifdef __CUDACC__
|
|
|
|
if (opOutput.index < 0)
|
|
|
|
return old;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
auto res = simdOps::MatchCondition<X,X>::op(opOutput.value, extraParams);
|
|
|
|
|
|
|
|
if (res == static_cast<X>(0))
|
|
|
|
return old;
|
|
|
|
|
|
|
|
if (old.index < 0)
|
|
|
|
return opOutput;
|
|
|
|
|
|
|
|
if (old.index < opOutput.index)
|
|
|
|
return opOutput;
|
|
|
|
|
|
|
|
return old;
|
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD inline X startingValue(const X *input) {
|
2019-06-15 13:34:34 +02:00
|
|
|
return -nd4j::DataTypeUtils::infOrMax<X>();
|
2019-06-06 14:21:15 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD inline functions::indexreduce::IndexValue<X> startingIndexValue(X *input) {
|
|
|
|
functions::indexreduce::IndexValue<X> local;
|
|
|
|
local.value = startingValue(input);
|
|
|
|
local.index = -1;
|
|
|
|
return local;
|
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD inline functions::indexreduce::IndexValue<X> op(functions::indexreduce::IndexValue<X> d1,
|
|
|
|
functions::indexreduce::IndexValue<X> d2, X *extraParams) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD inline functions::indexreduce::IndexValue<X> merge(
|
|
|
|
functions::indexreduce::IndexValue<X> f1,
|
|
|
|
functions::indexreduce::IndexValue<X> f2, X *extraParams) {
|
|
|
|
if (f1.index < f2.index)
|
|
|
|
return f2;
|
|
|
|
return f1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD inline functions::indexreduce::IndexValue<X> postProcess(
|
|
|
|
functions::indexreduce::IndexValue<X> reduction, int n, int xOffset,
|
|
|
|
X *dx, int incx, X *extraParams, X *result) {
|
|
|
|
return reduction;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2019-08-27 09:37:10 +02:00
|
|
|
template <typename X, typename Z>
|
2019-06-06 14:21:15 +02:00
|
|
|
class IndexMax {
|
|
|
|
public:
|
|
|
|
|
|
|
|
static _CUDA_HD inline functions::indexreduce::IndexValue<X> op(functions::indexreduce::IndexValue<X> val, X *extraParams) {
|
|
|
|
return val;
|
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD functions::indexreduce::IndexValue<X> update(functions::indexreduce::IndexValue<X> &old, functions::indexreduce::IndexValue<X> &opOutput, X *extraParams) {
|
|
|
|
if (opOutput.value > old.value) {
|
|
|
|
return opOutput;
|
|
|
|
}
|
|
|
|
#ifdef __CUDACC__
|
|
|
|
// workaround for cuda race condition at merge phase
|
|
|
|
else if (opOutput.value == old.value && opOutput.index < old.index)
|
|
|
|
return opOutput;
|
|
|
|
#elif defined(__GNUC__)
|
|
|
|
|
|
|
|
#endif
|
|
|
|
return old;
|
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD inline functions::indexreduce::IndexValue<X> merge(
|
|
|
|
functions::indexreduce::IndexValue<X> f1,
|
|
|
|
functions::indexreduce::IndexValue<X> f2, X *extraParams) {
|
|
|
|
if (f1.value > f2.value)
|
|
|
|
return f2;
|
|
|
|
return f1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD inline functions::indexreduce::IndexValue<X> postProcess(
|
|
|
|
functions::indexreduce::IndexValue<X> reduction, int n, int xOffset,
|
|
|
|
X *dx, int incx, X *extraParams, X *result) {
|
|
|
|
return reduction;
|
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD inline X startingValue(const X *input) {
|
2019-06-15 13:34:34 +02:00
|
|
|
return -nd4j::DataTypeUtils::infOrMax<X>();
|
2019-06-06 14:21:15 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD inline functions::indexreduce::IndexValue<X> startingIndexValue(X *input) {
|
|
|
|
functions::indexreduce::IndexValue<X> local;
|
|
|
|
local.value = startingValue(input);
|
|
|
|
local.index = 0;
|
|
|
|
return local;
|
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD inline functions::indexreduce::IndexValue<X> op(functions::indexreduce::IndexValue<X> d1,
|
|
|
|
functions::indexreduce::IndexValue<X> d2, X *extraParams) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2019-08-27 09:37:10 +02:00
|
|
|
template <typename X, typename Z>
|
2019-06-06 14:21:15 +02:00
|
|
|
class IndexAbsoluteMin {
|
|
|
|
public:
|
|
|
|
static _CUDA_HD inline functions::indexreduce::IndexValue<X> op(
|
|
|
|
functions::indexreduce::IndexValue<X> val, X *extraParams) {
|
|
|
|
return val;
|
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD inline X startingValue(const X *input) {
|
2019-06-15 13:34:34 +02:00
|
|
|
return nd4j::DataTypeUtils::infOrMax<X>();
|
2019-06-06 14:21:15 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD inline functions::indexreduce::IndexValue<X> startingIndexValue(X *input) {
|
|
|
|
functions::indexreduce::IndexValue<X> local;
|
|
|
|
local.value = startingValue(input);
|
|
|
|
local.index = 0;
|
|
|
|
return local;
|
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD inline functions::indexreduce::IndexValue<X> update(functions::indexreduce::IndexValue<X> &old, functions::indexreduce::IndexValue<X> &opOutput, X *extraParams) {
|
|
|
|
opOutput.value = nd4j::math::nd4j_abs<X>(opOutput.value);
|
|
|
|
old.value = nd4j::math::nd4j_abs<X>(old.value);
|
|
|
|
if (opOutput.value < old.value)
|
|
|
|
return opOutput;
|
|
|
|
|
|
|
|
#ifdef __CUDACC__
|
|
|
|
// workaround for cuda race condition at merge phase
|
|
|
|
else if (opOutput.value == old.value && opOutput.index < old.index)
|
|
|
|
return opOutput;
|
|
|
|
#elif defined(__GNUC__)
|
|
|
|
|
|
|
|
#endif
|
|
|
|
return old;
|
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD inline functions::indexreduce::IndexValue<X> merge(
|
|
|
|
functions::indexreduce::IndexValue<X> f1,
|
|
|
|
functions::indexreduce::IndexValue<X> f2, X *extraParams) {
|
|
|
|
if (nd4j::math::nd4j_abs<X>(f1.value) < nd4j::math::nd4j_abs<X>(f2.value))
|
|
|
|
return f2;
|
|
|
|
return f1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD inline functions::indexreduce::IndexValue<X> postProcess(
|
|
|
|
functions::indexreduce::IndexValue<X> reduction, int n, int xOffset,
|
|
|
|
X *dx, int incx, X *extraParams, X *result) {
|
|
|
|
return reduction;
|
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD inline functions::indexreduce::IndexValue<X> op(functions::indexreduce::IndexValue<X> d1,
|
|
|
|
functions::indexreduce::IndexValue<X> d2, X *extraParams) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2019-08-27 09:37:10 +02:00
|
|
|
template <typename X, typename Z>
|
2019-06-06 14:21:15 +02:00
|
|
|
class IndexMin {
|
|
|
|
public:
|
|
|
|
static _CUDA_HD inline functions::indexreduce::IndexValue<X> op(
|
|
|
|
functions::indexreduce::IndexValue<X> val, X *extraParams) {
|
|
|
|
return val;
|
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD inline X startingValue(const X *input) {
|
2019-06-15 13:34:34 +02:00
|
|
|
return nd4j::DataTypeUtils::infOrMax<X>();
|
2019-06-06 14:21:15 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD inline functions::indexreduce::IndexValue<X> startingIndexValue(X *input) {
|
|
|
|
functions::indexreduce::IndexValue<X> local;
|
|
|
|
local.value = startingValue(input);
|
|
|
|
local.index = 0;
|
|
|
|
return local;
|
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD inline functions::indexreduce::IndexValue<X> update(functions::indexreduce::IndexValue<X> &old, functions::indexreduce::IndexValue<X> &opOutput, X *extraParams) {
|
|
|
|
if (opOutput.value < old.value)
|
|
|
|
return opOutput;
|
|
|
|
|
|
|
|
#ifdef __CUDACC__
|
|
|
|
// workaround for cuda race condition at merge phase
|
|
|
|
else if (opOutput.value == old.value && opOutput.index < old.index)
|
|
|
|
return opOutput;
|
|
|
|
#elif defined(__GNUC__)
|
|
|
|
|
|
|
|
#endif
|
|
|
|
return old;
|
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD inline functions::indexreduce::IndexValue<X> merge(
|
|
|
|
functions::indexreduce::IndexValue<X> f1,
|
|
|
|
functions::indexreduce::IndexValue<X> f2, X *extraParams) {
|
|
|
|
if (f1.value < f2.value)
|
|
|
|
return f2;
|
|
|
|
return f1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD inline functions::indexreduce::IndexValue<X> postProcess(
|
|
|
|
functions::indexreduce::IndexValue<X> reduction, int n, int xOffset,
|
|
|
|
X *dx, int incx, X *extraParams, X *result) {
|
|
|
|
return reduction;
|
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD inline functions::indexreduce::IndexValue<X> op(functions::indexreduce::IndexValue<X> d1,
|
|
|
|
functions::indexreduce::IndexValue<X> d2, X *extraParams) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class SummaryStatsVariance {
|
|
|
|
public:
|
|
|
|
|
|
|
|
static _CUDA_HD inline Z getValue(const bool biasCorrected, functions::summarystats::SummaryStatsData<X> val) {
|
|
|
|
if (biasCorrected) {
|
|
|
|
Z ret = static_cast<Z>(val.varianceBiasCorrected());
|
|
|
|
if (ret < static_cast<Z>(0.0f))
|
|
|
|
return static_cast<Z>(val.variance());
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
return static_cast<Z>(val.variance());
|
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD inline functions::summarystats::SummaryStatsData<X> op(functions::summarystats::SummaryStatsData<X> d1, Z *extraParams) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Z>
|
|
|
|
class SummaryStatsStandardDeviation {
|
|
|
|
public:
|
|
|
|
|
|
|
|
static _CUDA_HD inline Z getValue(const bool biasCorrected, functions::summarystats::SummaryStatsData<X> val) {
|
|
|
|
if (biasCorrected) {
|
|
|
|
auto ret = static_cast<Z>(val.varianceBiasCorrected());
|
|
|
|
if (ret < static_cast<Z>(0.0f))
|
|
|
|
return nd4j::math::nd4j_sqrt<double, Z>(val.variance());
|
|
|
|
else
|
|
|
|
return nd4j::math::nd4j_sqrt<double, Z>(ret);
|
|
|
|
}
|
|
|
|
return nd4j::math::nd4j_sqrt<double, Z>(val.variance());
|
|
|
|
}
|
|
|
|
|
|
|
|
static _CUDA_HD inline functions::summarystats::SummaryStatsData<X> op(functions::summarystats::SummaryStatsData<X> d1, Z *extraParams) {
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class DropOut {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
inline _CUDA_D static X op(X d1, X *params) {
|
|
|
|
X prob = params[0];
|
|
|
|
|
|
|
|
#ifdef __CUDACC__
|
|
|
|
X length = params[1];
|
|
|
|
X tid = blockIdx.x * blockDim.x + threadIdx.x;
|
|
|
|
X rnd = nd4j::math::nd4j_abs<X>(nd4j::math::nd4j_cos<X>(static_cast<X>(clock64()) * static_cast<X>(tid) + static_cast<X>(length) * static_cast<X>(tid)));
|
|
|
|
#else
|
|
|
|
X rnd = static_cast<X>(rand() / RAND_MAX);
|
|
|
|
#endif
|
|
|
|
return rnd >= prob ? static_cast<X>(0.0f) : d1;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class DropOutInverted {
|
|
|
|
public:
|
|
|
|
no_op_exec_special
|
|
|
|
no_op_exec_special_cuda
|
|
|
|
|
|
|
|
#ifdef __CUDACC__
|
|
|
|
__device__
|
|
|
|
#endif
|
|
|
|
inline static Z op(X d1, Y d2, Z *params) {
|
|
|
|
Y prob = d2;
|
|
|
|
#ifdef __CUDACC__
|
|
|
|
X length = params[1];
|
|
|
|
X tid = blockIdx.x * blockDim.x + threadIdx.x;
|
|
|
|
X rnd = nd4j::math::nd4j_abs<X>(nd4j::math::nd4j_cos<X>(static_cast<X>(clock64()) * static_cast<X>(tid) + static_cast<X>(length) * static_cast<X>(tid)));
|
|
|
|
#else
|
|
|
|
X rnd = static_cast<X>(rand() / RAND_MAX);
|
|
|
|
#endif
|
|
|
|
return rnd >= static_cast<X>(prob) ? static_cast<Z>(0.0f) : reinterpret_cast<Z>(d1 / static_cast<X>(prob));
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class ReplaceNans {
|
|
|
|
public:
|
|
|
|
no_op_exec_special
|
|
|
|
no_op_exec_special_cuda
|
|
|
|
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
return nd4j::math::nd4j_isnan(d1) ? static_cast<Z>(d2) : static_cast<Z>(d1) ;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
// this op is used for conditional pairwise transforms only
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class CompareAndReplace{
|
|
|
|
public:
|
|
|
|
// op definition for PairWise Transform
|
|
|
|
op_def static Z op(X d1, Y d2, Z *params) {
|
|
|
|
auto zd1 = static_cast<Z>(d1);
|
|
|
|
auto zd2 = static_cast<Z>(d2);
|
|
|
|
auto compare = params[0];
|
|
|
|
auto eps = params[2];
|
|
|
|
int mode = (int) params[3];
|
|
|
|
if (mode == 0) // equals
|
|
|
|
if (nd4j::math::nd4j_abs<Z>(zd1 - compare) <= eps)
|
|
|
|
return zd2;
|
|
|
|
else
|
|
|
|
return zd1;
|
|
|
|
else if (mode == 1) // not equals eps
|
|
|
|
if (nd4j::math::nd4j_abs<Z>(zd1 - compare) > eps)
|
|
|
|
return zd2;
|
|
|
|
else
|
|
|
|
return zd1;
|
|
|
|
else if (mode == 2) // less_than eps
|
|
|
|
if (zd1 < compare)
|
|
|
|
return zd2;
|
|
|
|
else
|
|
|
|
return zd1;
|
|
|
|
else if (mode ==3) // greater_than
|
|
|
|
if (zd1 > compare)
|
|
|
|
return zd2;
|
|
|
|
else
|
|
|
|
return zd1;
|
|
|
|
else if (mode == 4) // less_or_equals_than
|
|
|
|
if (zd1 <= compare)
|
|
|
|
return zd2;
|
|
|
|
else
|
|
|
|
return zd1;
|
|
|
|
else if (mode == 5) // greater_or_equals_than
|
|
|
|
if (zd1 >= compare)
|
|
|
|
return zd2;
|
|
|
|
else
|
|
|
|
return zd1;
|
|
|
|
else if (mode == 6) // abs_less_than
|
|
|
|
if (nd4j::math::nd4j_abs<Z>(zd1) < compare)
|
|
|
|
return zd2;
|
|
|
|
else
|
|
|
|
return zd1;
|
|
|
|
else if (mode == 7) // abs_greater_than
|
|
|
|
if (nd4j::math::nd4j_abs<Z>(zd1) > compare)
|
|
|
|
return zd2;
|
|
|
|
else
|
|
|
|
return zd1;
|
|
|
|
else if (mode == 8) // is inf
|
|
|
|
if (nd4j::math::nd4j_isinf(zd1))
|
|
|
|
return zd2;
|
|
|
|
else
|
|
|
|
return zd1;
|
|
|
|
else if (mode == 9) // is nan
|
|
|
|
if (nd4j::math::nd4j_isnan(zd1))
|
|
|
|
return zd2;
|
|
|
|
else
|
|
|
|
return zd1;
|
|
|
|
else if (mode == 10)
|
|
|
|
if (zd1 == compare)
|
|
|
|
return zd2;
|
|
|
|
else
|
|
|
|
return zd1;
|
|
|
|
else if (mode == 11)
|
|
|
|
if (zd1 != compare)
|
|
|
|
return zd2;
|
|
|
|
else
|
|
|
|
return zd1;
|
|
|
|
else if (mode == 12) // abs_greater_or_equals_than
|
|
|
|
if (nd4j::math::nd4j_abs<Z>(zd1) >= compare)
|
|
|
|
return zd2;
|
|
|
|
else
|
|
|
|
return zd1;
|
|
|
|
else if (mode == 13) // abs_less_or_equals_than
|
|
|
|
if (nd4j::math::nd4j_abs<Z>(zd1) <= compare)
|
|
|
|
return zd2;
|
|
|
|
else
|
|
|
|
return zd1;
|
|
|
|
else
|
|
|
|
printf("Undefined boolean operation: [%i]\n", mode);
|
|
|
|
return zd1;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X, typename Y, typename Z>
|
|
|
|
class CompareAndSet {
|
|
|
|
public:
|
|
|
|
|
|
|
|
|
|
|
|
// op definition for PairWise Transform
|
|
|
|
op_def static Z op(X dX, Y dY, Z *params) {
|
|
|
|
auto d1 = static_cast<Z>(dX);
|
|
|
|
auto d2 = static_cast<Z>(dY);
|
|
|
|
auto compare = params[0];
|
|
|
|
auto eps = params[2];
|
|
|
|
auto mode = static_cast<int>(params[3]);
|
|
|
|
if (mode == 0) // equals
|
|
|
|
if (nd4j::math::nd4j_abs<Z>(d2 - compare) <= eps)
|
|
|
|
return d2;
|
|
|
|
else
|
|
|
|
return d1;
|
|
|
|
else if (mode == 1) // not equals
|
|
|
|
if (nd4j::math::nd4j_abs<Z>(d2 - compare) > eps)
|
|
|
|
return d2;
|
|
|
|
else
|
|
|
|
return d1;
|
|
|
|
else if (mode == 2) // less_than
|
|
|
|
if (d2 < compare)
|
|
|
|
return d2;
|
|
|
|
else
|
|
|
|
return d1;
|
|
|
|
else if (mode ==3) // greater_than
|
|
|
|
if (d2 > compare)
|
|
|
|
return d2;
|
|
|
|
else
|
|
|
|
return d1;
|
|
|
|
else if (mode == 4) // less_or_equals_than
|
|
|
|
if (d2 <= compare)
|
|
|
|
return d2;
|
|
|
|
else
|
|
|
|
return d1;
|
|
|
|
else if (mode == 5) // greater_or_equals_than
|
|
|
|
if (d2 >= compare)
|
|
|
|
return d2;
|
|
|
|
else
|
|
|
|
return d1;
|
|
|
|
else if (mode == 6) // abs_less_than
|
|
|
|
if (nd4j::math::nd4j_abs<Z>(d2) < compare)
|
|
|
|
return d2;
|
|
|
|
else
|
|
|
|
return d1;
|
|
|
|
else if (mode == 7) // abs_greater_than
|
|
|
|
if (nd4j::math::nd4j_abs<Z>(d2) > compare)
|
|
|
|
return d2;
|
|
|
|
else
|
|
|
|
return d1;
|
|
|
|
else if (mode == 8) // is inf
|
|
|
|
if (nd4j::math::nd4j_isinf(d2))
|
|
|
|
return d2;
|
|
|
|
else
|
|
|
|
return d1;
|
|
|
|
else if (mode == 9) // is nan
|
|
|
|
if (nd4j::math::nd4j_isnan(d2))
|
|
|
|
return d2;
|
|
|
|
else
|
|
|
|
return d1;
|
|
|
|
else if (mode == 10)
|
|
|
|
if (d2 == compare)
|
|
|
|
return d2;
|
|
|
|
else
|
|
|
|
return d1;
|
|
|
|
else if (mode == 11)
|
|
|
|
if (d2 != compare)
|
|
|
|
return d2;
|
|
|
|
else
|
|
|
|
return d1;
|
|
|
|
else if (mode == 12) // abs_greater_or_equals_than
|
|
|
|
if (nd4j::math::nd4j_abs<Z>(d1) >= compare)
|
|
|
|
return d2;
|
|
|
|
else
|
|
|
|
return d1;
|
|
|
|
else if (mode == 13) // abs_less_or_equals_than
|
|
|
|
if (nd4j::math::nd4j_abs<Z>(d1) <= compare)
|
|
|
|
return d2;
|
|
|
|
else
|
|
|
|
return d1;
|
|
|
|
else
|
|
|
|
printf("Undefined boolean operation: [%i]\n", mode);
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename X>
|
|
|
|
class CompareAndSetTransform {
|
|
|
|
public:
|
|
|
|
no_op_exec_special_same
|
|
|
|
no_op_exec_special_same_cuda
|
|
|
|
|
|
|
|
|
|
|
|
// op definition for Transform
|
|
|
|
op_def static X op(X d1, X *params) {
|
|
|
|
auto compare = params[0];
|
|
|
|
auto set = params[1];
|
|
|
|
auto eps = params[2];
|
|
|
|
|
|
|
|
// with mode == 0 we do set if d1 equals to compare, and with mode == 1 - we go otherwise
|
|
|
|
int mode = (int) params[3];
|
|
|
|
if (mode == 0) // equals
|
|
|
|
if (nd4j::math::nd4j_abs<X>(d1 - compare) <= eps)
|
|
|
|
return set;
|
|
|
|
else
|
|
|
|
return d1;
|
|
|
|
//return nd4j::math::nd4j_abs<T>(d1 - compare) <= eps ? set : d1;
|
|
|
|
else if (mode == 1) // not equals
|
|
|
|
if (nd4j::math::nd4j_abs<X>(d1 - compare) > eps)
|
|
|
|
return set;
|
|
|
|
else
|
|
|
|
return d1;
|
|
|
|
//return nd4j::math::nd4j_abs<T>(d1 - compare) > eps ? set : d1;
|
|
|
|
else if (mode == 2) // less_than
|
|
|
|
if (d1 < compare)
|
|
|
|
return set;
|
|
|
|
else
|
|
|
|
return d1;
|
|
|
|
else if (mode ==3) // greater_than
|
|
|
|
if (d1 > compare)
|
|
|
|
return set;
|
|
|
|
else
|
|
|
|
return d1;
|
|
|
|
else if (mode == 4) // less_or_equals_than
|
|
|
|
if (d1 <= compare)
|
|
|
|
return set;
|
|
|
|
else
|
|
|
|
return d1;
|
|
|
|
else if (mode == 5) // greater_or_equals_than
|
|
|
|
if (d1 >= compare)
|
|
|
|
return set;
|
|
|
|
else
|
|
|
|
return d1;
|
|
|
|
else if (mode == 6) // abs_less_than
|
|
|
|
if (nd4j::math::nd4j_abs<X>(d1) < compare)
|
|
|
|
return set;
|
|
|
|
else
|
|
|
|
return d1;
|
|
|
|
else if (mode == 7) // abs_greater_than
|
|
|
|
if (nd4j::math::nd4j_abs<X>(d1) > compare)
|
|
|
|
return set;
|
|
|
|
else
|
|
|
|
return d1;
|
|
|
|
else if (mode == 8) // is inf
|
|
|
|
if (nd4j::math::nd4j_isinf(d1))
|
|
|
|
return set;
|
|
|
|
else
|
|
|
|
return d1;
|
|
|
|
else if (mode == 9) // is nan
|
|
|
|
if (nd4j::math::nd4j_isnan(d1))
|
|
|
|
return set;
|
|
|
|
else
|
|
|
|
return d1;
|
|
|
|
else if (mode == 10)
|
|
|
|
if (d1 == compare)
|
|
|
|
return set;
|
|
|
|
else
|
|
|
|
return d1;
|
|
|
|
else if (mode == 11)
|
|
|
|
if (d1 != compare)
|
|
|
|
return set;
|
|
|
|
else
|
|
|
|
return d1;
|
|
|
|
else if (mode == 12) // abs_greater_or_equals_than
|
|
|
|
if (nd4j::math::nd4j_abs<X>(d1) >= compare)
|
|
|
|
return set;
|
|
|
|
else
|
|
|
|
return d1;
|
|
|
|
else if (mode == 13) // abs_less_or_equals_than
|
|
|
|
if (nd4j::math::nd4j_abs<X>(d1) <= compare)
|
|
|
|
return set;
|
|
|
|
else
|
|
|
|
return d1;
|
|
|
|
else
|
|
|
|
printf("Undefined boolean operation: [%i]\n", mode);
|
|
|
|
return d1;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|
2019-08-31 19:57:39 +02:00
|
|
|
|