/******************************************************************************* * Copyright (c) 2015-2018 Skymind, Inc. * * This program and the accompanying materials are made available under the * terms of the Apache License, Version 2.0 which is available at * https://www.apache.org/licenses/LICENSE-2.0. * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations * under the License. * * SPDX-License-Identifier: Apache-2.0 ******************************************************************************/ #pragma once #ifndef OPS_H_ #define OPS_H_ #include #include #include #include #include #include #include #define MIN_V 1e-12 #define MAX_FLOAT 1e37 #define MIN_FLOAT 1e-37 #define MAX_INT 2147483647 #define MIN_CUTFOFF -3.79297773665f #define FLOAT_MIN_NORMAL 1.17549435e-38 #define EPS 1e-5 #define AFFINITY close #define DOUBLE_PI_T T(2.0 * 3.14159265358979323846) #define DOUBLE_PI_X X(2.0 * 3.14159265358979323846) #define no_op_exec_special_any static const bool requiresSpecial = false; static void execSpecial(X *dx, Nd4jLong *xShapeBuffer, Z *result, Nd4jLong *resultShapeBuffer, X *extraParams, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) {} #define no_op_exec_special_bool static const bool requiresSpecial = false; static void execSpecial(X *dx, Nd4jLong *xShapeBuffer, Z *result, Nd4jLong *resultShapeBuffer, X *extraParams, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) {} #define no_op_exec_special_same static const bool requiresSpecial = false; static void execSpecial(X *dx, Nd4jLong *xShapeBuffer, X *result, Nd4jLong *resultShapeBuffer, X *extraParams, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) {} #define no_op_exec_special static const bool requiresSpecial = false; static void execSpecial(X *dx, Nd4jLong *xShapeBuffer, Z 
*result, Nd4jLong *resultShapeBuffer, Z *extraParams, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) {} #define no_op_exec_special_accumulation static const bool requiresSpecialAccumulation = false; static void execSpecial(X *x, Nd4jLong *xShapeInfo, Z *extraParams, Z *result, Nd4jLong *resultShapeInfoBuffer, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffset){} #define no_op_exec_special_accumulation_long static const bool requiresSpecialAccumulation = false; static void execSpecial(X *x, Nd4jLong *xShapeInfo, X *extraParams, Z *result, Nd4jLong *resultShapeInfoBuffer, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffset){} #define no_op_exec_special_accumulation_same static const bool requiresSpecialAccumulation = false; static void execSpecial(X *x, Nd4jLong *xShapeInfo, X *extraParams, X *result, Nd4jLong *resultShapeInfoBuffer, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffset){} #ifdef __CUDACC__ #define no_op_exec_special_any_cuda static __device__ void execSpecialCuda(X *dx, Nd4jLong *xShapeBuffer, Z *result, Nd4jLong *resultShapeBuffer, X *extraParams, int *allocationPointer, Z *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) {} #define no_op_exec_special_bool_cuda static __device__ void execSpecialCuda(X *dx, Nd4jLong *xShapeBuffer, Z *result, Nd4jLong *resultShapeBuffer, X *extraParams, int *allocationPointer, Z *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) {} #define no_op_exec_special_same_cuda static __device__ void execSpecialCuda(X *dx, Nd4jLong *xShapeBuffer, X *result, Nd4jLong *resultShapeBuffer, X *extraParams, int *allocationPointer, X *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) {} #define no_op_exec_special_cuda static __device__ void execSpecialCuda(X *dx, Nd4jLong *xShapeBuffer,Z *result, Nd4jLong *resultShapeBuffer,Z *extraParams, int *allocationPointer, Z *reductionPointer, Nd4jLong *tadShapeInfo, 
Nd4jLong *tadOffsets) {} #define no_op_exec_special_accumulation_same_cuda static inline __device__ void execSpecialCuda(X *dx, Nd4jLong *xShapeInfo, X *extraParams, X *result, Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, X *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) {} #define no_op_exec_special_accumulation_long_cuda static inline __device__ void execSpecialCuda(X *dx, Nd4jLong *xShapeInfo, X *extraParams, Z *result, Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, Z *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) {} #define no_op_exec_special_accumulation_cuda static inline __device__ void execSpecialCuda(X *dx, Nd4jLong *xShapeInfo, Z *extraParams, Z *result, Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, Z *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) {} #else // hacky fix for isnan/being being out of scope //#ifdef IOS //#define isinf(x) 0 // this isn't right. But std::isinf fails //#define isnan(x) 0 //#else //#define isnan std::isnan //#define isinf std::isinf //#endif #define no_op_exec_special_cuda #define no_op_exec_special_accumulation_cuda #define no_op_exec_special_accumulation_same_cuda #define no_op_exec_special_accumulation_long_cuda #define no_op_exec_special_any_cuda #define no_op_exec_special_bool_cuda #define no_op_exec_special_same_cuda #define no_op_exec_special_accumulation_same_cuda #endif #define SELU_ALPHA 1.6732632423543772848170429916717 #define SELU_LAMBDA 1.0507009873554804934193349852946 #ifdef _OPENMP #pragma omp declare reduction(maxTF : float,double,float16,bfloat16 : \ omp_out = nd4j::math::nd4j_max(omp_in, omp_out) )\ initializer (omp_priv=-MAX_FLOAT) #pragma omp declare reduction(minTF : float,double,float16,bfloat16 : \ omp_out = nd4j::math::nd4j_min(omp_in, omp_out) )\ initializer (omp_priv=MAX_FLOAT) #pragma omp declare reduction(maxT : 
float,double,float16,bfloat16,int,Nd4jLong,Nd4jULong,int8_t,uint8_t,bool,int16_t,uint16_t,uint32_t : \ omp_out = nd4j::math::nd4j_max(omp_in, omp_out) )\ initializer (omp_priv=0) #pragma omp declare reduction(minT : float,double,float16,bfloat16,int,Nd4jLong,Nd4jULong,int8_t,uint8_t,bool,int16_t,uint16_t,uint32_t : \ omp_out = nd4j::math::nd4j_min(omp_in, omp_out) )\ initializer (omp_priv=0) #pragma omp declare reduction(amaxT : float,double,float16,bfloat16,int,Nd4jLong,Nd4jULong,int8_t,uint8_t,bool,int16_t,uint16_t,uint32_t : \ omp_out = nd4j::math::nd4j_max(nd4j::math::nd4j_abs(omp_in), nd4j::math::nd4j_abs(omp_out)) ) #pragma omp declare reduction(aminT : float,double,float16,bfloat16,int,Nd4jLong,Nd4jULong,int8_t,uint8_t,bool,int16_t,uint16_t,uint32_t : \ omp_out = nd4j::math::nd4j_min(nd4j::math::nd4j_abs(omp_in), nd4j::math::nd4j_abs(omp_out)) ) #pragma omp declare reduction(asumT : float,double,float16,bfloat16,int,Nd4jLong,Nd4jULong,int8_t,uint8_t,bool,int16_t,uint16_t,uint32_t : \ omp_out = nd4j::math::nd4j_abs(omp_in) + nd4j::math::nd4j_abs(omp_out))\ initializer (omp_priv=0) #pragma omp declare reduction(sumT : float,double,float16,bfloat16,int,Nd4jLong,Nd4jULong,int8_t,uint8_t,bool,int16_t,uint16_t,uint32_t : \ omp_out = omp_in + omp_out)\ initializer (omp_priv=0) #pragma omp declare reduction(prodT : float,double,float16,bfloat16,int,Nd4jLong,Nd4jULong,int8_t,uint8_t,bool,int16_t,uint16_t,uint32_t : \ omp_out = omp_in * omp_out)\ initializer (omp_priv=1) #endif namespace functions { namespace indexreduce { template struct IndexValue { T value; Nd4jLong index; _CUDA_HD IndexValue() = default; _CUDA_HD IndexValue(const T val, const Nd4jLong ind): index(ind), value(val) {} }; } namespace summarystats { template class SummaryStatsData; } } namespace simdOps { template class Add { public: op_def static Z op(X d1, Y d2) { return static_cast(d1 + d2); } op_def static Z op(X d1, Y d2, Z *params) { return static_cast(d1 + d2); } op_def static Z op(X d1) { 
return static_cast(d1); } // op for MetaOps op_def static Z op(X d1, Y *params) { return static_cast(d1 + params[0]); } op_def static X startingValue() { return static_cast(0.f); } }; template class NewAdd { public: op_def static X op(X d1, Y d2, X *params) { return d1 + d2; } }; template class Subtract { public: op_def static Z op(X d1, Y d2) { return static_cast(d1 - d2); } op_def static Z op(X d1, Y d2, Z *params) { return static_cast(d1 - d2); } op_def static Z op(X d1) { return static_cast(d1); } // op for MetaOps op_def static Z op(X d1, Y *params) { return static_cast(d1 - params[0]); } }; template class SquaredSubtract { public: op_def static Z op(X d1, Y d2) { auto d = static_cast(d1 - d2); return d * d; } op_def static Z op(X d1, Y d2, Z *params) { auto d = static_cast(d1 - d2); return d * d; } op_def static Z op(X d1) { return d1; } // op for MetaOps op_def static Z op(X d1, Y *params) { auto d = static_cast(d1 - params[0]); return d * d; } }; template class SquaredReverseSubtract { public: op_def static Z op(X d1, Y d2) { auto d = static_cast(d2 - d1); return d * d; } op_def static Z op(X d1, Y d2, Z *params) { auto d = static_cast(d2 - d1); return d * d; } op_def static Z op(X d1) { return d1; } // op for MetaOps op_def static Z op(X d1, Y *params) { auto d = static_cast(params[0] - d1); return d * d; } }; template class ReverseSubtract { public: op_def static Z op(X d1, Y d2) { return static_cast(d2 - d1); } op_def static Z op(X d1, Y d2, Z *params) { return static_cast(d2 - d1); } op_def static Z op(X d1) { return d1; } // op for MetaOps op_def static Z op(X d1, Y *params) { return static_cast(params[0] - d1); } }; template class LogPoissonLossFull { public: op_def static Z op(X z, Y c) { auto zz = static_cast(z); auto zc = static_cast(c); return (nd4j::math::nd4j_exp(c) - zz * zc + (zz * nd4j::math::nd4j_log(z) - zz + static_cast(0.5f) * nd4j::math::nd4j_log(static_cast(DOUBLE_PI_X) * zz))); } op_def static Z op(X z, Y c, Z *params) { auto zz = 
static_cast(z); auto zc = static_cast(c); return (nd4j::math::nd4j_exp(c) - zz * zc + (zz * nd4j::math::nd4j_log(z) - zz + static_cast(0.5f) * nd4j::math::nd4j_log(static_cast(DOUBLE_PI_X) * zz))); } op_def static Z op(X z) { auto zz = static_cast(z); return (zz * nd4j::math::nd4j_log(z) - zz + static_cast(0.5f) * nd4j::math::nd4j_log(static_cast(DOUBLE_PI_X) * zz)); } // op for MetaOps op_def static X op(X z, Y *params) { return (nd4j::math::nd4j_exp(params[0]) - z * params[0] + (z * nd4j::math::nd4j_log(z) - z + static_cast(0.5f) * nd4j::math::nd4j_log(DOUBLE_PI_X * z))); } }; template class LogPoissonLoss { public: op_def static Z op(X z, Y c) { auto zz = static_cast(z); auto zc = static_cast(c); return (nd4j::math::nd4j_exp(c) - zz * zc); } op_def static Z op(X z, Y c, Z *params) { auto zz = static_cast(z); auto zc = static_cast(c); return (nd4j::math::nd4j_exp(c) - zz * zc); } op_def static Z op(X z) { return static_cast(z); } // op for MetaOps op_def static Z op(X z, Y *params) { return (nd4j::math::nd4j_exp(params[0]) - static_cast(z) * static_cast(params[0])); } }; template class Multiply { public: op_def static Z op(X d1, Y d2) { return static_cast(d1 * d2); } op_def static Z op(X d1, Y d2, Z *params) { return static_cast(d1 * d2); } op_def static Z op(X d1) { return static_cast(d1); } // op for MetaOps op_def static Z op(X d1, Y *params) { return static_cast(d1 * params[0]); } op_def static X startingValue() { return static_cast(1.f); } }; template class Divide { public: op_def static Z op(X d1, Y d2) { return static_cast(d1 / d2); } op_def static Z op(X d1, Y d2, Z *params) { return static_cast(d1 / d2); } op_def static Z op(X d1) { return static_cast(d1); } // op for MetaOps op_def static Z op(X d1, Y *params) { return static_cast(d1 / params[0]); } op_def static X startingValue() { return static_cast(1); } }; template class DivideNoNan { public: op_def static Z op(X d1, Y d2) { if (d2 == (Y)0) return (Z)0; return static_cast(d1 / d2); } op_def static Z 
op(X d1, Y d2, Z *params) { if (d2 == (Y)0) return (Z)0; return static_cast(d1 / d2); } op_def static Z op(X d1) { return static_cast(d1); } // op for MetaOps op_def static Z op(X d1, Y *params) { if (params[0] == (Y)0) return (Z)0; return static_cast(d1 / params[0]); } op_def static X startingValue() { return static_cast(1); } }; template class SafeDivide { public: op_def static Z op(X d1, Y d2) { if(d2 == static_cast(0)) return static_cast(0); return static_cast(d1 / d2); } op_def static Z op(X d1, Y d2, Z *params) { if(d2 == static_cast(0)) return static_cast(0); return static_cast(d1 / d2); } op_def static Z op(X d1) { return static_cast(d1); } // op for MetaOps op_def static Z op(X d1, Y *params) { if(params[0] == static_cast(0)) return static_cast(0); return static_cast(d1 / params[0]); } }; template class FloorDiv { public: op_def static Z op(X d1, Y d2) { return nd4j::math::nd4j_floor(static_cast(d1 / d2)); } op_def static Z op(X d1, Y d2, Z *params) { return nd4j::math::nd4j_floor(static_cast(d1 / d2)); } op_def static Z op(X d1) { return nd4j::math::nd4j_floor(static_cast(d1)); } // op for MetaOps op_def static Z op(X d1, Y *params) { return nd4j::math::nd4j_floor(static_cast(d1 / params[0])); } }; template class TruncateDiv { public: op_def static Z op(X d1, Y d2) { auto i1 = static_cast(d1); auto i2 = static_cast(d2); return static_cast(i1 / i2); } op_def static Z op(X d1, Y d2, Z *params) { auto i1 = static_cast(d1); auto i2 = static_cast(d2); return static_cast(i1 / i2); } op_def static Z op(X d1) { return d1; } // op for MetaOps op_def static Z op(X d1, Y *params) { auto i1 = static_cast(d1); auto i2 = static_cast(params[0]); return static_cast(i1 / i2); } }; template class TruncateMod { public: op_def static Z op(X d1, Y d2) { auto i1 = static_cast(d1); auto i2 = static_cast(d2); return static_cast(i1 % i2); } op_def static Z op(X d1, Y d2, Z *params) { auto i1 = static_cast(d1); auto i2 = static_cast(d2); return static_cast(i1 % i2); } op_def 
static Z op(X d1) { return static_cast(d1); } // op for MetaOps op_def static Z op(X d1, Y *params) { auto i1 = static_cast(d1); auto i2 = static_cast(params[0]); return static_cast(i1 % i2); } }; template class Remainder { public: op_def static Z op(X d1, Y d2) { return nd4j::math::nd4j_remainder(d1, d2); } op_def static Z op(X d1, Y d2, Z *params) { return nd4j::math::nd4j_remainder(d1, d2); } op_def static Z op(X d1) { return d1; } // op for MetaOps op_def static Z op(X d1, Y *params) { return nd4j::math::nd4j_remainder(d1, params[0]); } }; template class FMod { public: op_def static Z op(X d1, Y d2) { return nd4j::math::nd4j_fmod(d1, d2); } op_def static Z op(X d1, Y d2, Z *params) { return nd4j::math::nd4j_fmod(d1, d2); } op_def static Z op(X d1) { return d1; } // op for MetaOps op_def static Z op(X d1, Y *params) { return nd4j::math::nd4j_fmod(d1, params[0]); } }; template class FloorMod { public: op_def static Z op(X d1, Y d2) { auto m = nd4j::math::nd4j_fmod(d1, d2); return (d1 < static_cast(0)) == (d2 < static_cast(0)) ? m : nd4j::math::nd4j_fmod(m + static_cast(d2), d2); } op_def static Z op(X d1, Y d2, Z *params) { auto m = nd4j::math::nd4j_fmod(d1, d2); return (d1 < static_cast(0.0f)) == (d2 < static_cast(0)) ? 
m : nd4j::math::nd4j_fmod(m + static_cast(d2), d2); } op_def static Z op(X d1) { return d1; } // op for MetaOps op_def static Z op(X d1, Y *params) { return op(d1, params[0]); } }; template class ReverseDivide { public: op_def static Z op(X d1, Y d2) { return static_cast(d2 / d1); } op_def static Z op(X d1, Y d2, Z *params) { return static_cast(d2 / d1); } op_def static Z op(X d1) { return static_cast(d1); } // op for MetaOps op_def static Z op(X d1, Y *params) { return static_cast(params[0] / d1); } }; template class CopyPws { public: op_def static Z op(X d1, Y d2) { return static_cast(d2); } op_def static Z op(X d1, Y d2, Z *params) { return static_cast(d2); } op_def static Z op(X d1) { return static_cast(d1); } op_def static Z op(X d1, Y *params) { return static_cast(d1); } }; template class Copy { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return d1; } }; template class Copy2 { public: op_def static Z op(X d1, Y d2) { return static_cast(d2); } op_def static Z op(X d1, Y d2, Z *params) { return static_cast(d2); } op_def static Z op(X d1) { return static_cast(d1); } op_def static Z op(X d1, Y *params) { return static_cast(d1); } }; template class Axpy { public: op_def static Z op(X d1, Y d2) { return static_cast(d2 + d1); } op_def static Z op(X d1, Y d2, Z *params) { auto alpha = params[0]; return alpha * static_cast(d1) + static_cast(d2); } op_def static Z op(X d1) { return static_cast(d1); } }; template class Assign { public: no_op_exec_special_any no_op_exec_special_any_cuda op_def static Z op(X d1, X *params) { return static_cast(d1); } }; template class And { public: no_op_exec_special_bool no_op_exec_special_bool_cuda op_def static Z op(X d1, X d2) { return d2 + d1; } op_def static Z op(X d1, X d2, X *params) { if (params != nullptr) { auto comp = params[0]; return d1 != comp && d2 != comp ? 
static_cast(1) : static_cast(0); } else { auto b1 = static_cast(d1); auto b2 = static_cast(d2); return (b1 && b2) ? static_cast(1) : static_cast(0); } } op_def static Z op(X d1) { return d1; } // op for MetaOps op_def static Z op(X d1, X *params) { return static_cast(119); } }; template class IntOr { public: op_def static X op(X d1, X d2) { return d2 | d1; } op_def static X op(X d1, X d2, X *params) { return op(d1, d2); } }; template class IntAnd { public: op_def static X op(X d1, X d2) { return d2 & d1; } op_def static X op(X d1, X d2, X *params) { return op(d1, d2); } }; template class IntXor { public: op_def static X op(X d1, X d2) { return d2 ^ d1; } op_def static X op(X d1, X d2, X *params) { return op(d1, d2); } }; template class ShiftLeft { public: op_def static X op(X d1, X d2) { return d1 << d2; } op_def static X op(X d1, X d2, X *params) { return op(d1, d2); } }; template class ShiftRight { public: op_def static X op(X d1, X d2) { return d1 >> d2; } op_def static X op(X d1, X d2, X *params) { return op(d1, d2); } }; template class CyclicShiftLeft { public: op_def static X op(X d1, X d2) { return d1 << d2 | d1 >> ((sizeof(X) * 8) - d2); } op_def static X op(X d1, X d2, X *params) { return op(d1, d2); } }; template class CyclicShiftRight { public: op_def static X op(X d1, X d2) { return d1 >> d2 | d1 << ((sizeof(X) * 8) - d2); } op_def static X op(X d1, X d2, X *params) { return op(d1, d2); } }; template class Or { public: no_op_exec_special_bool no_op_exec_special_bool_cuda op_def static Z op(X d1, X d2) { return d2 + d1; } op_def static Z op(X d1, X d2, X *params) { if (params != nullptr) { auto comp = params[0]; return d1 != comp || d2 != comp ? static_cast(1) : static_cast(0); } else { auto b1 = static_cast(d1); auto b2 = static_cast(d2); return b1 || b2 ? 
static_cast(1) : static_cast(0); } } op_def static Z op(X d1) { return d1; } // op for MetaOps op_def static Z op(X d1, X *params) { return static_cast(119); } }; template class Xor { public: no_op_exec_special_bool no_op_exec_special_bool_cuda op_def static Z op(X d1, X d2) { return d2 + d1; } op_def static Z op(X d1, X d2, X *params) { if (params != nullptr) { auto comp = params[0]; return ((d1 == comp && d2 != comp) || (d1 != comp && d2 == comp)) ? static_cast(1) : static_cast(0); } else { auto b1 = static_cast(d1); auto b2 = static_cast(d2); return (!b1 && b2 )||(b1 && !b2) ? static_cast(1) : static_cast(0); } } op_def static Z op(X d1) { return d1; } }; template class Not { public: no_op_exec_special_bool no_op_exec_special_bool_cuda op_def static Z op(X d1, X d2) { return static_cast(0); } op_def static Z op(X d1, X d2, X *params) { return d1 != d2 ? static_cast(1) : static_cast(0); } // this transform op should run only on boolean input op_def static Z op(X d1, X *params) { auto b1 = static_cast(d1); return !b1; } }; template class LogicalNot { public: op_def static Z op(X d1, Y d2) { return !((int) d1 && (int) d2); } op_def static Z op(X d1, Y d2, Z *params) { return static_cast(!(static_cast(d1) && static_cast(d2))); } op_def static Z op(X d1) { return d1; } // op for MetaOps op_def static Z op(X d1, Y *params) { return static_cast(119); } }; template class LogicalXor { public: op_def static Z op(X d1, Y d2) { auto i1 = static_cast(d1); auto i2 = static_cast(d2); return (i1 | i2) &~ (i1 & i2); } op_def static Z op(X d1, Y d2, Z *params) { return op(d1, d2); } op_def static Z op(X d1) { return d1; } // op for MetaOps op_def static Z op(X d1, Y *params) { return static_cast(119); } }; template class LogicalAnd { public: op_def static Z op(X d1, Y d2) { return static_cast(d1) & static_cast(d2); } op_def static Z op(X d1, Y d2, Z *params) { return op(d1, d2); } op_def static Z op(Y d1) { return d1; } // op for MetaOps op_def static Z op(X d1, Y *params) { 
return static_cast(119); } }; template class LogicalOr { public: op_def static Z op(X d1, Y d2) { return static_cast(d1) | static_cast(d2); } op_def static Z op(X d1, Y d2, Z *params) { return op(d1, d2); } op_def static Z op(X d1) { return d1; } // op for MetaOps op_def static Z op(X d1, Y *params) { return static_cast(119); } }; template class Mod { public: /* // just a optional note, feel free to remove later op_def static half op(half d1, half d2, half *params) { return __float2half(simdOps::Mod::op(__half2float(d1), __half2float(d2), nullptr)); } */ op_def static Z op(X d1, Y d2) { return static_cast(d1) % static_cast(d2); } op_def static Z op(X d1, Y d2, Z *params) { return op(d1, d2); } // op for MetaOp op_def static Z op(X d1, Y *params) { return op(d1, params[0]); } }; template class ReverseMod { public: op_def static Z op(X d1, Y d2) { return static_cast(d2) % static_cast(d1); } op_def static Z op(X d1, Y d2, Z *params) { return op(d1, d2); } // op for MetaOp op_def static Z op(X d1, Y *params) { return op(d1, params[0]); } }; /** * Whether 2 elements in an array * are epsilion equal */ template class Epsilon { public: op_def static Z op(X d1, X d2) { X diff = d1 - d2; X absDiff = nd4j::math::nd4j_abs(diff); if (absDiff <= static_cast(MIN_V)) return static_cast(1); return static_cast(0); } op_def static Z op(X d1, X d2, X *params) { return op(d1, d2); } op_def static Z op(X d1, X *params) { return d1; } }; template class EqualTo { public: op_def static Z op(X d1, X d2) { return d1 == d2; } op_def static Z op(X d1, X d2, X *params) { return op(d1, d2); } op_def static Z op(X d1, X *params) { return d1; } }; template class NotEqualTo { public: op_def static Z op(X d1, X d2) { return d1 != d2; } op_def static Z op(X d1, X d2, X *params) { return op(d1, d2); } op_def static Z op(X d1, X *params) { return d1; } }; template class GreaterThanOrEqual { public: op_def static Z op(X d1, X d2) { return d1 >= d2; } op_def static Z op(X d1, X d2, X *params) { return 
op(d1, d2); } // FIXME: this signature clashes with MetaOp stuff op_def static Z op(X d1, X *params) { return d1; } }; template class GreaterThan { public: op_def static Z op(X d1, X d2) { return d1 > d2; } op_def static Z op(X d1, X d2, X *params) { return op(d1, d2); } // FIXME: this signature clashes with MetaOp stuff op_def static Z op(X d1, X *params) { return d1; } }; template class LessThan { public: op_def static Z op(X d1, X d2) { return d1 < d2; } op_def static Z op(X d1, X d2, X *params) { return op(d1, d2); } op_def static Z op(X d1, X *params) { return d1; } }; template class LessThanOrEqual { public: op_def static Z op(X d1, X d2) { return d1 <= d2; } op_def static Z op(X d1, X d2, X *params) { return op(d1, d2); } op_def static Z op(X d1, X *params) { return d1; } }; template class Abs { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return nd4j::math::nd4j_abs(d1); } }; template class Ceiling { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return nd4j::math::nd4j_ceil(d1); } }; template class Cosine { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return nd4j::math::nd4j_cos(d1); } }; template class Exp { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return nd4j::math::nd4j_exp(d1); } }; template class HardTanhDerivative { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return ((d1 >= static_cast(-1.f) && d1 <= static_cast(1.f)) ? 
static_cast(1.f) : static_cast(0.f)); } }; template class HardTanh { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { if (d1 < static_cast(-1)) return static_cast(-1); else if (d1 > static_cast(1)) return static_cast(1); else return d1; } }; template class Floor { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return nd4j::math::nd4j_floor(d1); } }; template class Log { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return nd4j::math::nd4j_log(d1); } }; template class Log1p { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return nd4j::math::nd4j_log(1 + d1); } }; template class LogX { public: op_def static Z op(X d1, Y d2, Z *params) { return nd4j::math::nd4j_log(d1) / nd4j::math::nd4j_log(d2) ; } }; template class StabilizeFP16 { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { if (d1 <= static_cast(0)) return static_cast(nd4j::DataTypeUtils::min()); else return d1; } }; template class StabilizeX { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { if (d1 <= static_cast(0)) return nd4j::DataTypeUtils::min(); else return d1; } }; template class SpecialDerivative { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return d1 * (static_cast(1.f) - d1); } }; template class Neg { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return -d1; } }; template class Erf { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return nd4j::math::nd4j_erf(d1); } }; template class Erfc { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return nd4j::math::nd4j_erfc(d1); } }; template class 
Reciprocal { public: no_op_exec_special_same no_op_exec_special_same_cuda // op_def static T op(T d1) { // return (T(1.0f) / d1); // } // op for MetaOps op_def static X op(X d1, X *params) { return (static_cast(1) / d1); } }; template class Sqr { public: no_op_exec_special no_op_exec_special_cuda op_def static Z op(X d1, Z *params) { return nd4j::math::nd4j_pow(d1, static_cast(2)); } op_def static Z op(X d1) { return nd4j::math::nd4j_pow(d1, static_cast(2)); } }; template class RelativeError { public: no_op_exec_special no_op_exec_special_cuda op_def static Z op(X d1, Y d2) { return nd4j::math::nd4j_re(d1, d2); } op_def static Z op(X d1, Y d2, Z *params) { return op(d1, d2); } op_def static Z op(X d1) { return static_cast(0); } }; template class BinaryRelativeError { public: no_op_exec_special no_op_exec_special_cuda op_def static Z op(X d1, Y d2, Z *params) { X threshold = params[0]; return nd4j::math::nd4j_re(d1, d2) > threshold ? static_cast(1) : static_cast(0); } op_def static Z op(X d1) { return static_cast(0); } }; template class BinaryMinimumAbsoluteRelativeError { public: no_op_exec_special no_op_exec_special_cuda op_def static Z op(X d1, X *params) { X d2 = params[0]; X thresholdRelative = params[1]; X thresholdAbsolute = params[2]; return nd4j::math::nd4j_re(d1, d2) > thresholdRelative ? (nd4j::math::nd4j_abs(d1 - static_cast(d2)) < thresholdAbsolute ? static_cast(0) : static_cast(1)) : static_cast(0); } op_def static Z op(X d1, Y d2, Z *params) { X thresholdRelative = params[0]; X thresholdAbsolute = params[1]; return nd4j::math::nd4j_re(d1, d2) > thresholdRelative ? (nd4j::math::nd4j_abs(d1 - static_cast(d2)) < thresholdAbsolute ? 
static_cast(0) : static_cast(1)) : static_cast(0); } op_def static Z op(X d1) { return static_cast(0); } }; template class ReversePow { public: no_op_exec_special no_op_exec_special_cuda op_def static Z op(X d1, Z *params) { return nd4j::math::nd4j_pow(params[0], d1); } op_def static Z op(X d1, Y d2) { return nd4j::math::nd4j_pow(d2, d1); } op_def static Z op(X d1, Y d2, Z *params) { return nd4j::math::nd4j_pow(d2, d1); } op_def static Z op(X d1) { return d1; } }; template class Pow { public: no_op_exec_special no_op_exec_special_cuda op_def static Z op(X d1, Z *params) { return nd4j::math::nd4j_pow(d1, params[0]); } op_def static Z op(X d1, Y d2) { return nd4j::math::nd4j_pow(d1, d2); } op_def static Z op(X d1, Y d2, Z *params) { return nd4j::math::nd4j_pow(d1, d2); } op_def static Z op(X d1) { return d1; } }; template class PowDerivative { public: no_op_exec_special no_op_exec_special_cuda op_def static Z op(X d1, Z *params) { return params[0] * nd4j::math::nd4j_pow(d1, static_cast(params[0]) - static_cast(1.f)); } op_def static Z op(X d1, Y d2) { return static_cast(d2) * nd4j::math::nd4j_pow(d1, static_cast(d2) - static_cast(1.f)); } op_def static Z op(X d1, Y d2, Z *params) { return static_cast(d2) * nd4j::math::nd4j_pow(d1, static_cast(d2) - static_cast(1.f)); } op_def static Z op(X d1) { return d1; } }; template class IGamma { public: no_op_exec_special no_op_exec_special_cuda op_def static Z op(X d1, Z *params) { return nd4j::math::nd4j_igamma(d1, params[0]); } op_def static Z op(X d1, Y d2) { return nd4j::math::nd4j_igamma(d1, d2); } op_def static Z op(X d1, Y d2, Z *params) { return nd4j::math::nd4j_igamma(d1, d2); } op_def static Z op(X d1) { return d1; } }; template class IGammac { public: no_op_exec_special no_op_exec_special_cuda op_def static Z op(X d1, Z *params) { return nd4j::math::nd4j_igammac(d1, params[0]); } op_def static Z op(X d1, Y d2) { return nd4j::math::nd4j_igammac(d1, d2); } op_def static Z op(X d1, Y d2, Z *params) { return 
nd4j::math::nd4j_igammac(d1, d2); } op_def static Z op(X d1) { return d1; } }; template class Round { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return nd4j::math::nd4j_round(d1); } }; template class IsNan { public: no_op_exec_special_bool no_op_exec_special_bool_cuda no_op_exec_special_accumulation no_op_exec_special_accumulation_cuda op_def static Z op(X d1, X *params) { return nd4j::math::nd4j_isnan(d1) ? static_cast(1) : static_cast(0); } op_def static X startingValue(const X *input) { return static_cast(0); } op_def static Z merge(X old, X opOutput, X *extraParams) { return opOutput + old; } op_def static Z update(X old, X opOutput, X *extraParams) { return opOutput + old; } op_def static Z postProcess(X reduction, Nd4jLong n, X *extraParams) { return reduction; } }; template class Expm1 { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return nd4j::math::nd4j_exp(d1) - static_cast(1); } }; template class IsPositive { public: no_op_exec_special_bool no_op_exec_special_bool_cuda no_op_exec_special_accumulation no_op_exec_special_accumulation_cuda op_def static Z op(X d1, X *params) { return d1 > (X)0.f; } op_def static X startingValue(const X *input) { return static_cast(0); } op_def static Z merge(X old, X opOutput, X *extraParams) { return opOutput + old; } op_def static Z update(X old, X opOutput, X *extraParams) { return opOutput + old; } op_def static Z postProcess(X reduction, Nd4jLong n, X *extraParams) { return reduction; } }; template class IsInf { public: no_op_exec_special_bool no_op_exec_special_bool_cuda no_op_exec_special_accumulation no_op_exec_special_accumulation_cuda op_def static Z op(X d1, X *params) { return nd4j::math::nd4j_isinf(d1) ? 
static_cast(1) : static_cast(0); } op_def static X startingValue(const X *input) { return static_cast(0); } op_def static Z merge(X old, X opOutput, X *extraParams) { return opOutput + old; } op_def static Z update(X old, X opOutput, X *extraParams) { return opOutput + old; } op_def static Z postProcess(X reduction, Nd4jLong n, X *extraParams) { return reduction; } }; template class IsInfOrNan{ public: no_op_exec_special_bool no_op_exec_special_bool_cuda no_op_exec_special_accumulation no_op_exec_special_accumulation_cuda op_def static Z op(X d1, X *params) { return nd4j::math::nd4j_isfin(d1) ? static_cast(0) : static_cast(1); } op_def static X startingValue(const X *input) { return static_cast(0); } op_def static Z merge(X old, X opOutput, X *extraParams) { return opOutput == static_cast(0) && old == static_cast(0) ? static_cast(0) : static_cast(1); } op_def static Z update(X old, X opOutput, X *extraParams) { return opOutput == static_cast(0) && old == static_cast(0) ? static_cast(0) : static_cast(1); } op_def static Z postProcess(X reduction, Nd4jLong n, X *extraParams) { return reduction != static_cast(0); } }; template class IsFinite { public: no_op_exec_special_bool no_op_exec_special_bool_cuda no_op_exec_special_accumulation no_op_exec_special_accumulation_cuda op_def static Z op(X d1, X *params) { return nd4j::math::nd4j_isfin(d1) ? static_cast(1) : static_cast(0); } op_def static X startingValue(const X *input) { return static_cast(1); } op_def static Z merge(X old, X opOutput, X *extraParams) { return opOutput == static_cast(0) || old == static_cast(0) ? static_cast(0) : static_cast(1); } op_def static Z update(X old, X opOutput, X *extraParams) { return opOutput == static_cast(0) || old == static_cast(0) ? 
static_cast(0) : static_cast(1); } op_def static Z postProcess(X reduction, Nd4jLong n, X *extraParams) { return reduction != static_cast(0); } }; template class ClipByValue { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { if (d1 > params[1]) return params[1]; if (d1 < params[0]) return params[0]; return d1; } }; template class LstmClip { public: no_op_exec_special no_op_exec_special_cuda op_def static Z op(X d1, Y d2, Z *params) { X _v = (X) d2; if (d1 > _v) return _v; else if (d1 < -_v) return -_v; else return d1; } }; template class Swish { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return d1 * nd4j::math::nd4j_sigmoid(d1); } }; template class GELU { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return d1 * nd4j::math::nd4j_sigmoid(static_cast(1.702f) * d1); } }; template class PreciseGELU { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { auto sp = nd4j::math::nd4j_sqrt(static_cast(2) / static_cast(M_PI)); auto xp = d1 + nd4j::math::nd4j_pow(static_cast(0.044715) * d1, static_cast(3)); return (d1 / static_cast(2)) * (static_cast(1) + nd4j::math::nd4j_tanh(sp * xp)); } }; template class GELUDerivative { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { auto x17 = static_cast(1.702f) * d1; auto ep = nd4j::math::nd4j_pow(static_cast(M_E), x17); // (E^(1.702 x) (1. + E^(1.702 x) + 1.702 x))/(1. 
+ E^(1.702 x))^2 return (ep * (static_cast(1.f) + ep + x17)) / nd4j::math::nd4j_pow((static_cast(1.f) + ep), 2); } }; template class PreciseGELUDerivative { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { auto x79 = static_cast(0.797885) * d1; auto x03 = nd4j::math::nd4j_pow(static_cast(0.0356774) * d1, 3); auto x39 = static_cast(0.398942) * d1; auto x05 = nd4j::math::nd4j_pow(static_cast(0.0535161) * d1, 3); auto scz = nd4j::math::nd4j_sech(x79 + x03); // 0.5 + (0.398942 x + 0.0535161 x^3) Sech[0.797885 x + 0.0356774 x^3]^2 + 0.5 Tanh[0.797885 x + 0.0356774 x^3] return static_cast(0.5) + (x39 + x05) * (scz * scz) + static_cast(0.5) * nd4j::math::nd4j_tanh(x79 + x03); } }; template class SwishDerivative { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { X ex = nd4j::math::nd4j_pow(static_cast(M_E), d1); return (ex * (d1 + ex + static_cast(1.f))) / nd4j::math::nd4j_pow((ex + static_cast(1.f)) , static_cast(2.f)); } }; template class LogSigmoid { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return nd4j::math::nd4j_log(nd4j::math::nd4j_sigmoid(d1)); } }; template class LogSigmoidDerivative { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { X ex = nd4j::math::nd4j_pow(M_E, d1); return static_cast(1.f) / (ex + static_cast(1.f)); } }; template class Sigmoid { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return nd4j::math::nd4j_sigmoid(d1); } }; template class SigmoidDerivative { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return nd4j::math::nd4j_sigmoidderivative(d1); } }; template class HardSigmoid { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return nd4j::math::nd4j_min(static_cast(1), 
nd4j::math::nd4j_max(static_cast(0), (static_cast(0.2f)) * d1 + static_cast(0.5f))); }
};

// Yields 0 when d1 lies outside [-2.5, 2.5], otherwise 0.2.
template class HardSigmoidDerivative {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) {
        return d1 < static_cast(-2.5f) || d1 > static_cast(2.5f) ? static_cast(0.f) : static_cast(0.2f);
    }
};

/**
 * Scale to be between a min and max.
 *
 * params[0] is min and params[1] is max. Values already inside [min, max] are
 * returned unchanged. For the range [0, 1] the value first passes through
 * 1 / (1 + exp(-d1)); in every other case d1 itself is scaled, floored and shifted by min.
 */
template class SetRange {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) {
        auto min = params[0];
        auto max = params[1];
        if (static_cast(d1) >= min && static_cast(d1) <= max)
            return d1;
        if (min == static_cast(0) && max == static_cast(1)) {
            auto val = static_cast(1) / (static_cast(1) + nd4j::math::nd4j_exp(-d1));
            return (nd4j::math::nd4j_floor(val * (max - min)) + min);
        }

        return (nd4j::math::nd4j_floor(d1 * (max - min)) + min);
    }
};

// Forwards d1 to nd4j::math::nd4j_sin.
template class Sin {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) { return nd4j::math::nd4j_sin(d1); }
};

// Computes d1 * d1.
template class Square {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) { return d1 * d1; }
};

// Forwards d1 to nd4j::math::nd4j_sqrt; the result type Z may differ from the input type X.
template class Sqrt {
public:
    no_op_exec_special
    no_op_exec_special_cuda

    op_def static Z op(X d1, Z *params) { return nd4j::math::nd4j_sqrt(d1); }
};

// Reciprocal square root: 1 / nd4j_sqrt(d1).
template class RSqrt {
public:
    no_op_exec_special
    no_op_exec_special_cuda

    op_def static Z op(X d1, Z *params) { return static_cast(1) / nd4j::math::nd4j_sqrt(d1); }
};

// Forwards d1 to nd4j::math::nd4j_rint.
template class Rint {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) { return nd4j::math::nd4j_rint(d1); }
};

// Forwards d1 to nd4j::math::softplus.
template class SoftPlus {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) { return nd4j::math::softplus(d1); }
};

// Sign of d1, computed without branches as (d1 > 0) - (d1 < 0).
template class Sign {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) { return (d1 > static_cast(0)) - (d1 < static_cast(0)); }
};

// Computes d1 * (1 - d1).
template class TimesOneMinus {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) { return d1 * (static_cast(1) - d1); }
};

// Rational approximation of tanh. With y = (2/3) * x:
//   1.7159 * sgn(y) * (1 - 1 / (1 + |y| + y^2 + 1.41645 * y^4))
template class RationalTanh {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) {
        // keep 2/3 as runtime variable, to match precision
        auto dis = (static_cast(2) / static_cast(3)) * d1;

        auto tanh = nd4j::math::nd4j_sgn(dis) * (static_cast(1) - (static_cast(1) / (static_cast(1) + static_cast(nd4j::math::nd4j_abs(dis)) + nd4j::math::nd4j_pow(dis, static_cast(2)) + static_cast(1.41645f) * nd4j::math::nd4j_pow(dis, static_cast(4)) )));
        return static_cast(1.7159f) * tanh;
    }
};

// Derivative of RationalTanh. With y = (2/3) * x and a = 1 + |y| + y^2 + 1.41645 * y^4:
//   1.7159 * (2/3) * (1 + sign(y) * (2 y + 4 * 1.41645 * y^3)) / a^2
template class RationalTanhDerivative {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) {
        auto dis = (static_cast(2.f) / static_cast(3.f)) * d1;

        auto a = static_cast(1.f) + nd4j::math::nd4j_abs(dis) + nd4j::math::nd4j_pow(dis, static_cast(2.f)) + static_cast(1.41645f) * nd4j::math::nd4j_pow(dis, static_cast(4));

        auto tDeriv = (static_cast(1.f) + nd4j::math::nd4j_sign(dis) * (static_cast(2.f) * dis + static_cast(4.f) * static_cast(1.41645f) * nd4j::math::nd4j_pow(dis, static_cast(3)))) / (a * a);

        return static_cast(1.7159f) * (static_cast(2.f) / static_cast(3.f)) * tDeriv;
    }
};

// Forwards d1 to nd4j::math::nd4j_tanh.
template class Tanh {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) { return nd4j::math::nd4j_tanh(d1); }
};

// max(0, tanh(x)).
template class RectifiedTanh {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) { return nd4j::math::nd4j_max(static_cast(0), nd4j::math::nd4j_tanh(d1)); }
};

// nd4j_tanhderivative(d1) for d1 > 0, otherwise 0.
template class RectifiedTanhDerivative {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) {
        return d1 > static_cast(0.f) ? nd4j::math::nd4j_tanhderivative(d1) : static_cast(0.f);
    }
};

// Forwards d1 to nd4j::math::nd4j_atanh.
template class ATanh {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) { return nd4j::math::nd4j_atanh(d1); }
};

// Forwards d1 to nd4j::math::nd4j_tanhderivative.
template class TanhDerivative {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) { return nd4j::math::nd4j_tanhderivative(d1); }
};

// Computes d1^3 by repeated multiplication.
template class Cube {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) { return d1 * d1 * d1; }
};

// Computes 3 * d1^2.
template class CubeDerivative {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) { return static_cast(3) * d1 * d1; }
};

// Forwards d1 to nd4j::math::nd4j_acos.
template class ACos {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) { return nd4j::math::nd4j_acos(d1); }
};

// Forwards d1 to nd4j::math::nd4j_asinh.
template class ASinh {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) { return nd4j::math::nd4j_asinh(d1); }
};

// Computes 1 / sqrt(x^2 + 1).
template class ASinhDerivative {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) {
        return static_cast(1.f) / (nd4j::math::nd4j_sqrt(nd4j::math::nd4j_pow(d1, static_cast(2.f)) + static_cast(1.f)));
    }
};

// Forwards d1 to nd4j::math::nd4j_acosh.
template class ACosh {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) { return nd4j::math::nd4j_acosh(d1); }
};

// Computes 1 / (sqrt(x - 1) * sqrt(x + 1)).
template class ACoshDerivative {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) {
        return static_cast(1.f) / (nd4j::math::nd4j_sqrt(d1 - static_cast(1.f)) * nd4j::math::nd4j_sqrt(d1 + static_cast(1.f)));
    }
};

// Ignores its input and always yields 1.
template class Ones {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) { return static_cast(1.0f); }
};

template class SoftSign {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) { return
nd4j::math::nd4j_softsign(d1); }
};

// Forwards d1 to nd4j::math::nd4j_softsignderivative.
template class SoftSignDerivative {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) { return nd4j::math::nd4j_softsignderivative(d1); }
};

// Element-wise predicate selected at run time.
// extraParams[0] is the comparison value, extraParams[1] the tolerance used by
// modes 0 and 1, and extraParams[2] the mode selector (see the case labels).
template class MatchConditionBool {
public:
    no_op_exec_special_bool
    no_op_exec_special_bool_cuda

    // this op returns true if the condition is met, false otherwise
    op_def static Z op(X d1, X *extraParams) {
        X compare = extraParams[0];
        X eps = extraParams[1];

        auto mode = static_cast(extraParams[2]);
        //nd4j_printf("value: %f; comp: %f; eps: %f; mode: %i;\n", d1, compare, eps, mode);

        switch (mode) {
            case 0: // equals
                return nd4j::math::nd4j_abs(d1 - compare) <= eps ? true : false;
            case 1: // not equals
                return nd4j::math::nd4j_abs(d1 - compare) > eps ? true : false;
            case 2: // less_than
                return d1 < compare ? true : false;
            case 3: // greater_than
                return d1 > compare ? true : false;
            case 4: // less_or_equals_than
                return d1 <= compare ? true : false;
            case 5: // greater_or_equals_than
                return d1 >= compare ? true : false;
            case 6: // abs_less_than
                return nd4j::math::nd4j_abs(d1) < compare ? true : false;
            case 7: // abs_greater_than
                return nd4j::math::nd4j_abs(d1) > compare ? true : false;
            case 8: // is inf
                return nd4j::math::nd4j_isinf(d1) ? true : false;
            case 9: // is nan
                return nd4j::math::nd4j_isnan(d1) ? true : false;
            case 10: // exact equality (no tolerance)
                return (d1 == compare) ? true : false;
            case 11: // exact inequality (no tolerance)
                return (d1 != compare) ? true : false;
            case 12: // abs_greater_or_equals_than
                return nd4j::math::nd4j_abs(d1) >= compare ? true : false;
            case 13: // abs_less_or_equals_than
                return nd4j::math::nd4j_abs(d1) <= compare ? true : false;
            case 14:
                // isFinite
                return !(nd4j::math::nd4j_isinf(d1) || nd4j::math::nd4j_isnan(d1));
            case 15:
                // isInfinite (as written, NaN also matches)
                return nd4j::math::nd4j_isinf(d1) || nd4j::math::nd4j_isnan(d1);
            default:
                printf("Undefined match condition: [%i]\n", mode);
        }

        // NOTE(review): an undefined mode falls through to here and returns the input
        // value itself rather than a "no match" result - confirm this is intended.
        return d1;
    }
};

// Counting variant of the predicate above: op yields 1 or 0 per element and
// merge/update add the per-element results together.
// extraParams layout is the same: [0] comparison value, [1] tolerance, [2] mode.
template class MatchCondition {
public:
    no_op_exec_special
    no_op_exec_special_cuda

    no_op_exec_special_accumulation_long
    no_op_exec_special_accumulation_cuda

    op_def static Z startingValue(const X *input) { return static_cast(0); }

    op_def static Z merge(Z old, Z opOutput, X *extraParams) { return old + opOutput; }

    op_def static Z update(Z old, Z opOutput, X *extraParams) { return old + opOutput; }

    // this op returns 1 if the condition is met, 0 otherwise
    op_def static Z op(X d1, X *extraParams) {
        X compare = extraParams[0];
        X eps = extraParams[1];

        auto mode = static_cast(extraParams[2]);
        //printf("value: %f; comp: %f; eps: %f; mode: %i;\n", (float) d1, (float) compare, (float) eps, mode);

        switch (mode) {
            case 0: // equals
                return nd4j::math::nd4j_abs(d1 - compare) <= eps ? 1 : 0;
            case 1: // not equals
                return nd4j::math::nd4j_abs(d1 - compare) > eps ? 1 : 0;
            case 2: // less_than
                return d1 < compare ? 1 : 0;
            case 3: // greater_than
                return d1 > compare ? 1 : 0;
            case 4: // less_or_equals_than
                return d1 <= compare ? 1 : 0;
            case 5: // greater_or_equals_than
                return d1 >= compare ? 1 : 0;
            case 6: // abs_less_than
                return nd4j::math::nd4j_abs(d1) < compare ? 1 : 0;
            case 7: // abs_greater_than
                return nd4j::math::nd4j_abs(d1) > compare ? 1 : 0;
            case 8: // is inf
                return nd4j::math::nd4j_isinf(d1) ? 1 : 0;
            case 9: // is nan
                return nd4j::math::nd4j_isnan(d1) ? 1 : 0;
            case 10: // exact equality (no tolerance)
                return (d1 == compare) ? 1 : 0;
            case 11: // exact inequality (no tolerance)
                return (d1 != compare) ? 1 : 0;
            case 12: // abs_greater_or_equals_than
                return nd4j::math::nd4j_abs(d1) >= compare ? 1 : 0;
            case 13: // abs_less_or_equals_than
                return nd4j::math::nd4j_abs(d1) <= compare ? 1 : 0;
            case 14:
                // isFinite
                return !(nd4j::math::nd4j_isinf(d1) || nd4j::math::nd4j_isnan(d1)) ? 1 : 0;
            case 15:
                // isInfinite (as written, NaN also matches)
                return nd4j::math::nd4j_isinf(d1) || nd4j::math::nd4j_isnan(d1) ? 1 : 0;
            default:
                printf("Undefined match condition: [%i]\n", mode);
        }

        // NOTE(review): an undefined mode falls through to here and adds the input
        // value itself to the count - confirm this is intended.
        return d1;
    }

    op_def static Z postProcess(Z reduction, Nd4jLong n, X *extraParams) { return reduction; }
};

// Forwards d1 and d2 (cast) to nd4j::math::nd4j_elu; params is unused.
template class ELU {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static Z op(X d1, Y d2, Z *params) { return nd4j::math::nd4j_elu(d1, static_cast(d2)); }
};

// Forwards d1 and d2 (cast) to nd4j::math::nd4j_eluderivative; params is unused.
template class ELUDerivative {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static Z op(X d1, Y d2, Z *params) { return nd4j::math::nd4j_eluderivative(d1, static_cast(d2)); }
};

// Thresholded ReLU: yields the larger of d1 and d2, so d2 acts as the cutoff.
template class RELU {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static Z op(X d1, Y d2, Z *params) {
        auto xt = static_cast(d1);
        auto xf = static_cast(d2);
        return xt < xf ? xf : xt;
    }
};

// Computes d1 * (1 - d2) + 0.5 * d2.
template class SXELogitsSmoother {
public:
    op_def static Z op(X d1, Y d2, Z *params) { return d1 * ((X)1.f - (X) d2) + (X)(0.5f) * (X) d2; }
};

// RELU with the result additionally capped at 6.
template class RELU6 {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static Z op(X d1, Y d2, Z *params) {
        auto relu = simdOps::RELU::op(d1, d2, params);
        return relu < static_cast(6) ? relu : static_cast(6);
    }
};

// Leaky ReLU: negative inputs are scaled by d2 (alpha), others pass through.
template class LeakyRELU {
public:
    no_op_exec_special
    no_op_exec_special_cuda

    op_def static Z op(X d1, Y d2, Z *params) {
        auto val = static_cast(d1);
        auto alpha = static_cast(d2);
        return val < 0.0f ? alpha * val : val;
    }
};

// SELU: SELU_LAMBDA * x for x > 0, otherwise SELU_LAMBDA * (SELU_ALPHA * exp(x) - SELU_ALPHA).
template class SELU {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) {
        return d1 > static_cast(0.0f) ? static_cast(SELU_LAMBDA) * static_cast(d1) : static_cast(SELU_LAMBDA) * (static_cast(SELU_ALPHA) * nd4j::math::nd4j_exp(d1) - static_cast(SELU_ALPHA));
    }
};

template class SELUDerivative {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) { return d1 > static_cast(0.f) ?
static_cast(SELU_LAMBDA) : static_cast(SELU_ALPHA) * static_cast(SELU_LAMBDA) * nd4j::math::nd4j_exp(d1); }
};

// Yields 1 for d1 >= 0, otherwise d2 (the leak factor); params is unused.
template class LeakyRELUDerivative {
public:
    no_op_exec_special
    no_op_exec_special_cuda

    op_def static Z op(X d1, Y d2, Z *params) {
        if (d1 >= static_cast(0))
            return static_cast(1);
        else
            return static_cast(d2);
    }
};

// Forwards d1 to nd4j::math::nd4j_asin.
template class ASin {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) { return nd4j::math::nd4j_asin(d1); }
};

// Forwards d1 to nd4j::math::nd4j_sinh.
template class Sinh {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) { return nd4j::math::nd4j_sinh(d1); }
};

// Derivative of sinh, i.e. nd4j_cosh(d1).
template class SinhDerivative {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) { return nd4j::math::nd4j_cosh(d1); }
};

// Forwards d1 to nd4j::math::nd4j_cosh.
template class Cosh {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) { return nd4j::math::nd4j_cosh(d1); }
};

// Forwards d1 to nd4j::math::nd4j_tan.
template class Tan {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) { return nd4j::math::nd4j_tan(d1); }
};

// Computes 1 / cos(x)^2.
template class TanDerivative {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) {
        return static_cast(1.f) / nd4j::math::nd4j_pow(nd4j::math::nd4j_cos(d1), static_cast(2.0f));
    }
};

// Forwards d1 to nd4j::math::nd4j_atan.
template class ATan {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) { return nd4j::math::nd4j_atan(d1); }
};

// Two-argument arctangent. Note the argument order: the call is nd4j_atan2(d2, d1).
template class Atan2 {
public:
    no_op_exec_special
    no_op_exec_special_cuda

    op_def static Z op(X d1, Y d2) { return nd4j::math::nd4j_atan2(d2, d1); }

    op_def static Z op(X d1, Y d2, Z *params) { return op(d1, d2); }

    // op for MetaOps: the second operand is read from params[0]
    op_def static Z op(X d1, Y *params) { return op(d1, params[0]); }
};

// Returns its input unchanged.
template class Identity {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) { return d1; }
};

// Limits d1 so that d1 * k stays within [MIN_CUTFOFF, -MIN_CUTFOFF], where k = params[0].
// Out-of-range inputs are replaced by the corresponding bound divided by k.
template class Stabilize {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) {
        X k = params[0];
        if (d1 * k > static_cast(- MIN_CUTFOFF))
            return static_cast(- MIN_CUTFOFF) / k;
        else if (d1 * k < static_cast(MIN_CUTFOFF))
            return static_cast(MIN_CUTFOFF) / k;
        return d1;
    }
};

// Step function: 1 when d1 > d2, otherwise 0; params is unused.
template class Step {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static Z op(X d1, Y d2, Z *params) { return (d1 > static_cast(d2) ? static_cast(1) : static_cast(0)); }
};

// Computes 1 - d1.
template class OneMinus {
public:
    no_op_exec_special_same
    no_op_exec_special_same_cuda

    op_def static X op(X d1, X *params) { return static_cast(1) - d1; }
};

// Sum reduction: starts at 0, op passes each element through, merge/update add.
template class Sum {
public:
    no_op_exec_special_accumulation_same
    no_op_exec_special_accumulation_same_cuda

    op_def static X startingValue(const X *input) { return static_cast(0.0f); }

    op_def static X merge(X old, X opOutput, X *extraParams) { return opOutput + old; }

    op_def static X update(X old, X opOutput, X *extraParams) { return opOutput + old; }

    op_def static X op(X d1, X *extraParams) { return d1; }

    op_def static X postProcess(X reduction, Nd4jLong n, X *extraParams) { return reduction; }
};

// Sum reduction whose per-element op evaluates a chain of pow/log/sin/tanh/sqrt/atan calls.
// NOTE(review): judging by the name this exists for benchmarking only - confirm.
template class ReduceSameBenchmarkOp {
public:
    no_op_exec_special_accumulation_same
    no_op_exec_special_accumulation_same_cuda

    const static functions::ReduceType reduceType = functions::ReduceType::SUM;

    op_def static X startingValue(const X *input) { return static_cast(0.0f); }

    op_def static X merge(X old, X opOutput, X *extraParams) { return opOutput + old; }

    op_def static X update(X old, X opOutput, X *extraParams) { return opOutput + old; }

    op_def static X op(X d1, X *extraParams) {
        auto f1 = static_cast(d1);
        return static_cast(nd4j::math::nd4j_pow(f1, 3)
                           + nd4j::math::nd4j_log(f1) * nd4j::math::nd4j_sin(f1)
                             / nd4j::math::nd4j_tanh(static_cast(M_E) * static_cast(M_PI) * f1)
                             * nd4j::math::nd4j_sqrt(static_cast(M_PI) / f1)
                           - nd4j::math::nd4j_atan(static_cast(M_E) / f1));
    }

    op_def static X postProcess(X reduction, Nd4jLong n, X *extraParams) {
return reduction; }
};

// Accumulates p * log(p) with p = d1 * d1; postProcess negates the sum.
template class ShannonEntropy {
public:
    no_op_exec_special_accumulation
    no_op_exec_special_accumulation_cuda

    const static functions::ReduceType reduceType = functions::ReduceType::SUM;

    op_def static X startingValue(const X *input) { return static_cast(0); }

    op_def static Z merge(Z old, Z opOutput, Z *extraParams) { return opOutput + old; }

    op_def static Z update(Z old, Z opOutput, Z *extraParams) { return opOutput + old; }

    op_def static Z op(X d1, Z *extraParams) {
        auto p = d1 * d1;
        return static_cast(p) * nd4j::math::nd4j_log(p);
    }

    op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParams) { return -reduction; }
};

// Accumulates d1 * log(d1); postProcess returns the log of the negated sum.
template class LogEntropy {
public:
    no_op_exec_special_accumulation
    no_op_exec_special_accumulation_cuda

    const static functions::ReduceType reduceType = functions::ReduceType::SUM;

    op_def static X startingValue(const X *input) { return static_cast(0); }

    op_def static Z merge(Z old, Z opOutput, Z *extraParams) { return opOutput + old; }

    op_def static Z update(Z old, Z opOutput, Z *extraParams) { return opOutput + old; }

    op_def static Z op(X d1, Z *extraParams) { return static_cast(d1) * nd4j::math::nd4j_log(d1); }

    op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParams) {
        //entropy is -sum(p(x) * log(p(x))); log entropy is log of this
        return nd4j::math::nd4j_log(-reduction);
    }
};

// Accumulates d1 * log(d1); postProcess negates the sum.
template class Entropy {
public:
    no_op_exec_special_accumulation
    no_op_exec_special_accumulation_cuda

    const static functions::ReduceType reduceType = functions::ReduceType::SUM;

    op_def static X startingValue(const X *input) { return static_cast(0); }

    op_def static Z merge(Z old, Z opOutput, Z *extraParams) { return opOutput + old; }

    op_def static Z update(Z old, Z opOutput, Z *extraParams) { return opOutput + old; }

    op_def static Z op(X d1, Z *extraParams) { return static_cast(d1) * nd4j::math::nd4j_log(d1); }

    op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParams) {
        return static_cast(-reduction); //entropy is -sum(p(x) * log(p(x)))
    }
};

// Sum of absolute values: op, merge, update and postProcess all apply nd4j_abs.
template class ASum {
public:
    no_op_exec_special_accumulation_same
    no_op_exec_special_accumulation_same_cuda

    const static functions::ReduceType reduceType = functions::ReduceType::ASUM;

    op_def static X startingValue(const X *input) { return static_cast(0); }

    op_def static X merge(X old, X opOutput, X *extraParams) {
        return nd4j::math::nd4j_abs(opOutput) + nd4j::math::nd4j_abs(old);
    }

    op_def static X update(X old, X opOutput, X *extraParams) {
        return nd4j::math::nd4j_abs(opOutput) + nd4j::math::nd4j_abs(old);
    }

    op_def static X op(X d1, X *extraParams) { return nd4j::math::nd4j_abs(d1); }

    op_def static X postProcess(X reduction, Nd4jLong n, X *extraParams) { return nd4j::math::nd4j_abs(reduction); }
};

// Counts elements that are not equal to 0: op yields 0 for a zero element and 1 otherwise.
template class CountNonZero {
public:
    no_op_exec_special_accumulation_long
    no_op_exec_special_accumulation_cuda

    const static functions::ReduceType reduceType = functions::ReduceType::ASUM;

    op_def static Z startingValue(const X *input) { return static_cast(0); }

    op_def static Z merge(Z old, Z opOutput, X *extraParams) { return opOutput + old; }

    op_def static Z update(Z old, Z opOutput, X *extraParams) { return opOutput + old; }

    op_def static Z op(X d1, X *extraParams) { return d1 == static_cast(0.0f) ? static_cast(0.0f) : static_cast(1.0f); }

    op_def static Z postProcess(Z reduction, Nd4jLong n, X *extraParams) { return reduction; }
};

// Counts elements equal to 0: op yields 1 for a zero element and 0 otherwise.
// NOTE(review): postProcess takes the reduction as X while merge/update produce Z - confirm intended.
template class CountZero {
public:
    no_op_exec_special_accumulation_long
    no_op_exec_special_accumulation_cuda

    const static functions::ReduceType reduceType = functions::ReduceType::SUM;

    op_def static Z startingValue(const X *input) { return static_cast(0.0f); }

    op_def static Z merge(Z old, Z opOutput, X *extraParams) { return opOutput + old; }

    op_def static Z update(Z old, Z opOutput, X *extraParams) { return opOutput + old; }

    op_def static Z op(X d1, X *extraParams) { return d1 == static_cast(0) ? static_cast(1) : static_cast(0); }

    op_def static Z postProcess(X reduction, Nd4jLong n, X *extraParams) { return static_cast(reduction); }
};

// Product reduction: starts at 1, op passes each element through, merge/update multiply.
template class Prod {
public:
    no_op_exec_special_accumulation_same
    no_op_exec_special_accumulation_same_cuda

    const static functions::ReduceType reduceType = functions::ReduceType::PRODUCT;

    op_def static X startingValue(const X *input) { return static_cast(1); }

    op_def static X merge(X old, X opOutput, X *extraParams) { return opOutput * old; }

    op_def static X update(X old, X opOutput, X *extraParams) { return opOutput * old; }

    op_def static X op(X d1, X *extraParams) { return d1; }

    op_def static X postProcess(X reduction, Nd4jLong n, X *extraParams) { return reduction; }
};

// Sums the raw element values and reports 1 when the sum is greater than 0, else 0.
template class Any {
public:
    no_op_exec_special_accumulation
    no_op_exec_special_accumulation_cuda

    const static functions::ReduceType reduceType = functions::ReduceType::SUM;

    op_def static X startingValue(const X *input) { return static_cast(0.0f); }

    op_def static Z merge(X old, X opOutput, X *extraParams) { return opOutput + old; }

    op_def static Z update(X old, X opOutput, X *extraParams) { return opOutput + old; }

    op_def static Z op(X d1, X *extraParams) { return d1; }

    op_def static Z postProcess(X reduction, Nd4jLong n, X *extraParams) {
        return reduction > static_cast(0) ? static_cast(1) : static_cast(0) ;
    }
};

// Multiplies the raw element values and reports 1 when the product is greater than 0, else 0.
template class All {
public:
    no_op_exec_special_accumulation
    no_op_exec_special_accumulation_cuda

    const static functions::ReduceType reduceType = functions::ReduceType::PRODUCT;

    op_def static X startingValue(const X *input) { return static_cast(1); }

    op_def static Z merge(X old, X opOutput, X *extraParams) { return opOutput * old; }

    op_def static Z update(X old, X opOutput, X *extraParams) { return opOutput * old; }

    op_def static Z op(X d1, X *extraParams) { return d1; }

    op_def static Z postProcess(X reduction, Nd4jLong n, X *extraParams) { return reduction > static_cast(0) ?
static_cast(1) : static_cast(0); }
};

// Arithmetic mean: sums the elements, then postProcess divides by n.
template class Mean {
public:
    no_op_exec_special_accumulation
    no_op_exec_special_accumulation_cuda

    const static functions::ReduceType reduceType = functions::ReduceType::SUM;

    op_def static X startingValue(const X *input) { return static_cast(0); }

    op_def static Z merge(Z old, Z opOutput, Z *extraParams) { return opOutput + old; }

    op_def static Z update(Z old, Z opOutput, Z *extraParams) { return opOutput + old; }

    op_def static Z op(X d1, Z *extraParams) { return d1; }

    op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParams) { return reduction / (Z) n; }
};

// Mean-style reduction whose per-element op evaluates a chain of pow/log/sin/tanh/sqrt/atan calls.
// NOTE(review): judging by the name this exists for benchmarking only - confirm.
template class ReduceFloatBenchmarkOp {
public:
    no_op_exec_special_accumulation
    no_op_exec_special_accumulation_cuda

    const static functions::ReduceType reduceType = functions::ReduceType::SUM;

    op_def static X startingValue(const X *input) { return static_cast(0); }

    op_def static Z merge(Z old, Z opOutput, Z *extraParams) { return opOutput + old; }

    op_def static Z update(Z old, Z opOutput, Z *extraParams) { return opOutput + old; }

    op_def static Z op(X d1, Z *extraParams) {
        auto f1 = static_cast(d1);
        return static_cast(nd4j::math::nd4j_pow(f1, 3)
                           + nd4j::math::nd4j_log(f1) * nd4j::math::nd4j_sin(f1)
                             / nd4j::math::nd4j_tanh(static_cast(M_E) * static_cast(M_PI) * f1)
                             * nd4j::math::nd4j_sqrt(static_cast(M_PI) / f1)
                           - nd4j::math::nd4j_atan(static_cast(M_E) / f1));
    }

    op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParams) { return (Z) reduction / (Z) n; }
};

// Mean of absolute values: op applies nd4j_abs, postProcess divides |sum| by n.
// merge takes absolute values of both partial results while update adds them as-is.
template class AMean {
public:
    no_op_exec_special_accumulation
    no_op_exec_special_accumulation_cuda

    const static functions::ReduceType reduceType = functions::ReduceType::SUM;

    op_def static X startingValue(const X *input) { return static_cast(0); }

    op_def static Z merge(Z old, Z opOutput, Z *extraParams) {
        return nd4j::math::nd4j_abs(opOutput) + nd4j::math::nd4j_abs(old);
    }

    op_def static Z update(Z old, Z opOutput, Z *extraParams) { return opOutput + old; }

    op_def static Z op(X d1, Z *extraParams) { return nd4j::math::nd4j_abs(d1); }

    op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParams) {
        return nd4j::math::nd4j_abs(reduction) / static_cast(n);
    }
};

// Maximum: usable as a reduction (startingValue/merge/update/postProcess) and,
// through the two-operand op overloads, as a pairwise maximum.
template class Max {
public:
    no_op_exec_special_accumulation_same
    no_op_exec_special_accumulation_same_cuda

    const static functions::ReduceType reduceType = functions::ReduceType::MAX;

    // Starts from the negated DataTypeUtils::infOrMax() value.
    op_def static X startingValue(const X *input) { return -nd4j::DataTypeUtils::infOrMax(); }

    op_def static X merge(X old, X opOutput, X *extraParams) { return nd4j::math::nd4j_max(old, opOutput); }

    op_def static X update(X old, X opOutput, X *extraParams) { return nd4j::math::nd4j_max(opOutput, old); }

    op_def static X op(X d1, X d2, X *params) { return nd4j::math::nd4j_max(d1, d2); }

    op_def static X op(X d1, X d2) { return nd4j::math::nd4j_max(d1, d2); }

    // FIXME: this signature overlaps with MetaOp
    op_def static X op(X d1, X *extraParams) { return d1; }

    op_def static X postProcess(X reduction, Nd4jLong n, X *extraParams) { return reduction; }
};

// Pairwise op: returns whichever operand has the larger absolute value (sign preserved);
// on a tie the second operand is returned.
template class AMaxPairwise {
public:
    op_def static Z op(X d1, Y d2, Z *params) { return op(d1, d2); }

    op_def static Z op(X d1, Y d2) {
        auto z1 = static_cast(d1);
        auto z2 = static_cast(d2);

        if (nd4j::math::nd4j_abs(z1) > nd4j::math::nd4j_abs(z2))
            return z1;
        else
            return z2;
    }
};

// Pairwise op: returns whichever operand has the smaller absolute value (sign preserved);
// on a tie the second operand is returned.
template class AMinPairwise {
public:
    op_def static Z op(X d1, Y d2, Z *params) { return op(d1, d2); }

    op_def static Z op(X d1, Y d2) {
        auto z1 = static_cast(d1);
        auto z2 = static_cast(d2);

        if (nd4j::math::nd4j_abs(z1) < nd4j::math::nd4j_abs(z2))
            return z1;
        else
            return z2;
    }
};

// Pairwise maximum of the two operands after casting both.
template class MaxPairwise {
public:
    op_def static Z op(X d1, Y d2, Z *params) { return nd4j::math::nd4j_max(static_cast(d1), static_cast(d2)); }

    op_def static Z op(X d1, Y d2) { return nd4j::math::nd4j_max(static_cast(d1), static_cast(d2)); }
};

// Pairwise minimum of the two operands after casting both.
template class MinPairwise {
public:
    op_def static Z op(X d1, Y d2, Z *params) { return nd4j::math::nd4j_min(static_cast(d1), static_cast(d2)); }

    op_def static Z op(X d1, Y d2) { return
nd4j::math::nd4j_min(static_cast(d1), static_cast(d2)); }
};

// Maximum absolute value. The reduction is seeded with input[0]; merge/update
// compare absolute values and postProcess returns |reduction|.
// The two-operand overloads differ: the three-argument form returns max(|d1|, |d2|),
// while the two-argument form returns the operand with the larger magnitude, sign preserved.
template class AMax {
public:
    no_op_exec_special_accumulation_same
    no_op_exec_special_accumulation_same_cuda

    const static functions::ReduceType reduceType = functions::ReduceType::AMAX;

    op_def static X startingValue(const X *input) { return input[0]; }

    op_def static X merge(X old, X opOutput, X *extraParams) {
        return nd4j::math::nd4j_max(nd4j::math::nd4j_abs(old), nd4j::math::nd4j_abs(opOutput));
    }

    op_def static X update(X old, X opOutput, X *extraParams) {
        return nd4j::math::nd4j_max(nd4j::math::nd4j_abs(opOutput), nd4j::math::nd4j_abs(old));
    }

    op_def static X op(X d1, X d2, X *params) {
        return nd4j::math::nd4j_max(nd4j::math::nd4j_abs(d1), nd4j::math::nd4j_abs(d2));
    }

    op_def static X op(X d1, X d2) { return nd4j::math::nd4j_abs(d1) > nd4j::math::nd4j_abs(d2) ? d1 : d2; }

    // FIXME: this signature overlaps with MetaOp
    op_def static X op(X d1, X *extraParams) { return nd4j::math::nd4j_abs(d1); }

    op_def static X postProcess(X reduction, Nd4jLong n, X *extraParams) { return nd4j::math::nd4j_abs(reduction); }
};

// Minimum absolute value. The reduction is seeded with input[0]; merge/update
// compare absolute values and postProcess returns |reduction|.
template class AMin {
public:
    no_op_exec_special_accumulation_same
    no_op_exec_special_accumulation_same_cuda

    const static functions::ReduceType reduceType = functions::ReduceType::AMIN;

    op_def static X startingValue(const X *input) { return input[0]; }

    op_def static X merge(X old, X opOutput, X *extraParams) {
        return nd4j::math::nd4j_min(nd4j::math::nd4j_abs(old), nd4j::math::nd4j_abs(opOutput));
    }

    op_def static X update(X old, X opOutput, X *extraParams) {
        return nd4j::math::nd4j_min(nd4j::math::nd4j_abs(opOutput), nd4j::math::nd4j_abs(old));
    }

    op_def static X op(X d1, X d2, X *params) {
        return nd4j::math::nd4j_min(nd4j::math::nd4j_abs(d1), nd4j::math::nd4j_abs(d2));
    }

    op_def static X op(X d1, X d2) {
        return nd4j::math::nd4j_min(nd4j::math::nd4j_abs(d1), nd4j::math::nd4j_abs(d2));
    }

    // FIXME: this signature overlaps with MetaOp
    op_def static X op(X d1, X *extraParams) { return nd4j::math::nd4j_abs(d1); }

    op_def static X postProcess(X reduction, Nd4jLong n, X *extraParams) { return nd4j::math::nd4j_abs(reduction); }
};

// Minimum: usable as a reduction (startingValue/merge/update/postProcess) and,
// through the two-operand op overloads, as a pairwise minimum.
template class Min {
public:
    no_op_exec_special_accumulation_same
    no_op_exec_special_accumulation_same_cuda

    const static functions::ReduceType reduceType = functions::ReduceType::MIN;

    // Starts from the DataTypeUtils::infOrMax() value.
    op_def static X startingValue(const X *input) { return nd4j::DataTypeUtils::infOrMax(); }

    op_def static X merge(X old, X opOutput, X *extraParams) { return nd4j::math::nd4j_min(old, opOutput); }

    op_def static X update(X old, X opOutput, X *extraParams) { return nd4j::math::nd4j_min(opOutput, old); }

    op_def static X op(X d1, X d2, X *params) { return nd4j::math::nd4j_min(d1, d2); }

    op_def static X op(X d1, X d2) { return nd4j::math::nd4j_min(d1, d2); }

    // FIXME: this signature overlaps with MetaOp
    op_def static X op(X d1, X *extraParams) { return d1; }

    op_def static X postProcess(X reduction, Nd4jLong n, X *extraParams) { return reduction; }
};

// L1 norm: sum of absolute values.
template class Norm1 {
public:
    no_op_exec_special_accumulation
    no_op_exec_special_accumulation_cuda

    const static functions::ReduceType reduceType = functions::ReduceType::SUM;

    op_def static X startingValue(const X *input) { return static_cast(0); }

    op_def static Z merge(Z old, Z opOutput, Z *extraParams) { return opOutput + old; }

    op_def static Z update(Z old, Z opOutput, Z *extraParams) { return opOutput + old; }

    op_def static Z op(X d1, Z *extraParams) { return static_cast(nd4j::math::nd4j_abs(d1)); }

    op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParams) { return reduction; }
};

// L2 norm: sums the squares, then postProcess takes the square root.
template class Norm2 {
public:
    no_op_exec_special_accumulation
    no_op_exec_special_accumulation_cuda

    const static functions::ReduceType reduceType = functions::ReduceType::SUM;

    op_def static X startingValue(const X *input) { return static_cast(0); }

    op_def static Z merge(Z old, Z opOutput, Z *extraParams) { return opOutput + old; }

    op_def static Z update(Z old, Z opOutput, Z *extraParams) { return opOutput + old; }

    op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParams) { return nd4j::math::nd4j_sqrt(reduction); }

    op_def static Z op(X d1, Z *extraParams) { return static_cast(d1 * d1); }
};

// Squared L2 norm: sum of squares, with no square root applied.
template class SquaredNorm {
public:
    no_op_exec_special_accumulation
    no_op_exec_special_accumulation_cuda

    const static functions::ReduceType reduceType = functions::ReduceType::SUM;

    op_def static X startingValue(const X *input) { return static_cast(0); }

    op_def static Z merge(Z old, Z opOutput, Z *extraParams) { return opOutput + old; }

    op_def static Z update(Z old, Z opOutput, Z *extraParams) { return opOutput + old; }

    op_def static Z op(X d1, Z *extraParams) { return static_cast(d1 * d1); }

    op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParams) { return reduction; }
};

// Frobenius norm: sums |d1|^2, then postProcess takes the square root.
template class NormFrobenius {
public:
    no_op_exec_special_accumulation
    no_op_exec_special_accumulation_cuda

    const static functions::ReduceType reduceType = functions::ReduceType::SUM;

    op_def static X startingValue(const X *input) { return static_cast(0); }

    op_def static Z merge(Z old, Z opOutput, Z *extraParams) { return opOutput + old; }

    op_def static Z update(Z old, Z opOutput, Z *extraParams) { return opOutput + old; }

    op_def static Z op(X d1, Z *extraParams) {
        X v = nd4j::math::nd4j_abs(d1);
        return static_cast(v * v);
    }

    op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParams) { return nd4j::math::nd4j_sqrt(reduction); }
};

// p-norm with p = extraParams[0]: op raises |d1| to the power p and the results are summed.
template class NormP {
public:
    no_op_exec_special_accumulation
    no_op_exec_special_accumulation_cuda

    const static functions::ReduceType reduceType = functions::ReduceType::SUM;

    op_def static X startingValue(const X *input) { return static_cast(0); }

    op_def static Z merge(Z old, Z opOutput, Z *extraParams) { return opOutput + old; }

    op_def static Z update(Z old, Z opOutput, Z *extraParams) { return opOutput + old; }

    op_def static Z op(X d1, Z *extraParams) {
        return nd4j::math::nd4j_pow(nd4j::math::nd4j_abs(d1), extraParams[0]);
    }

    op_def static Z postProcess(Z
reduction, Nd4jLong n, Z *extraParams) { return nd4j::math::nd4j_pow(reduction, static_cast(1.0f) / extraParams[0]); } }; template class NormMax { public: no_op_exec_special_accumulation no_op_exec_special_accumulation_cuda const static functions::ReduceType reduceType = functions::ReduceType::SUM; op_def static X startingValue(const X *input) { return static_cast(0); } op_def static Z merge(Z old, Z opOutput, Z *extraParams) { return opOutput + old; } op_def static Z update(Z old, Z opOutput, Z *extraParams) { return nd4j::math::nd4j_max(nd4j::math::nd4j_abs(old), nd4j::math::nd4j_abs(opOutput)); } op_def static Z op(X d1, Z *extraParams) { return static_cast(d1); } op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParams) { return nd4j::math::nd4j_max(nd4j::math::nd4j_abs(reduction), nd4j::math::nd4j_abs(reduction)); } }; template class Variance { public: no_op_exec_special_accumulation no_op_exec_special_accumulation_cuda const static functions::ReduceType reduceType = functions::ReduceType::SUM; op_def static X startingValue(const X *input) { return static_cast(0.0f); } op_def static Z merge(X old, X opOutput, Z *extraParams) { return old + opOutput; } op_def static Z update(X old, X opOutput, Z *extraParams) { return old + opOutput; } op_def static X op(X d1, Z *extraParams) { X mean = static_cast(extraParams[0]); X ret = d1 - mean; return ret * ret; } op_def static Z postProcess(X reduction, Nd4jLong n, Z *extraParams) { // T bias = extraParams[1]; // return (reduction - (nd4j::math::nd4j_pow(bias, static_cast(2.0f)) / static_cast(n))) / (n - 1) return static_cast(reduction) / static_cast(n - 1); } }; /** * Standard deviation of a buffer */ template class StandardDeviation { public: no_op_exec_special_accumulation no_op_exec_special_accumulation_cuda const static functions::ReduceType reduceType = functions::ReduceType::SUM; op_def static X startingValue(const X *input) { return static_cast(0.0f); } op_def static Z merge(X old, X opOutput, Z 
*extraParams) { return old + opOutput; } op_def static Z update(X old, X opOutput, Z *extraParams) { return old + opOutput; } op_def static Z op(X d1, Z *extraParams) { X mean = extraParams[0]; X ret = d1 - mean; return ret * ret; } op_def static Z postProcess(X reduction, Nd4jLong n, Z *extraParams) { Z ret = Variance::postProcess(reduction, n, extraParams); Z sqrtRet = nd4j::math::nd4j_sqrt(ret); return sqrtRet; } }; template class CosineSimilarity { public: static const int extraParamsLen = 2; op_def static X *generateExtraParams() { //T *extraParams = new T[2]; return nullptr; } op_def static void finalizeExtraParams(X *extraParams) { //delete[] extraParams; } op_def static Y startingValue(const X *input) { return static_cast(0.0f); } op_def static Y postProcess(Y reduction, Nd4jLong n, Y *extraParams) { return reduction / (nd4j::math::nd4j_sqrt(extraParams[0]) * nd4j::math::nd4j_sqrt(extraParams[1])); } op_def static Y op(X d1, X d2, Y *extraParams) { extraParams[0] += static_cast(d1 * d1); extraParams[1] += static_cast(d2 * d2); return static_cast(d1 * d2); } op_def static void aggregateExtraParams(Y *extraParamsTotal, Y *extraParamsLocal) { extraParamsTotal[0] += extraParamsLocal[0]; extraParamsTotal[1] += extraParamsLocal[1]; } #ifdef __CUDACC__ static _CUDA_D inline Y opAtomic(X d1, X d2, Y *extraParams) { nd4j::math::atomics::nd4j_atomicAdd(&extraParams[0],static_cast(d1 * d1)); nd4j::math::atomics::nd4j_atomicAdd(&extraParams[1],static_cast(d2 * d2)); return static_cast(d1 * d2); } #endif op_def static Y update(Y old, Y opOutput, Y *extraParams) { return old + opOutput; } op_def static Y merge(Y old, Y opOutput, Y *extraParams) { return update(old, opOutput, extraParams); } }; template class JaccardDistance { public: static const int extraParamsLen = 2; op_def static X *generateExtraParams() { //T *extraParams = new T[2]; return nullptr; } op_def static void finalizeExtraParams(X *extraParams) { //delete[] extraParams; } op_def static Y 
startingValue(const X *input) { return static_cast(0.0f); } op_def static Y postProcess(Y reduction, Nd4jLong n, Y *extraParams) { // num / denom return (static_cast(1.0f)) - (extraParams[0] / extraParams[1]); } op_def static Y num(X d1, X d2) { return nd4j::math::nd4j_min(d1, d2); } op_def static Y denom(X d1, X d2) { return nd4j::math::nd4j_max(d1, d2); } op_def static Y op(X d1, X d2, Y *extraParams) { extraParams[0] += static_cast(num(d1, d2)); extraParams[1] += static_cast(denom(d1, d2)); return static_cast(0.0f); } op_def static void aggregateExtraParams(Y *extraParamsTotal, Y *extraParamsLocal) { extraParamsTotal[0] += extraParamsLocal[0]; extraParamsTotal[1] += extraParamsLocal[1]; } #ifdef __CUDACC__ __device__ static inline Y opAtomic(X d1, X d2, Y *extraParams) { nd4j::math::atomics::nd4j_atomicAdd(&extraParams[0],num(d1, d2)); nd4j::math::atomics::nd4j_atomicAdd(&extraParams[1], denom(d1, d2)); return static_cast(0.0f); } #endif op_def static Y update(Y old, Y opOutput, Y *extraParams) { return old + opOutput; } op_def static Y merge(Y old, Y opOutput, Y *extraParams) { return update(old, opOutput, extraParams); } }; template class SimpleHammingDistance { public: static const int extraParamsLen = 0; op_def static X *generateExtraParams() { //T *extraParams = new T[2]; return nullptr; } op_def static void finalizeExtraParams(X *extraParams) { //delete[] extraParams; } op_def static Y startingValue(const X *input) { return static_cast(0.0f); } op_def static Y postProcess(Y reduction, Nd4jLong n, Y *extraParams) { return static_cast(reduction / n); } op_def static Y op(X d1, X d2, Y *extraParams) { return (d1 == d2) ? 
static_cast(0.0f) : static_cast(1.0f); } op_def static void aggregateExtraParams(Y *extraParamsTotal, Y *extraParamsLocal) { } #ifdef __CUDACC__ __device__ static inline Y opAtomic(X d1, X d2, Y *extraParams) { return op(d1, d2, extraParams); } #endif op_def static Y update(Y old, Y opOutput, Y *extraParams) { return old + opOutput; } op_def static Y merge(Y old, Y opOutput, Y *extraParams) { return update(old, opOutput, extraParams); } }; template class CosineDistance { public: static const int extraParamsLen = 2; op_def static X *generateExtraParams() { //T *extraParams = new T[2]; return nullptr; } op_def static void finalizeExtraParams(X *extraParams) { //delete[] extraParams; } op_def static Y startingValue(const X *input) { return static_cast(0.0f); } op_def static Y postProcess(Y reduction, Nd4jLong n, Y *extraParams) { return (static_cast(1.0f)) - (reduction / (nd4j::math::nd4j_sqrt(extraParams[0]) * nd4j::math::nd4j_sqrt(extraParams[1]))); } op_def static Y op(X d1, X d2, Y *extraParams) { extraParams[0] += static_cast(nd4j::math::nd4j_abs(d1) * nd4j::math::nd4j_abs(d1)); extraParams[1] += static_cast(nd4j::math::nd4j_abs(d2) * nd4j::math::nd4j_abs(d2)); return (d1 * d2); } op_def static void aggregateExtraParams(Y *extraParamsTotal, Y *extraParamsLocal) { extraParamsTotal[0] += extraParamsLocal[0]; extraParamsTotal[1] += extraParamsLocal[1]; } #ifdef __CUDACC__ static _CUDA_D inline Y opAtomic(X d1, X d2, Y *extraParams) { nd4j::math::atomics::nd4j_atomicAdd(&extraParams[0], nd4j::math::nd4j_abs(d1) * nd4j::math::nd4j_abs(d1)); nd4j::math::atomics::nd4j_atomicAdd(&extraParams[1], nd4j::math::nd4j_abs(d2) * nd4j::math::nd4j_abs(d2)); return (d1 * d2); } #endif op_def static Y update(Y old, Y opOutput, Y *extraParams) { return old + opOutput; } op_def static Y merge(Y old, Y opOutput, Y *extraParams) { return update(old, opOutput, extraParams); } }; /** * Dot product between 2 arrays */ template class Dot { public: static const int extraParamsLen = 0; 
op_def static X * generateExtraParams() { return nullptr; } op_def static void finalizeExtraParams(X *extraParamsRef) { //no-op //delete[] * extraParamsRef; } op_def static Y startingValue(const X *input) { return static_cast(0.0f); } op_def static Y postProcess(Y reduction, Nd4jLong n, Y *extraParamsRef) { return reduction; } op_def static Y op(X d1, X d2, Y *extraParamsRef) { return static_cast(d1 * d2); } #ifdef __CUDACC__ __device__ static inline Y opAtomic(X d1, X d2, Y *extraParamsRef) { return op(d1, d2, extraParamsRef); } #endif op_def static Y update(Y old, Y opOutput, Y *extraParamsRef) { return opOutput + old; } op_def static Y merge(Y old, Y opOutput, Y *extraParamsRef) { return update(old, opOutput, extraParamsRef); } op_def static void aggregateExtraParams(Y *extraParamsTotal, Y *extraParamsLocal) {} }; /** * Op to check equality within arrays */ template class EqualsWithEps { public: static const int extraParamsLen = 0; op_def static X * generateExtraParams() { return nullptr; } op_def static void finalizeExtraParams(X *extraParamsRef) { //no-op } op_def static Z startingValue(const X *input) { return static_cast(0.0f); } op_def static Z postProcess(Z reduction, Nd4jLong n, Z *extraParamsRef) { return reduction; } op_def static Z op(X d1, X d2, Z *extraParamsRef) { double eps = nd4j::math::nd4j_abs(extraParamsRef[2]); return static_cast(!nd4j::math::nd4j_eq(d1, d2, eps)); } #ifdef __CUDACC__ __device__ static inline Z opAtomic(X d1, X d2, Z *extraParamsRef) { return op(d1, d2, extraParamsRef); } #endif op_def static Z update(Z old, Z opOutput, Z *extraParamsRef) { return opOutput + old; } op_def static Z merge(X old, Z opOutput, Z *extraParamsRef) { return update(old, opOutput, extraParamsRef); } op_def static void aggregateExtraParams(Z *extraParamsTotal, Z *extraParamsLocal) {} }; template class EuclideanDistance { public: static const int extraParamsLen = 0; op_def static X * generateExtraParams() { return nullptr; } op_def static void 
finalizeExtraParams(X *extraParamsRef) { //no-op } op_def static Y startingValue(const X *input) { return static_cast(0.0f); } op_def static Y postProcess(Y reduction, Nd4jLong n, Y *extraParamsRef) { return nd4j::math::nd4j_sqrt(reduction); } op_def static Y op(X d1, X d2, Y *extraParamsRef) { X ret = d1 - d2; return static_cast(ret * ret); } #ifdef __CUDACC__ __device__ static inline Y opAtomic(X d1, X d2, Y *extraParamsRef) { return op(d1, d2, extraParamsRef); } #endif op_def static Y update(Y old, Y opOutput, Y *extraParamsRef) { return opOutput + old; } op_def static Y merge(Y old, Y opOutput, Y *extraParamsRef) { return update(old, opOutput, extraParamsRef); } op_def static void aggregateExtraParams(Y *extraParamsTotal, Y *extraParamsLocal) {} }; template class ManhattanDistance { public: static const int extraParamsLen = 0; op_def static X * generateExtraParams() { return nullptr; } op_def static void finalizeExtraParams(X *extraParamsRef) { //no-op } op_def static Y startingValue(const X *input) { return static_cast(0.0f); } op_def static Y postProcess(Y reduction, Nd4jLong n, Y *extraParamsRef) { return reduction; } op_def static Y op(X d1, X d2, Y *extraParamsRef) { return nd4j::math::nd4j_abs(d1 - d2); } op_def static Y update(Y old, Y opOutput, Y *extraParamsRef) { return old + opOutput; } op_def static void aggregateExtraParams(Y *extraParamsTotal, Y *extraParamsLocal) { } #ifdef __CUDACC__ __device__ static inline Y opAtomic(X d1, X d2, Y *extraParamsRef) { return op(d1, d2, extraParamsRef); } #endif #ifndef __clang__ #pragma omp declare simd uniform(extraParamsRef) #endif op_def static Y merge(X old, X opOutput, X *extraParamsRef) { return update(old, opOutput, extraParamsRef); } }; template class IndexAbsoluteMax { public: static _CUDA_HD inline functions::indexreduce::IndexValue op(functions::indexreduce::IndexValue val, X *extraParams) { return nd4j::math::nd4j_abs(val); } static _CUDA_HD inline functions::indexreduce::IndexValue 
update(functions::indexreduce::IndexValue &old, functions::indexreduce::IndexValue &opOutput, X *extraParams) { opOutput.value = nd4j::math::nd4j_abs(opOutput.value); old.value = nd4j::math::nd4j_abs(old.value); if (opOutput.value > old.value) return opOutput; #ifdef __CUDACC__ // workaround for cuda race condition at merge phase else if (opOutput.value == old.value && opOutput.index < old.index) return opOutput; #elif defined(__GNUC__) #endif return old; } static _CUDA_HD inline functions::indexreduce::IndexValue merge( functions::indexreduce::IndexValue f1, functions::indexreduce::IndexValue f2, X *extraParams) { if (nd4j::math::nd4j_abs(f1.value) > nd4j::math::nd4j_abs(f2.value)) return f2; return f1; } static _CUDA_HD inline functions::indexreduce::IndexValue postProcess( functions::indexreduce::IndexValue reduction, int n, int xOffset, X *dx, int incx, X *extraParams, X *result) { return reduction; } static _CUDA_HD inline X startingValue(const X *input) { return 0; } static _CUDA_HD inline functions::indexreduce::IndexValue startingIndexValue(X *input) { functions::indexreduce::IndexValue local; local.value = startingValue(input); local.index = 0; return local; } static _CUDA_HD inline functions::indexreduce::IndexValue op(functions::indexreduce::IndexValue d1, functions::indexreduce::IndexValue d2, X *extraParams) { return d1; } }; template class FirstIndex { public: static _CUDA_HD inline functions::indexreduce::IndexValue op(functions::indexreduce::IndexValue val, X *extraParams) { return val; } static _CUDA_HD functions::indexreduce::IndexValue update(functions::indexreduce::IndexValue &old, functions::indexreduce::IndexValue &opOutput, X *extraParams) { #ifdef __CUDACC__ if (opOutput.index < 0) return old; #endif auto res = simdOps::MatchCondition::op(opOutput.value, extraParams); //printf("res: %f; oldIdx: %i; newIdx: %i\n", res, old.index, opOutput.index); if (res == static_cast(0)) return old; if (old.index < 0) return opOutput; if (old.index > 
opOutput.index) return opOutput; return old; } static _CUDA_HD inline X startingValue(const X *input) { return -nd4j::DataTypeUtils::infOrMax(); } static _CUDA_HD inline functions::indexreduce::IndexValue startingIndexValue(X *input) { functions::indexreduce::IndexValue local; local.value = startingValue(input); local.index = -1; return local; } static _CUDA_HD inline functions::indexreduce::IndexValue op(functions::indexreduce::IndexValue d1, functions::indexreduce::IndexValue d2, X *extraParams) { return d1; } static _CUDA_HD inline functions::indexreduce::IndexValue merge( functions::indexreduce::IndexValue f1, functions::indexreduce::IndexValue f2, X *extraParams) { if (f1.index > f2.index) return f2; return f1; } static _CUDA_HD inline functions::indexreduce::IndexValue postProcess( functions::indexreduce::IndexValue reduction, int n, int xOffset, X *dx, int incx, X *extraParams, X *result) { return reduction; } }; template class LastIndex { public: static _CUDA_HD inline functions::indexreduce::IndexValue op(functions::indexreduce::IndexValue val, X *extraParams) { return val; } static _CUDA_HD functions::indexreduce::IndexValue update(functions::indexreduce::IndexValue &old, functions::indexreduce::IndexValue &opOutput, X *extraParams) { #ifdef __CUDACC__ if (opOutput.index < 0) return old; #endif auto res = simdOps::MatchCondition::op(opOutput.value, extraParams); if (res == static_cast(0)) return old; if (old.index < 0) return opOutput; if (old.index < opOutput.index) return opOutput; return old; } static _CUDA_HD inline X startingValue(const X *input) { return -nd4j::DataTypeUtils::infOrMax(); } static _CUDA_HD inline functions::indexreduce::IndexValue startingIndexValue(X *input) { functions::indexreduce::IndexValue local; local.value = startingValue(input); local.index = -1; return local; } static _CUDA_HD inline functions::indexreduce::IndexValue op(functions::indexreduce::IndexValue d1, functions::indexreduce::IndexValue d2, X *extraParams) { return 
d1; } static _CUDA_HD inline functions::indexreduce::IndexValue merge( functions::indexreduce::IndexValue f1, functions::indexreduce::IndexValue f2, X *extraParams) { if (f1.index < f2.index) return f2; return f1; } static _CUDA_HD inline functions::indexreduce::IndexValue postProcess( functions::indexreduce::IndexValue reduction, int n, int xOffset, X *dx, int incx, X *extraParams, X *result) { return reduction; } }; template class IndexMax { public: static _CUDA_HD inline functions::indexreduce::IndexValue op(functions::indexreduce::IndexValue val, X *extraParams) { return val; } static _CUDA_HD functions::indexreduce::IndexValue update(functions::indexreduce::IndexValue &old, functions::indexreduce::IndexValue &opOutput, X *extraParams) { if (opOutput.value > old.value) { return opOutput; } #ifdef __CUDACC__ // workaround for cuda race condition at merge phase else if (opOutput.value == old.value && opOutput.index < old.index) return opOutput; #elif defined(__GNUC__) #endif return old; } static _CUDA_HD inline functions::indexreduce::IndexValue merge( functions::indexreduce::IndexValue f1, functions::indexreduce::IndexValue f2, X *extraParams) { if (f1.value > f2.value) return f2; return f1; } static _CUDA_HD inline functions::indexreduce::IndexValue postProcess( functions::indexreduce::IndexValue reduction, int n, int xOffset, X *dx, int incx, X *extraParams, X *result) { return reduction; } static _CUDA_HD inline X startingValue(const X *input) { return -nd4j::DataTypeUtils::infOrMax(); } static _CUDA_HD inline functions::indexreduce::IndexValue startingIndexValue(X *input) { functions::indexreduce::IndexValue local; local.value = startingValue(input); local.index = 0; return local; } static _CUDA_HD inline functions::indexreduce::IndexValue op(functions::indexreduce::IndexValue d1, functions::indexreduce::IndexValue d2, X *extraParams) { return d1; } }; template class IndexAbsoluteMin { public: static _CUDA_HD inline functions::indexreduce::IndexValue op( 
functions::indexreduce::IndexValue val, X *extraParams) { return val; } static _CUDA_HD inline X startingValue(const X *input) { return nd4j::DataTypeUtils::infOrMax(); } static _CUDA_HD inline functions::indexreduce::IndexValue startingIndexValue(X *input) { functions::indexreduce::IndexValue local; local.value = startingValue(input); local.index = 0; return local; } static _CUDA_HD inline functions::indexreduce::IndexValue update(functions::indexreduce::IndexValue &old, functions::indexreduce::IndexValue &opOutput, X *extraParams) { opOutput.value = nd4j::math::nd4j_abs(opOutput.value); old.value = nd4j::math::nd4j_abs(old.value); if (opOutput.value < old.value) return opOutput; #ifdef __CUDACC__ // workaround for cuda race condition at merge phase else if (opOutput.value == old.value && opOutput.index < old.index) return opOutput; #elif defined(__GNUC__) #endif return old; } static _CUDA_HD inline functions::indexreduce::IndexValue merge( functions::indexreduce::IndexValue f1, functions::indexreduce::IndexValue f2, X *extraParams) { if (nd4j::math::nd4j_abs(f1.value) < nd4j::math::nd4j_abs(f2.value)) return f2; return f1; } static _CUDA_HD inline functions::indexreduce::IndexValue postProcess( functions::indexreduce::IndexValue reduction, int n, int xOffset, X *dx, int incx, X *extraParams, X *result) { return reduction; } static _CUDA_HD inline functions::indexreduce::IndexValue op(functions::indexreduce::IndexValue d1, functions::indexreduce::IndexValue d2, X *extraParams) { return d1; } }; template class IndexMin { public: static _CUDA_HD inline functions::indexreduce::IndexValue op( functions::indexreduce::IndexValue val, X *extraParams) { return val; } static _CUDA_HD inline X startingValue(const X *input) { return nd4j::DataTypeUtils::infOrMax(); } static _CUDA_HD inline functions::indexreduce::IndexValue startingIndexValue(X *input) { functions::indexreduce::IndexValue local; local.value = startingValue(input); local.index = 0; return local; } static 
_CUDA_HD inline functions::indexreduce::IndexValue update(functions::indexreduce::IndexValue &old, functions::indexreduce::IndexValue &opOutput, X *extraParams) { if (opOutput.value < old.value) return opOutput; #ifdef __CUDACC__ // workaround for cuda race condition at merge phase else if (opOutput.value == old.value && opOutput.index < old.index) return opOutput; #elif defined(__GNUC__) #endif return old; } static _CUDA_HD inline functions::indexreduce::IndexValue merge( functions::indexreduce::IndexValue f1, functions::indexreduce::IndexValue f2, X *extraParams) { if (f1.value < f2.value) return f2; return f1; } static _CUDA_HD inline functions::indexreduce::IndexValue postProcess( functions::indexreduce::IndexValue reduction, int n, int xOffset, X *dx, int incx, X *extraParams, X *result) { return reduction; } static _CUDA_HD inline functions::indexreduce::IndexValue op(functions::indexreduce::IndexValue d1, functions::indexreduce::IndexValue d2, X *extraParams) { return d1; } }; template class SummaryStatsVariance { public: static _CUDA_HD inline Z getValue(const bool biasCorrected, functions::summarystats::SummaryStatsData val) { if (biasCorrected) { Z ret = static_cast(val.varianceBiasCorrected()); if (ret < static_cast(0.0f)) return static_cast(val.variance()); return ret; } return static_cast(val.variance()); } static _CUDA_HD inline functions::summarystats::SummaryStatsData op(functions::summarystats::SummaryStatsData d1, Z *extraParams) { return d1; } }; template class SummaryStatsStandardDeviation { public: static _CUDA_HD inline Z getValue(const bool biasCorrected, functions::summarystats::SummaryStatsData val) { if (biasCorrected) { auto ret = static_cast(val.varianceBiasCorrected()); if (ret < static_cast(0.0f)) return nd4j::math::nd4j_sqrt(val.variance()); else return nd4j::math::nd4j_sqrt(ret); } return nd4j::math::nd4j_sqrt(val.variance()); } static _CUDA_HD inline functions::summarystats::SummaryStatsData 
op(functions::summarystats::SummaryStatsData d1, Z *extraParams) { return d1; } }; template class DropOut { public: no_op_exec_special_same no_op_exec_special_same_cuda inline _CUDA_D static X op(X d1, X *params) { X prob = params[0]; #ifdef __CUDACC__ X length = params[1]; X tid = blockIdx.x * blockDim.x + threadIdx.x; X rnd = nd4j::math::nd4j_abs(nd4j::math::nd4j_cos(static_cast(clock64()) * static_cast(tid) + static_cast(length) * static_cast(tid))); #else X rnd = static_cast(rand() / RAND_MAX); #endif return rnd >= prob ? static_cast(0.0f) : d1; } }; template class DropOutInverted { public: no_op_exec_special no_op_exec_special_cuda #ifdef __CUDACC__ __device__ #endif inline static Z op(X d1, Y d2, Z *params) { Y prob = d2; #ifdef __CUDACC__ X length = params[1]; X tid = blockIdx.x * blockDim.x + threadIdx.x; X rnd = nd4j::math::nd4j_abs(nd4j::math::nd4j_cos(static_cast(clock64()) * static_cast(tid) + static_cast(length) * static_cast(tid))); #else X rnd = static_cast(rand() / RAND_MAX); #endif return rnd >= static_cast(prob) ? static_cast(0.0f) : reinterpret_cast(d1 / static_cast(prob)); } }; template class ReplaceNans { public: no_op_exec_special no_op_exec_special_cuda op_def static Z op(X d1, Y d2, Z *params) { return nd4j::math::nd4j_isnan(d1) ? 
static_cast(d2) : static_cast(d1) ; } }; // this op is used for conditional pairwise transforms only template class CompareAndReplace{ public: // op definition for PairWise Transform op_def static Z op(X d1, Y d2, Z *params) { auto zd1 = static_cast(d1); auto zd2 = static_cast(d2); auto compare = params[0]; auto eps = params[2]; int mode = (int) params[3]; if (mode == 0) // equals if (nd4j::math::nd4j_abs(zd1 - compare) <= eps) return zd2; else return zd1; else if (mode == 1) // not equals eps if (nd4j::math::nd4j_abs(zd1 - compare) > eps) return zd2; else return zd1; else if (mode == 2) // less_than eps if (zd1 < compare) return zd2; else return zd1; else if (mode ==3) // greater_than if (zd1 > compare) return zd2; else return zd1; else if (mode == 4) // less_or_equals_than if (zd1 <= compare) return zd2; else return zd1; else if (mode == 5) // greater_or_equals_than if (zd1 >= compare) return zd2; else return zd1; else if (mode == 6) // abs_less_than if (nd4j::math::nd4j_abs(zd1) < compare) return zd2; else return zd1; else if (mode == 7) // abs_greater_than if (nd4j::math::nd4j_abs(zd1) > compare) return zd2; else return zd1; else if (mode == 8) // is inf if (nd4j::math::nd4j_isinf(zd1)) return zd2; else return zd1; else if (mode == 9) // is nan if (nd4j::math::nd4j_isnan(zd1)) return zd2; else return zd1; else if (mode == 10) if (zd1 == compare) return zd2; else return zd1; else if (mode == 11) if (zd1 != compare) return zd2; else return zd1; else if (mode == 12) // abs_greater_or_equals_than if (nd4j::math::nd4j_abs(zd1) >= compare) return zd2; else return zd1; else if (mode == 13) // abs_less_or_equals_than if (nd4j::math::nd4j_abs(zd1) <= compare) return zd2; else return zd1; else printf("Undefined boolean operation: [%i]\n", mode); return zd1; } }; template class CompareAndSet { public: // op definition for PairWise Transform op_def static Z op(X dX, Y dY, Z *params) { auto d1 = static_cast(dX); auto d2 = static_cast(dY); auto compare = params[0]; auto eps 
= params[2]; auto mode = static_cast(params[3]); if (mode == 0) // equals if (nd4j::math::nd4j_abs(d2 - compare) <= eps) return d2; else return d1; else if (mode == 1) // not equals if (nd4j::math::nd4j_abs(d2 - compare) > eps) return d2; else return d1; else if (mode == 2) // less_than if (d2 < compare) return d2; else return d1; else if (mode ==3) // greater_than if (d2 > compare) return d2; else return d1; else if (mode == 4) // less_or_equals_than if (d2 <= compare) return d2; else return d1; else if (mode == 5) // greater_or_equals_than if (d2 >= compare) return d2; else return d1; else if (mode == 6) // abs_less_than if (nd4j::math::nd4j_abs(d2) < compare) return d2; else return d1; else if (mode == 7) // abs_greater_than if (nd4j::math::nd4j_abs(d2) > compare) return d2; else return d1; else if (mode == 8) // is inf if (nd4j::math::nd4j_isinf(d2)) return d2; else return d1; else if (mode == 9) // is nan if (nd4j::math::nd4j_isnan(d2)) return d2; else return d1; else if (mode == 10) if (d2 == compare) return d2; else return d1; else if (mode == 11) if (d2 != compare) return d2; else return d1; else if (mode == 12) // abs_greater_or_equals_than if (nd4j::math::nd4j_abs(d1) >= compare) return d2; else return d1; else if (mode == 13) // abs_less_or_equals_than if (nd4j::math::nd4j_abs(d1) <= compare) return d2; else return d1; else printf("Undefined boolean operation: [%i]\n", mode); return d1; } }; template class CompareAndSetTransform { public: no_op_exec_special_same no_op_exec_special_same_cuda // op definition for Transform op_def static X op(X d1, X *params) { auto compare = params[0]; auto set = params[1]; auto eps = params[2]; // with mode == 0 we do set if d1 equals to compare, and with mode == 1 - we go otherwise int mode = (int) params[3]; if (mode == 0) // equals if (nd4j::math::nd4j_abs(d1 - compare) <= eps) return set; else return d1; //return nd4j::math::nd4j_abs(d1 - compare) <= eps ? 
set : d1; else if (mode == 1) // not equals if (nd4j::math::nd4j_abs(d1 - compare) > eps) return set; else return d1; //return nd4j::math::nd4j_abs(d1 - compare) > eps ? set : d1; else if (mode == 2) // less_than if (d1 < compare) return set; else return d1; else if (mode ==3) // greater_than if (d1 > compare) return set; else return d1; else if (mode == 4) // less_or_equals_than if (d1 <= compare) return set; else return d1; else if (mode == 5) // greater_or_equals_than if (d1 >= compare) return set; else return d1; else if (mode == 6) // abs_less_than if (nd4j::math::nd4j_abs(d1) < compare) return set; else return d1; else if (mode == 7) // abs_greater_than if (nd4j::math::nd4j_abs(d1) > compare) return set; else return d1; else if (mode == 8) // is inf if (nd4j::math::nd4j_isinf(d1)) return set; else return d1; else if (mode == 9) // is nan if (nd4j::math::nd4j_isnan(d1)) return set; else return d1; else if (mode == 10) if (d1 == compare) return set; else return d1; else if (mode == 11) if (d1 != compare) return set; else return d1; else if (mode == 12) // abs_greater_or_equals_than if (nd4j::math::nd4j_abs(d1) >= compare) return set; else return d1; else if (mode == 13) // abs_less_or_equals_than if (nd4j::math::nd4j_abs(d1) <= compare) return set; else return d1; else printf("Undefined boolean operation: [%i]\n", mode); return d1; } }; } #endif