/* ****************************************************************************** * * * This program and the accompanying materials are made available under the * terms of the Apache License, Version 2.0 which is available at * https://www.apache.org/licenses/LICENSE-2.0. * * See the NOTICE file distributed with this work for additional * information regarding copyright ownership. * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations * under the License. * * SPDX-License-Identifier: Apache-2.0 ******************************************************************************/ #pragma once #ifndef OPS_H_ #define OPS_H_ #include #include #include #include #include #include #include #define MIN_V 1e-12 #define MAX_FLOAT 1e37 #define MIN_FLOAT 1e-37 #define MAX_INT 2147483647 #define MIN_CUTFOFF -3.79297773665f #define FLOAT_MIN_NORMAL 1.17549435e-38 #define EPS 1e-5 #define AFFINITY close #define DOUBLE_PI_T T(2.0 * 3.14159265358979323846) #define DOUBLE_PI_X X(2.0 * 3.14159265358979323846) #define no_op_exec_special_any static const bool requiresSpecial = false; static void execSpecial(const X *dx, const Nd4jLong *xShapeBuffer, Z *result, const Nd4jLong *resultShapeBuffer, X *extraParams, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) {} #define no_op_exec_special_bool static const bool requiresSpecial = false; static void execSpecial(const X *dx, const Nd4jLong *xShapeBuffer, Z *result, const Nd4jLong *resultShapeBuffer, X *extraParams, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) {} #define no_op_exec_special_same static const bool requiresSpecial = false; static void execSpecial(const X *dx, const Nd4jLong *xShapeBuffer, X *result, const Nd4jLong *resultShapeBuffer, X *extraParams, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) {} #define no_op_exec_special static const bool requiresSpecial = false; static void execSpecial(const X *dx, const Nd4jLong *xShapeBuffer, Z *result, const Nd4jLong *resultShapeBuffer, Z *extraParams, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) {} #define no_op_exec_special_accumulation static const bool requiresSpecialAccumulation = false; static void execSpecial(const X *x, const Nd4jLong *xShapeInfo, Z *extraParams, Z *result, const Nd4jLong *resultShapeInfoBuffer, int *dimension, int dimensionLength, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset){} #define no_op_exec_special_accumulation_long static const bool requiresSpecialAccumulation = false; static void execSpecial(const X *x, const Nd4jLong *xShapeInfo, X *extraParams, Z *result, const Nd4jLong *resultShapeInfoBuffer, int *dimension, int dimensionLength, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset){} #define no_op_exec_special_accumulation_same static const bool requiresSpecialAccumulation = false; static void execSpecial(const X *x, const Nd4jLong *xShapeInfo, X *extraParams, X *result, const Nd4jLong *resultShapeInfoBuffer, int *dimension, int dimensionLength, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset){} #ifdef __CUDACC__ #define no_op_exec_special_any_cuda static __device__ void execSpecialCuda(const X *dx, const Nd4jLong *xShapeBuffer, Z *result, const Nd4jLong *resultShapeBuffer, X *extraParams, int *allocationPointer, Z *reductionPointer, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) {} #define no_op_exec_special_bool_cuda static __device__ void execSpecialCuda(const X *dx, const Nd4jLong *xShapeBuffer, Z *result, const Nd4jLong *resultShapeBuffer, X *extraParams, int *allocationPointer, Z *reductionPointer, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) {} #define no_op_exec_special_same_cuda static __device__ void execSpecialCuda(const X *dx, const Nd4jLong *xShapeBuffer, X *result, const Nd4jLong *resultShapeBuffer, X *extraParams, int *allocationPointer, X *reductionPointer, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) {} #define no_op_exec_special_cuda static __device__ void execSpecialCuda(const X *dx, const Nd4jLong *xShapeBuffer,Z *result, const Nd4jLong *resultShapeBuffer,Z *extraParams, int *allocationPointer, Z *reductionPointer, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) {} #define no_op_exec_special_accumulation_same_cuda static inline __device__ void execSpecialCuda(const X *dx, const Nd4jLong *xShapeInfo, X *extraParams, X *result, const Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, X *reductionBuffer, const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets) {} #define no_op_exec_special_accumulation_long_cuda static inline __device__ void execSpecialCuda(const X *dx, const Nd4jLong *xShapeInfo, X *extraParams, Z *result, const Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, Z *reductionBuffer, const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets) {} #define no_op_exec_special_accumulation_cuda static inline __device__ void execSpecialCuda(const X *dx, const Nd4jLong *xShapeInfo, Z *extraParams, Z *result, const Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, Z *reductionBuffer, const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets) {} #else // hacky fix for isnan/being being out of scope //#ifdef IOS //#define isinf(x) 0 // this isn't right. But std::isinf fails //#define isnan(x) 0 //#else //#define isnan std::isnan //#define isinf std::isinf //#endif #define no_op_exec_special_cuda #define no_op_exec_special_accumulation_cuda #define no_op_exec_special_accumulation_same_cuda #define no_op_exec_special_accumulation_long_cuda #define no_op_exec_special_any_cuda #define no_op_exec_special_bool_cuda #define no_op_exec_special_same_cuda #define no_op_exec_special_accumulation_same_cuda #endif #define SELU_ALPHA 1.6732632423543772848170429916717 #define SELU_LAMBDA 1.0507009873554804934193349852946 namespace functions { namespace indexreduce { template struct IndexValue { T value; Nd4jLong index; _CUDA_HD IndexValue() = default; _CUDA_HD IndexValue(const T val, const Nd4jLong ind): index(ind), value(val) {} }; } namespace summarystats { template class SummaryStatsData; } } namespace simdOps { template class Add { public: op_def static Z op(X d1, Y d2) { return static_cast(d1 + d2); } op_def static Z op(X d1, Y d2, Z *params) { return static_cast(d1 + d2); } op_def static Z op(X d1) { return static_cast(d1); } // op for MetaOps op_def static Z op(X d1, Y *params) { return static_cast(d1 + params[0]); } op_def static X startingValue() { return static_cast(0.f); } }; template class NewAdd { public: op_def static X op(X d1, Y d2, X *params) { return d1 + d2; } }; template class Subtract { public: op_def static Z op(X d1, Y d2) { return static_cast(d1 - d2); } op_def static Z op(X d1, Y d2, Z *params) { return static_cast(d1 - d2); } op_def static Z op(X d1) { return static_cast(d1); } // op for MetaOps op_def static Z op(X d1, Y *params) { return static_cast(d1 - params[0]); } }; template class SquaredSubtract { public: op_def static Z op(X d1, Y d2) { auto d = static_cast(d1 - d2); return d * d; } op_def static Z op(X d1, Y d2, Z *params) { auto d = static_cast(d1 - d2); return d * d; } op_def static Z op(X d1) { return d1; } // op for MetaOps op_def static Z op(X d1, Y *params) { auto d = static_cast(d1 - params[0]); return d * d; } }; template class SquaredReverseSubtract { public: op_def static Z op(X d1, Y d2) { auto d = static_cast(d2 - d1); return d * d; } op_def static Z op(X d1, Y d2, Z *params) { auto d = static_cast(d2 - d1); return d * d; } op_def static Z op(X d1) { return d1; } // op for MetaOps op_def static Z op(X d1, Y *params) { auto d = static_cast(params[0] - d1); return d * d; } }; template class ReverseSubtract { public: op_def static Z op(X d1, Y d2) { return static_cast(d2 - d1); } op_def static Z op(X d1, Y d2, Z *params) { return static_cast(d2 - d1); } op_def static Z op(X d1) { return d1; } // op for MetaOps op_def static Z op(X d1, Y *params) { return static_cast(params[0] - d1); } }; template class LogPoissonLossFull { public: op_def static Z op(X z, Y c) { auto zz = static_cast(z); auto zc = static_cast(c); return (sd::math::nd4j_exp(c) - zz * zc + (zz * sd::math::nd4j_log(z) - zz + static_cast(0.5f) * sd::math::nd4j_log(static_cast(DOUBLE_PI_X) * zz))); } op_def static Z op(X z, Y c, Z *params) { auto zz = static_cast(z); auto zc = static_cast(c); return (sd::math::nd4j_exp(c) - zz * zc + (zz * sd::math::nd4j_log(z) - zz + static_cast(0.5f) * sd::math::nd4j_log(static_cast(DOUBLE_PI_X) * zz))); } op_def static Z op(X z) { auto zz = static_cast(z); return (zz * sd::math::nd4j_log(z) - zz + static_cast(0.5f) * sd::math::nd4j_log(static_cast(DOUBLE_PI_X) * zz)); } // op for MetaOps op_def static X op(X z, Y *params) { return (sd::math::nd4j_exp(params[0]) - z * params[0] + (z * sd::math::nd4j_log(z) - z + static_cast(0.5f) * sd::math::nd4j_log(DOUBLE_PI_X * z))); } }; template class LogPoissonLoss { public: op_def static Z op(X z, Y c) { auto zz = static_cast(z); auto zc = static_cast(c); return (sd::math::nd4j_exp(c) - zz * zc); } op_def static Z op(X z, Y c, Z *params) { auto zz = static_cast(z); auto zc = static_cast(c); return (sd::math::nd4j_exp(c) - zz * zc); } op_def static Z op(X z) { return static_cast(z); } // op for MetaOps op_def static Z op(X z, Y *params) { return (sd::math::nd4j_exp(params[0]) - static_cast(z) * static_cast(params[0])); } }; template class Multiply { public: op_def static Z op(X d1, Y d2) { return static_cast(d1 * d2); } op_def static Z op(X d1, Y d2, Z *params) { return static_cast(d1 * d2); } op_def static Z op(X d1) { return static_cast(d1); } // op for MetaOps op_def static Z op(X d1, Y *params) { return static_cast(d1 * params[0]); } op_def static X startingValue() { return static_cast(1.f); } }; template class Divide { public: op_def static Z op(X d1, Y d2) { return static_cast(d1 / d2); } op_def static Z op(X d1, Y d2, Z *params) { return static_cast(d1 / d2); } op_def static Z op(X d1) { return static_cast(d1); } // op for MetaOps op_def static Z op(X d1, Y *params) { return static_cast(d1 / params[0]); } op_def static X startingValue() { return static_cast(1); } }; template class DivideNoNan { public: op_def static Z op(X d1, Y d2) { if (d2 == (Y)0) return (Z)0; return static_cast(d1 / d2); } op_def static Z op(X d1, Y d2, Z *params) { if (d2 == (Y)0) return (Z)0; return static_cast(d1 / d2); } op_def static Z op(X d1) { return static_cast(d1); } // op for MetaOps op_def static Z op(X d1, Y *params) { if (params[0] == (Y)0) return (Z)0; return static_cast(d1 / params[0]); } op_def static X startingValue() { return static_cast(1); } }; template class SafeDivide { public: op_def static Z op(X d1, Y d2) { if(d2 == static_cast(0)) return static_cast(0); return static_cast(d1 / d2); } op_def static Z op(X d1, Y d2, Z *params) { if(d2 == static_cast(0)) return static_cast(0); return static_cast(d1 / d2); } op_def static Z op(X d1) { return static_cast(d1); } // op for MetaOps op_def static Z op(X d1, Y *params) { if(params[0] == static_cast(0)) return static_cast(0); return static_cast(d1 / params[0]); } }; template class FloorDiv { public: op_def static Z op(X d1, Y d2) { return sd::math::nd4j_floor(static_cast(d1 / d2)); } op_def static Z op(X d1, Y d2, Z *params) { return sd::math::nd4j_floor(static_cast(d1 / d2)); } op_def static Z op(X d1) { return sd::math::nd4j_floor(static_cast(d1)); } // op for MetaOps op_def static Z op(X d1, Y *params) { return sd::math::nd4j_floor(static_cast(d1 / params[0])); } }; template class TruncateDiv { public: op_def static Z op(X d1, Y d2) { auto i1 = static_cast(d1); auto i2 = static_cast(d2); return static_cast(i1 / i2); } op_def static Z op(X d1, Y d2, Z *params) { auto i1 = static_cast(d1); auto i2 = static_cast(d2); return static_cast(i1 / i2); } op_def static Z op(X d1) { return d1; } // op for MetaOps op_def static Z op(X d1, Y *params) { auto i1 = static_cast(d1); auto i2 = static_cast(params[0]); return static_cast(i1 / i2); } }; template class TruncateMod { public: op_def static Z op(X d1, Y d2) { auto i1 = static_cast(d1); auto i2 = static_cast(d2); return static_cast(i1 % i2); } op_def static Z op(X d1, Y d2, Z *params) { auto i1 = static_cast(d1); auto i2 = static_cast(d2); return static_cast(i1 % i2); } op_def static Z op(X d1) { return static_cast(d1); } // op for MetaOps op_def static Z op(X d1, Y *params) { auto i1 = static_cast(d1); auto i2 = static_cast(params[0]); return static_cast(i1 % i2); } }; template class Remainder { public: op_def static Z op(X d1, Y d2) { return sd::math::nd4j_remainder(d1, d2); } op_def static Z op(X d1, Y d2, Z *params) { return sd::math::nd4j_remainder(d1, d2); } op_def static Z op(X d1) { return d1; } // op for MetaOps op_def static Z op(X d1, Y *params) { return sd::math::nd4j_remainder(d1, params[0]); } }; template class FMod { public: op_def static Z op(X d1, Y d2) { return sd::math::nd4j_fmod(d1, d2); } op_def static Z op(X d1, Y d2, Z *params) { return sd::math::nd4j_fmod(d1, d2); } op_def static Z op(X d1) { return d1; } // op for MetaOps op_def static Z op(X d1, Y *params) { return sd::math::nd4j_fmod(d1, params[0]); } }; template class FloorMod { public: op_def static Z op(X d1, Y d2) { auto m = sd::math::nd4j_fmod(d1, d2); return (d1 < static_cast(0)) == (d2 < static_cast(0)) ? m : sd::math::nd4j_fmod(m + static_cast(d2), d2); } op_def static Z op(X d1, Y d2, Z *params) { auto m = sd::math::nd4j_fmod(d1, d2); return (d1 < static_cast(0.0f)) == (d2 < static_cast(0)) ? m : sd::math::nd4j_fmod(m + static_cast(d2), d2); } op_def static Z op(X d1) { return d1; } // op for MetaOps op_def static Z op(X d1, Y *params) { return op(d1, params[0]); } }; template class ReverseDivide { public: op_def static Z op(X d1, Y d2) { return static_cast(d2 / d1); } op_def static Z op(X d1, Y d2, Z *params) { return static_cast(d2 / d1); } op_def static Z op(X d1) { return static_cast(d1); } // op for MetaOps op_def static Z op(X d1, Y *params) { return static_cast(params[0] / d1); } }; template class CopyPws { public: op_def static Z op(X d1, Y d2) { return static_cast(d2); } op_def static Z op(X d1, Y d2, Z *params) { return static_cast(d2); } op_def static Z op(X d1) { return static_cast(d1); } op_def static Z op(X d1, Y *params) { return static_cast(d1); } }; template class Copy { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return d1; } }; template class Copy2 { public: op_def static Z op(X d1, Y d2) { return static_cast(d2); } op_def static Z op(X d1, Y d2, Z *params) { return static_cast(d2); } op_def static Z op(X d1) { return static_cast(d1); } op_def static Z op(X d1, Y *params) { return static_cast(d1); } }; template class Axpy { public: op_def static Z op(X d1, Y d2) { return static_cast(d2 + d1); } op_def static Z op(X d1, Y d2, Z *params) { auto alpha = params[0]; return alpha * static_cast(d1) + static_cast(d2); } op_def static Z op(X d1) { return static_cast(d1); } }; template class Assign { public: no_op_exec_special_any no_op_exec_special_any_cuda op_def static Z op(X d1, X *params) { return static_cast(d1); } }; template class And { public: no_op_exec_special_bool no_op_exec_special_bool_cuda op_def static Z op(X d1, X d2) { return d2 + d1; } op_def static Z op(X d1, X d2, X *params) { if (params != nullptr) { auto comp = params[0]; return d1 != comp && d2 != comp ? static_cast(1) : static_cast(0); } else { auto b1 = static_cast(d1); auto b2 = static_cast(d2); return (b1 && b2) ? static_cast(1) : static_cast(0); } } op_def static Z op(X d1) { return d1; } // op for MetaOps op_def static Z op(X d1, X *params) { return static_cast(119); } }; template class IntOr { public: op_def static X op(X d1, X d2) { return d2 | d1; } op_def static X op(X d1, X d2, X *params) { return op(d1, d2); } }; template class IntAnd { public: op_def static X op(X d1, X d2) { return d2 & d1; } op_def static X op(X d1, X d2, X *params) { return op(d1, d2); } }; template class IntXor { public: op_def static X op(X d1, X d2) { return d2 ^ d1; } op_def static X op(X d1, X d2, X *params) { return op(d1, d2); } }; template class ShiftLeft { public: op_def static X op(X d1, X d2) { return d1 << d2; } op_def static X op(X d1, X d2, X *params) { return op(d1, d2); } }; template class ShiftRight { public: op_def static X op(X d1, X d2) { return d1 >> d2; } op_def static X op(X d1, X d2, X *params) { return op(d1, d2); } }; template class CyclicShiftLeft { public: op_def static X op(X d1, X d2) { return sd::math::nd4j_rotl(d1, d2); } op_def static X op(X d1, X d2, X *params) { return op(d1, d2); } }; template class CyclicShiftRight { public: op_def static X op(X d1, X d2) { return sd::math::nd4j_rotr(d1, d2); } op_def static X op(X d1, X d2, X *params) { return op(d1, d2); } }; template class Or { public: no_op_exec_special_bool no_op_exec_special_bool_cuda op_def static Z op(X d1, X d2) { return d2 + d1; } op_def static Z op(X d1, X d2, X *params) { if (params != nullptr) { auto comp = params[0]; return d1 != comp || d2 != comp ? static_cast(1) : static_cast(0); } else { auto b1 = static_cast(d1); auto b2 = static_cast(d2); return b1 || b2 ? static_cast(1) : static_cast(0); } } op_def static Z op(X d1) { return d1; } // op for MetaOps op_def static Z op(X d1, X *params) { return static_cast(119); } }; template class Xor { public: no_op_exec_special_bool no_op_exec_special_bool_cuda op_def static Z op(X d1, X d2) { return d2 + d1; } op_def static Z op(X d1, X d2, X *params) { if (params != nullptr) { auto comp = params[0]; return ((d1 == comp && d2 != comp) || (d1 != comp && d2 == comp)) ? static_cast(1) : static_cast(0); } else { auto b1 = static_cast(d1); auto b2 = static_cast(d2); return (!b1 && b2 )||(b1 && !b2) ? static_cast(1) : static_cast(0); } } op_def static Z op(X d1) { return d1; } }; template class Not { public: no_op_exec_special_bool no_op_exec_special_bool_cuda op_def static Z op(X d1, X d2) { return static_cast(0); } op_def static Z op(X d1, X d2, X *params) { return d1 != d2 ? static_cast(1) : static_cast(0); } // this transform op should run only on boolean input op_def static Z op(X d1, X *params) { auto b1 = static_cast(d1); return !b1; } }; template class LogicalNot { public: op_def static Z op(X d1, Y d2) { return !((int) d1 && (int) d2); } op_def static Z op(X d1, Y d2, Z *params) { return static_cast(!(static_cast(d1) && static_cast(d2))); } op_def static Z op(X d1) { return d1; } // op for MetaOps op_def static Z op(X d1, Y *params) { return static_cast(119); } }; template class LogicalXor { public: op_def static Z op(X d1, Y d2) { auto i1 = static_cast(d1); auto i2 = static_cast(d2); return (i1 | i2) &~ (i1 & i2); } op_def static Z op(X d1, Y d2, Z *params) { return op(d1, d2); } op_def static Z op(X d1) { return d1; } // op for MetaOps op_def static Z op(X d1, Y *params) { return static_cast(119); } }; template class LogicalAnd { public: op_def static Z op(X d1, Y d2) { return static_cast(d1) & static_cast(d2); } op_def static Z op(X d1, Y d2, Z *params) { return op(d1, d2); } op_def static Z op(Y d1) { return d1; } // op for MetaOps op_def static Z op(X d1, Y *params) { return static_cast(119); } }; template class LogicalOr { public: op_def static Z op(X d1, Y d2) { return static_cast(d1) | static_cast(d2); } op_def static Z op(X d1, Y d2, Z *params) { return op(d1, d2); } op_def static Z op(X d1) { return d1; } // op for MetaOps op_def static Z op(X d1, Y *params) { return static_cast(119); } }; template class Mod { public: op_def static Z op(X d1, Y d2) { auto dx = static_cast(d2); auto f = sd::math::nd4j_floor(d1 / dx); auto r = f * dx; return d1 - r; } op_def static Z op(X d1, Y d2, Z *params) { return op(d1, d2); } // op for MetaOp op_def static Z op(X d1, Y *params) { return op(d1, params[0]); } }; template class ReverseMod { public: op_def static Z op(X d1, Y d2) { return static_cast(d2) % static_cast(d1); } op_def static Z op(X d1, Y d2, Z *params) { return op(d1, d2); } // op for MetaOp op_def static Z op(X d1, Y *params) { return op(d1, params[0]); } }; /** * Whether 2 elements in an array * are epsilion equal */ template class Epsilon { public: op_def static Z op(X d1, X d2) { X diff = d1 - d2; X absDiff = sd::math::nd4j_abs(diff); if (absDiff <= static_cast(MIN_V)) return static_cast(1); return static_cast(0); } op_def static Z op(X d1, X d2, X *params) { return op(d1, d2); } op_def static Z op(X d1, X *params) { return d1; } }; template class EqualTo { public: op_def static Z op(X d1, X d2) { return d1 == d2; } op_def static Z op(X d1, X d2, X *params) { return op(d1, d2); } op_def static Z op(X d1, X *params) { return d1; } }; template class NotEqualTo { public: op_def static Z op(X d1, X d2) { return d1 != d2; } op_def static Z op(X d1, X d2, X *params) { return op(d1, d2); } op_def static Z op(X d1, X *params) { return d1; } }; template class GreaterThanOrEqual { public: op_def static Z op(X d1, X d2) { return d1 >= d2; } op_def static Z op(X d1, X d2, X *params) { return op(d1, d2); } // FIXME: this signature clashes with MetaOp stuff op_def static Z op(X d1, X *params) { return d1; } }; template class GreaterThan { public: op_def static Z op(X d1, X d2) { return d1 > d2; } op_def static Z op(X d1, X d2, X *params) { return op(d1, d2); } // FIXME: this signature clashes with MetaOp stuff op_def static Z op(X d1, X *params) { return d1; } }; template class LessThan { public: op_def static Z op(X d1, X d2) { return d1 < d2; } op_def static Z op(X d1, X d2, X *params) { return op(d1, d2); } op_def static Z op(X d1, X *params) { return d1; } }; template class LessThanOrEqual { public: op_def static Z op(X d1, X d2) { return d1 <= d2; } op_def static Z op(X d1, X d2, X *params) { return op(d1, d2); } op_def static Z op(X d1, X *params) { return d1; } }; template class Abs { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return sd::math::nd4j_abs(d1); } }; template class Ceiling { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return sd::math::nd4j_ceil(d1); } }; template class Cosine { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return sd::math::nd4j_cos(d1); } }; template class Exp { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return sd::math::nd4j_exp(d1); } }; template class HardTanhDerivative { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return ((d1 >= static_cast(-1.f) && d1 <= static_cast(1.f)) ? static_cast(1.f) : static_cast(0.f)); } }; template class HardTanh { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { if (d1 < static_cast(-1)) return static_cast(-1); else if (d1 > static_cast(1)) return static_cast(1); else return d1; } }; template class Floor { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return sd::math::nd4j_floor(d1); } }; template class Log { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return sd::math::nd4j_log(d1); } }; template class Log1p { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return sd::math::nd4j_log(1 + d1); } }; template class LogX { public: op_def static Z op(X d1, Y d2, Z *params) { return sd::math::nd4j_log(d1) / sd::math::nd4j_log(d2) ; } }; template class StabilizeFP16 { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { if (d1 <= static_cast(0)) return static_cast(sd::DataTypeUtils::min()); else return d1; } }; template class StabilizeX { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { if (d1 <= static_cast(0)) return sd::DataTypeUtils::min(); else return d1; } }; template class SpecialDerivative { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return d1 * (static_cast(1.f) - d1); } }; template class Neg { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return -d1; } }; template class Erf { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return sd::math::nd4j_erf(d1); } }; template class Erfc { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return sd::math::nd4j_erfc(d1); } }; template class Reciprocal { public: no_op_exec_special_same no_op_exec_special_same_cuda // op_def static T op(T d1) { // return (T(1.0f) / d1); // } // op for MetaOps op_def static X op(X d1, X *params) { return (static_cast(1) / d1); } }; template class Sqr { public: no_op_exec_special no_op_exec_special_cuda op_def static Z op(X d1, Z *params) { return sd::math::nd4j_pow(d1, static_cast(2)); } op_def static Z op(X d1) { return sd::math::nd4j_pow(d1, static_cast(2)); } }; template class RelativeError { public: no_op_exec_special no_op_exec_special_cuda op_def static Z op(X d1, Y d2) { return sd::math::nd4j_re(d1, d2); } op_def static Z op(X d1, Y d2, Z *params) { return op(d1, d2); } op_def static Z op(X d1) { return static_cast(0); } }; template class BinaryRelativeError { public: no_op_exec_special no_op_exec_special_cuda op_def static Z op(X d1, Y d2, Z *params) { X threshold = params[0]; return sd::math::nd4j_re(d1, d2) > threshold ? static_cast(1) : static_cast(0); } op_def static Z op(X d1) { return static_cast(0); } }; template class BinaryMinimumAbsoluteRelativeError { public: no_op_exec_special no_op_exec_special_cuda op_def static Z op(X d1, X *params) { X d2 = params[0]; X thresholdRelative = params[1]; X thresholdAbsolute = params[2]; return sd::math::nd4j_re(d1, d2) > thresholdRelative ? (sd::math::nd4j_abs(d1 - static_cast(d2)) < thresholdAbsolute ? static_cast(0) : static_cast(1)) : static_cast(0); } op_def static Z op(X d1, Y d2, Z *params) { X thresholdRelative = params[0]; X thresholdAbsolute = params[1]; return sd::math::nd4j_re(d1, d2) > thresholdRelative ? (sd::math::nd4j_abs(d1 - static_cast(d2)) < thresholdAbsolute ? static_cast(0) : static_cast(1)) : static_cast(0); } op_def static Z op(X d1) { return static_cast(0); } }; template class ReversePow { public: no_op_exec_special no_op_exec_special_cuda op_def static Z op(X d1, Z *params) { return sd::math::nd4j_pow(params[0], d1); } op_def static Z op(X d1, Y d2) { return sd::math::nd4j_pow(d2, d1); } op_def static Z op(X d1, Y d2, Z *params) { return sd::math::nd4j_pow(d2, d1); } op_def static Z op(X d1) { return d1; } }; template class Pow { public: no_op_exec_special no_op_exec_special_cuda op_def static Z op(X d1, Z *params) { return sd::math::nd4j_pow(d1, params[0]); } op_def static Z op(X d1, Y d2) { return sd::math::nd4j_pow(d1, d2); } op_def static Z op(X d1, Y d2, Z *params) { return sd::math::nd4j_pow(d1, d2); } op_def static Z op(X d1) { return d1; } }; template class PowDerivative { public: no_op_exec_special no_op_exec_special_cuda op_def static Z op(X d1, Z *params) { return params[0] * sd::math::nd4j_pow(d1, static_cast(params[0]) - static_cast(1.f)); } op_def static Z op(X d1, Y d2) { return static_cast(d2) * sd::math::nd4j_pow(d1, static_cast(d2) - static_cast(1.f)); } op_def static Z op(X d1, Y d2, Z *params) { return static_cast(d2) * sd::math::nd4j_pow(d1, static_cast(d2) - static_cast(1.f)); } op_def static Z op(X d1) { return static_cast(d1); } }; template class IGamma { public: no_op_exec_special no_op_exec_special_cuda op_def static Z op(X d1, Z *params) { return sd::math::nd4j_igamma(d1, params[0]); } op_def static Z op(X d1, Y d2) { return sd::math::nd4j_igamma(d1, d2); } op_def static Z op(X d1, Y d2, Z *params) { return sd::math::nd4j_igamma(d1, d2); } op_def static Z op(X d1) { return d1; } }; template class IGammac { public: no_op_exec_special no_op_exec_special_cuda op_def static Z op(X d1, Z *params) { return sd::math::nd4j_igammac(d1, params[0]); } op_def static Z op(X d1, Y d2) { return sd::math::nd4j_igammac(d1, d2); } op_def static Z op(X d1, Y d2, Z *params) { return sd::math::nd4j_igammac(d1, d2); } op_def static Z op(X d1) { return d1; } }; template class Round { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return sd::math::nd4j_round(d1); } }; template class IsNan { public: no_op_exec_special_bool no_op_exec_special_bool_cuda no_op_exec_special_accumulation no_op_exec_special_accumulation_cuda op_def static Z op(X d1, X *params) { return sd::math::nd4j_isnan(d1) ? static_cast(1) : static_cast(0); } op_def static X startingValue(const X *input) { return static_cast(0); } op_def static Z merge(X old, X opOutput, X *extraParams) { return opOutput + old; } op_def static Z update(X old, X opOutput, X *extraParams) { return opOutput + old; } op_def static Z postProcess(X reduction, Nd4jLong n, X *extraParams) { return reduction; } }; template class Expm1 { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return sd::math::nd4j_exp(d1) - static_cast(1); } }; template class IsPositive { public: no_op_exec_special_bool no_op_exec_special_bool_cuda no_op_exec_special_accumulation no_op_exec_special_accumulation_cuda op_def static Z op(X d1, X *params) { return d1 > (X)0.f; } op_def static X startingValue(const X *input) { return static_cast(0); } op_def static Z merge(X old, X opOutput, X *extraParams) { return opOutput + old; } op_def static Z update(X old, X opOutput, X *extraParams) { return opOutput + old; } op_def static Z postProcess(X reduction, Nd4jLong n, X *extraParams) { return reduction; } }; template class IsNegative { public: no_op_exec_special_bool no_op_exec_special_bool_cuda no_op_exec_special_accumulation no_op_exec_special_accumulation_cuda op_def static Z op(X d1, X *params) { return d1 < (X)0.f; } op_def static X startingValue(const X *input) { return static_cast(0); } op_def static Z merge(X old, X opOutput, X *extraParams) { return opOutput + old; } op_def static Z update(X old, X opOutput, X *extraParams) { return opOutput + old; } op_def static Z postProcess(X reduction, Nd4jLong n, X *extraParams) { return reduction; } }; template class IsInf { public: no_op_exec_special_bool no_op_exec_special_bool_cuda no_op_exec_special_accumulation no_op_exec_special_accumulation_cuda op_def static Z op(X d1, X *params) { return sd::math::nd4j_isinf(d1) ? static_cast(1) : static_cast(0); } op_def static X startingValue(const X *input) { return static_cast(0); } op_def static Z merge(X old, X opOutput, X *extraParams) { return opOutput + old; } op_def static Z update(X old, X opOutput, X *extraParams) { return opOutput + old; } op_def static Z postProcess(X reduction, Nd4jLong n, X *extraParams) { return reduction; } }; template class IsInfOrNan{ public: no_op_exec_special_bool no_op_exec_special_bool_cuda no_op_exec_special_accumulation no_op_exec_special_accumulation_cuda op_def static Z op(X d1, X *params) { return sd::math::nd4j_isfin(d1) ? static_cast(0) : static_cast(1); } op_def static X startingValue(const X *input) { return static_cast(0); } op_def static Z merge(X old, X opOutput, X *extraParams) { return opOutput == static_cast(0) && old == static_cast(0) ? static_cast(0) : static_cast(1); } op_def static Z update(X old, X opOutput, X *extraParams) { return opOutput == static_cast(0) && old == static_cast(0) ? static_cast(0) : static_cast(1); } op_def static Z postProcess(X reduction, Nd4jLong n, X *extraParams) { return reduction != static_cast(0); } }; template class IsFinite { public: no_op_exec_special_bool no_op_exec_special_bool_cuda no_op_exec_special_accumulation no_op_exec_special_accumulation_cuda op_def static Z op(X d1, X *params) { return sd::math::nd4j_isfin(d1) ? static_cast(1) : static_cast(0); } op_def static X startingValue(const X *input) { return static_cast(1); } op_def static Z merge(X old, X opOutput, X *extraParams) { return opOutput == static_cast(0) || old == static_cast(0) ? static_cast(0) : static_cast(1); } op_def static Z update(X old, X opOutput, X *extraParams) { return opOutput == static_cast(0) || old == static_cast(0) ? static_cast(0) : static_cast(1); } op_def static Z postProcess(X reduction, Nd4jLong n, X *extraParams) { return reduction != static_cast(0); } }; template class ClipByValue { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { if (d1 > params[1]) return params[1]; if (d1 < params[0]) return params[0]; return d1; } }; template class LstmClip { public: no_op_exec_special no_op_exec_special_cuda op_def static Z op(X d1, Y d2, Z *params) { X _v = (X) d2; if (d1 > _v) return _v; else if (d1 < -_v) return -_v; else return d1; } }; template class Swish { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return d1 * sd::math::nd4j_sigmoid(d1); } }; template class Mish { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return d1 * sd::math::nd4j_tanh(sd::math::nd4j_softplus(d1)); } }; template class MishDerivative { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { auto ex = sd::math::nd4j_exp(d1); auto e2x = ex * ex; auto e3x = ex * ex * ex; return (ex * (4 * (d1 + 1) + 4 * e2x + e3x + ex *(4 * d1 + 6))) / sd::math::nd4j_pow((2 * ex + e2x + 2), (X) 2.f); } }; template class GELU { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { return d1 * sd::math::nd4j_sigmoid(static_cast(1.702f) * d1); } }; template class PreciseGELU { public: no_op_exec_special_same no_op_exec_special_same_cuda op_def static X op(X d1, X *params) { auto sp = sd::math::nd4j_sqrt(static_cast