/*******************************************************************************
* Copyright (c) 2015-2018 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
//
// Created by agibsonccc on 2/21/16.
//
#ifndef NATIVEOPERATIONS_NATIVEOPS_H
#define NATIVEOPERATIONS_NATIVEOPS_H
/*
#ifndef thread_local
# if __STDC_VERSION__ >= 201112 && !defined __STDC_NO_THREADS__
# define thread_local _Thread_local
# elif defined _WIN32 && ( \
defined _MSC_VER || \
defined __ICL || \
defined __DMC__ || \
defined __BORLANDC__ )
# define thread_local __declspec(thread)
// note that ICC (linux) and Clang are covered by __GNUC__
# elif defined __GNUC__ || \
defined __SUNPRO_C || \
defined __xlC__
# define thread_local __thread
# else
# error "Cannot define thread_local"
# endif
#endif
*/
#include <pointercast.h>
#include <types/float16.h>
#include <cnpy.h>
//DO NOT REMOVE: THIS IS AN EDITOR SEMANTICS THING FOR CLION
//IT DEFINES THE EXPORT MACRO FOR THE EDITOR AND THEN
//RE-ADDS THE DEFINITION VIA dll.h
#ifdef _WIN32
#define ND4J_EXPORT __declspec(dllexport)
#else
#define ND4J_EXPORT
#endif
#include <dll.h>
#include <helpers/BlasHelper.h>
/*
int tad_threshold = 1;
int element_threshold = 32;
bool debug = false;
bool verbose = false;
*/
#include <array/ShapeList.h>
#include <array/ConstantDescriptor.h>
#include <helpers/ConstantShapeHelper.h>
#include <array/ConstantDataBuffer.h>
#include <helpers/ConstantHelper.h>
#include <array/TadPack.h>
#include <graph/VariablesSet.h>
#include <graph/GraphState.h>
#include <graph/execution/LogicExecutor.h>
#include <graph/ResultWrapper.h>
#include <DebugInfo.h>
class ND4J_EXPORT NativeOps {
public:
NativeOps();
/**
*
* @param extra
* @param p
* @param len
*/
void tryPointer(Nd4jPointer extra, Nd4jPointer p, int len);
/**
*
* @param num
*/
void setElementThreshold(int num);
/**
*
* @param num
*/
void setTADThreshold(int num);
/**
*
* @param opNum
* @param x
* @param xShapeInfo
* @param extraParams
*/
void execIndexReduceScalar(Nd4jPointer *extraPointers,
int opNum,
void *hX, Nd4jLong *hXShapeInfo,
void *dX, Nd4jLong *dXShapeInfo,
void *extraParams,
void *hZ, Nd4jLong *hZShapeInfo,
void *dZ, Nd4jLong *dZShapeInfo);
/**
*
* @param opNum
* @param x
* @param xShapeInfo
* @param extraParams
* @param result
* @param resultShapeInfoBuffer
* @param dimension
* @param dimensionLength
*/
void execIndexReduce(Nd4jPointer *extraPointers,
int opNum,
void *hX, Nd4jLong *hXShapeInfo,
void *dX, Nd4jLong *dXShapeInfo,
void *extraParams,
void *hZ, Nd4jLong *hZShapeInfo,
void *dZ, Nd4jLong *dZShapeInfo,
void *hDimension, Nd4jLong *hDimensionShape,
void *dDimension, Nd4jLong *dDimensionShape);
/**
*
* @param opNum
* @param x
* @param xShapeInfo
* @param y
* @param yShapeInfo
* @param result
* @param resultShapeInfo
* @param dimension
* @param dimensionLength
*/
void execBroadcast(
Nd4jPointer *extraPointers,
int opNum,
void *hX, Nd4jLong *hXShapeInfo,
void *dX, Nd4jLong *dXShapeInfo,
void *hY, Nd4jLong *hYShapeInfo,
void *dY, Nd4jLong *dYShapeInfo,
void *hZ, Nd4jLong *hZShapeInfo,
void *dZ, Nd4jLong *dZShapeInfo,
void *hDimension, Nd4jLong *hDimensionShape,
void *dDimension, Nd4jLong *dDimensionShape);
void execBroadcastBool(
Nd4jPointer *extraPointers,
int opNum,
void *hX, Nd4jLong *hXShapeInfo,
void *dX, Nd4jLong *dXShapeInfo,
void *hY, Nd4jLong *hYShapeInfo,
void *dY, Nd4jLong *dYShapeInfo,
void *hZ, Nd4jLong *hZShapeInfo,
void *dZ, Nd4jLong *dZShapeInfo,
void *hDimension, Nd4jLong *hDimensionShape,
void *dDimension, Nd4jLong *dDimensionShape);
/**
*
* @param opNum
* @param dx
* @param xShapeInfo
* @param y
* @param yShapeInfo
* @param result
* @param resultShapeInfo
* @param extraParams
* @param n
*/
void execPairwiseTransform(
Nd4jPointer *extraPointers,
int opNum,
void *hX, Nd4jLong *hXShapeInfo,
void *dX, Nd4jLong *dXShapeInfo,
void *hY, Nd4jLong *hYShapeInfo,
void *dY, Nd4jLong *dYShapeInfo,
void *hZ, Nd4jLong *hZShapeInfo,
void *dZ, Nd4jLong *dZShapeInfo,
void *extraParams);
void execPairwiseTransformBool(
Nd4jPointer *extraPointers,
int opNum,
void *hX, Nd4jLong *hXShapeInfo,
void *dX, Nd4jLong *dXShapeInfo,
void *hY, Nd4jLong *hYShapeInfo,
void *dY, Nd4jLong *dYShapeInfo,
void *hZ, Nd4jLong *hZShapeInfo,
void *dZ, Nd4jLong *dZShapeInfo,
void *extraParams);
/**
*
* @param opNum
* @param x
* @param xShapeInfo
* @param extraParams
* @param result
* @param resultShapeInfo
*/
void execReduceFloat(Nd4jPointer *extraPointers,
int opNum,
void *hX, Nd4jLong *hXShapeInfo,
void *dX, Nd4jLong *dXShapeInfo,
void *extraParams,
void *hZ, Nd4jLong *hZShapeInfo,
void *dZ, Nd4jLong *dZShapeInfo);
void execReduceSame(Nd4jPointer *extraPointers,
int opNum,
void *hX, Nd4jLong *hXShapeInfo,
void *dX, Nd4jLong *dXShapeInfo,
void *extraParams,
void *hZ, Nd4jLong *hZShapeInfo,
void *dZ, Nd4jLong *dZShapeInfo);
void execReduceBool(Nd4jPointer *extraPointers,
int opNum,
void *hX, Nd4jLong *hXShapeInfo,
void *dX, Nd4jLong *dXShapeInfo,
void *extraParams,
void *hZ, Nd4jLong *hZShapeInfo,
void *dZ, Nd4jLong *dZShapeInfo);
void execReduceLong(Nd4jPointer *extraPointers,
int opNum,
void *hX, Nd4jLong *hXShapeInfo,
void *dX, Nd4jLong *dXShapeInfo,
void *extraParams,
void *hZ, Nd4jLong *hZShapeInfo,
void *dZ, Nd4jLong *dZShapeInfo);
/**
*
* @param opNum
* @param x
* @param xShapeInfo
* @param extraParams
* @param result
* @param resultShapeInfo
*/
void execReduceFloat(Nd4jPointer *extraPointers,
int opNum,
void *hX, Nd4jLong *hXShapeInfo,
void *dX, Nd4jLong *dXShapeInfo,
void *extraParams,
void *hZ, Nd4jLong *hZShapeInfo,
void *dZ, Nd4jLong *dZShapeInfo,
void *hDimension, Nd4jLong *hDimensionShape,
void *dDimension, Nd4jLong *dDimensionShape);
void execReduceSame(Nd4jPointer *extraPointers,
int opNum,
void *hX, Nd4jLong *hXShapeInfo,
void *dX, Nd4jLong *dXShapeInfo,
void *extraParams,
void *hZ, Nd4jLong *hZShapeInfo,
void *dZ, Nd4jLong *dZShapeInfo,
void *hDimension, Nd4jLong *hDimensionShape,
void *dDimension, Nd4jLong *dDimensionShape);
void execReduceBool(Nd4jPointer *extraPointers,
int opNum,
void *hX, Nd4jLong *hXShapeInfo,
void *dX, Nd4jLong *dXShapeInfo,
void *extraParams,
void *hZ, Nd4jLong *hZShapeInfo,
void *dZ, Nd4jLong *dZShapeInfo,
void *hDimension, Nd4jLong *hDimensionShape,
void *dDimension, Nd4jLong *dDimensionShape);
void execReduceLong(Nd4jPointer *extraPointers,
int opNum,
void *hX, Nd4jLong *hXShapeInfo,
void *dX, Nd4jLong *dXShapeInfo,
void *extraParams,
void *hZ, Nd4jLong *hZShapeInfo,
void *dZ, Nd4jLong *dZShapeInfo,
void *hDimension, Nd4jLong *hDimensionShape,
void *dDimension, Nd4jLong *dDimensionShape);
/**
*
* @param opNum
* @param x
* @param xShapeInfo
* @param extraParamsVals
* @param y
* @param yShapeInfo
* @param result
* @param resultShapeInfo
*/
void execReduce3(Nd4jPointer *extraPointers,
int opNum,
void *hX, Nd4jLong *hXShapeInfo,
void *dX, Nd4jLong *dXShapeInfo,
void *extraParamsVals,
void *hY, Nd4jLong *hYShapeInfo,
void *dY, Nd4jLong *dYShapeInfo,
void *hZ, Nd4jLong *hZShapeInfo,
void *dZ, Nd4jLong *dZShapeInfo);
/**
*
* @param opNum
* @param x
* @param xShapeInfo
* @param extraParamsVals
* @param y
* @param yShapeInfo
*/
void execReduce3Scalar(Nd4jPointer *extraPointers,
int opNum,
void *hX, Nd4jLong *hXShapeInfo,
void *dX, Nd4jLong *dXShapeInfo,
void *extraParamsVals,
void *hY, Nd4jLong *hYShapeInfo,
void *dY, Nd4jLong *dYShapeInfo,
void *hZ, Nd4jLong *hZShapeInfo,
void *dZ, Nd4jLong *dZShapeInfo);
/**
*
* @param opNum
* @param x
* @param xShapeInfo
* @param extraParamsVals
* @param y
* @param yShapeInfo
* @param result
* @param resultShapeInfoBuffer
* @param dimension
* @param dimensionLength
*/
void execReduce3(Nd4jPointer *extraPointers,
int opNum,
void *hX, Nd4jLong *hXShapeInfo,
void *dX, Nd4jLong *dXShapeInfo,
void *extraParamsVals,
void *hY, Nd4jLong *hYShapeInfo,
void *dY, Nd4jLong *dYShapeInfo,
void *hZ, Nd4jLong *hZShapeInfo,
void *dZ, Nd4jLong *dZShapeInfo,
void *hDimension, Nd4jLong *hDimensionShape,
void *dDimension, Nd4jLong *dDimensionShape,
Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets,
Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets);
void execReduce3All(Nd4jPointer *extraPointers,
int opNum,
void *hX, Nd4jLong *hXShapeInfo,
void *dX, Nd4jLong *dXShapeInfo,
void *extraParamsVals,
void *hY, Nd4jLong *hYShapeInfo,
void *dY, Nd4jLong *dYShapeInfo,
void *hZ, Nd4jLong *hZShapeInfo,
void *dZ, Nd4jLong *dZShapeInfo,
void *hDimension, Nd4jLong *hDimensionShape,
void *dDimension, Nd4jLong *dDimensionShape,
Nd4jLong *xTadShapeInfo, Nd4jLong *xOffsets,
Nd4jLong *yTadShapeInfo, Nd4jLong *yOffsets);
/**
*
* @param opNum
* @param x
* @param xShapeInfo
* @param result
* @param resultShapeInfo
* @param scalar
* @param extraParams
* @param n
*/
void execScalar(Nd4jPointer *extraPointers,
int opNum,
void *hX, Nd4jLong *hXShapeInfo,
void *dX, Nd4jLong *dXShapeInfo,
void *hZ, Nd4jLong *hZShapeInfo,
void *dZ, Nd4jLong *dZShapeInfo,
void *hScalar, Nd4jLong *hScalarShapeInfo,
void *dScalar, Nd4jLong *dScalarShapeInfo,
void *extraParams);
void execScalarBool(Nd4jPointer *extraPointers,
int opNum,
void *hX, Nd4jLong *hXShapeInfo,
void *dX, Nd4jLong *dXShapeInfo,
void *hZ, Nd4jLong *hZShapeInfo,
void *dZ, Nd4jLong *dZShapeInfo,
void *hScalar, Nd4jLong *hScalarShapeInfo,
void *dScalar, Nd4jLong *dScalarShapeInfo,
void *extraParams);
/**
*
* @param opNum
* @param x
* @param xShapeInfo
* @param extraParams
*/
void execSummaryStatsScalar(Nd4jPointer *extraPointers,
int opNum,
void *hX, Nd4jLong *hXShapeInfo,
void *dX, Nd4jLong *dXShapeInfo,
void *extraParams,
void *hZ, Nd4jLong *hZShapeInfo,
void *dZ, Nd4jLong *dZShapeInfo,
bool biasCorrected);
/**
*
* @param opNum
* @param x
* @param xShapeInfo
* @param extraParams
* @param result
* @param resultShapeInfo
*/
void execSummaryStats(Nd4jPointer *extraPointers,
int opNum,
void *hX, Nd4jLong *hXShapeInfo,
void *dX, Nd4jLong *dXShapeInfo,
void *extraParams,
void *hZ, Nd4jLong *hZShapeInfo,
void *dZ, Nd4jLong *dZShapeInfo,
bool biasCorrected);
/**
*
* @param opNum
* @param x
* @param xShapeInfo
* @param extraParams
* @param result
* @param resultShapeInfoBuffer
* @param dimension
* @param dimensionLength
*/
void execSummaryStats(Nd4jPointer *extraPointers,
int opNum,
void *hX, Nd4jLong *hXShapeInfo,
void *dX, Nd4jLong *dXShapeInfo,
void *extraParams,
void *hZ, Nd4jLong *hZShapeInfo,
void *dZ, Nd4jLong *dZShapeInfo,
void *hDimension, Nd4jLong *hDimensionShape,
void *dDimension, Nd4jLong *dDimensionShape,
bool biasCorrected,
Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets);
/**
*
* @param opNum
* @param dx
* @param xShapeInfo
* @param result
* @param resultShapeInfo
* @param extraParams
* @param n
*/
void execTransformFloat(Nd4jPointer *extraPointers,
int opNum,
void *hX, Nd4jLong *hXShapeInfo,
void *dX, Nd4jLong *dXShapeInfo,
void *hZ, Nd4jLong *hZShapeInfo,
void *dZ, Nd4jLong *dZShapeInfo,
void *extraParams);
void execTransformSame(Nd4jPointer *extraPointers,
int opNum,
void *hX, Nd4jLong *hXShapeInfo,
void *dX, Nd4jLong *dXShapeInfo,
void *hZ, Nd4jLong *hZShapeInfo,
void *dZ, Nd4jLong *dZShapeInfo,
void *extraParams);
void execTransformBool(Nd4jPointer *extraPointers,
int opNum,
void *hX, Nd4jLong *hXShapeInfo,
void *dX, Nd4jLong *dXShapeInfo,
void *hZ, Nd4jLong *hZShapeInfo,
void *dZ, Nd4jLong *dZShapeInfo,
void *extraParams);
void execTransformAny(Nd4jPointer *extraPointers,
int opNum,
void *hX, Nd4jLong *hXShapeInfo,
void *dX, Nd4jLong *dXShapeInfo,
void *hZ, Nd4jLong *hZShapeInfo,
void *dZ, Nd4jLong *dZShapeInfo,
void *extraParams);
void execTransformStrict(Nd4jPointer *extraPointers,
int opNum,
void *hX, Nd4jLong *hXShapeInfo,
void *dX, Nd4jLong *dXShapeInfo,
void *hZ, Nd4jLong *hZShapeInfo,
void *dZ, Nd4jLong *dZShapeInfo,
void *extraParams);
/**
*
* @param extraPointers
* @param opNum
* @param x
* @param xShapeInfo
* @param z
* @param zShapeInfo
* @param scalars
* @param extraParams
* @param dimension
* @param dimensionLength
*/
void execScalar(Nd4jPointer *extraPointers,
int opNum,
void *hX, Nd4jLong *hXShapeInfo,
void *dX, Nd4jLong *dXShapeInfo,
void *hZ, Nd4jLong *hZShapeInfo,
void *dZ, Nd4jLong *dZShapeInfo,
void *hScalars, Nd4jLong *hScalarShapeInfo,
void *dScalars, Nd4jLong *dScalarShapeInfo,
void *extraParams,
void *hDimension, Nd4jLong *hDimensionShape,
void *dDimension, Nd4jLong *dDimensionShape,
Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets,
Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ);
void execScalarBool(Nd4jPointer *extraPointers,
int opNum,
void *hX, Nd4jLong *hXShapeInfo,
void *dX, Nd4jLong *dXShapeInfo,
void *hZ, Nd4jLong *hZShapeInfo,
void *dZ, Nd4jLong *dZShapeInfo,
void *hScalars, Nd4jLong *hScalarShapeInfo,
void *dScalars, Nd4jLong *dScalarShapeInfo,
void *extraParams,
void *hDimension, Nd4jLong *hDimensionShape,
void *dDimension, Nd4jLong *dDimensionShape,
Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets,
Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ);
/**
* Append an input array
* to the end of a flat array
* in a particular order
* @param offset the offset of the array to start at
* @param order the order
* @param result the result array
* @param resultShapeInfo the shape info for the array
* @param input the input for the array
* @param inputShapeInfo the shape information for that array
*/
void flatten(
Nd4jPointer *extraPointers,
int offset,
char order,
void *result, Nd4jLong *resultShapeInfo,
void *dresult, Nd4jLong *dresultShapeInfo,
void *input, Nd4jLong *inputShapeInfo,
void *dinput, Nd4jLong *dinputShapeInfo);
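/*
* A minimal usage sketch for flatten(), assuming hResult/hInput and their shape-info
* buffers were created elsewhere and that the device-side pointers may be nullptr on the
* CPU backend (an assumption, not a guarantee of this header):
*
*   NativeOps nativeOps;
*   // copy hInput into the flat hResult buffer starting at element `offset`, in 'c' order
*   nativeOps.flatten(nullptr, offset, 'c',
*                     hResult, hResultShapeInfo, nullptr, nullptr,
*                     hInput, hInputShapeInfo, nullptr, nullptr);
*/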
void concat(
Nd4jPointer *extraPointers,
int dimension,
int numArrays,
Nd4jPointer *data, Nd4jPointer *inputShapeInfo,
Nd4jPointer *ddata, Nd4jPointer *dinputShapeInfo,
void *result, Nd4jLong *resultShapeInfo,
void *dresult, Nd4jLong *dresultShapeInfo,
Nd4jPointer *tadPointers, Nd4jPointer *offsetPointers);
void specialConcat (
Nd4jPointer *extraPointers,
int dimension,
int numArrays,
Nd4jPointer *data,
Nd4jPointer *inputShapeInfo,
void *result,
Nd4jLong *resultShapeInfo,
Nd4jPointer *tadPointers,
Nd4jPointer *offsetPointers);
/**
* This method implementation exists only for cuda.
* The other backends should have a dummy method for JNI compatibility reasons.
*/
void initializeDevicesAndFunctions();
void initializeFunctions(Nd4jPointer *functions);
/**
* This method acquires a memory chunk of the requested size on the host side
*
* @param memorySize memory size, in bytes
* @param flags optional parameter
* @return pointer to the allocated memory chunk
*/
Nd4jPointer mallocHost(Nd4jLong memorySize, int flags);
/**
* This method acquires a memory chunk of the requested size on the specified device
*
* @param memorySize memory size, in bytes
* @param deviceId target device id
* @param flags optional parameter
* @return pointer to the allocated memory chunk
*/
Nd4jPointer mallocDevice(Nd4jLong memorySize, int deviceId, int flags);
/**
* This method releases previously allocated host memory space
*
* @param pointer pointer that'll be freed
*/
int freeHost(Nd4jPointer pointer);
/**
* This method releases previously allocated memory space on device
*
* @param pointer pointer that'll be freed
* @param deviceId the device id
*/
int freeDevice(Nd4jPointer pointer, int deviceId);
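/*
* A minimal usage sketch for the allocation/release methods above; sizes and device id are
* placeholder values and error handling is omitted:
*
*   NativeOps nativeOps;
*   Nd4jPointer hostChunk   = nativeOps.mallocHost(1024, 0);       // 1 KiB on the host
*   Nd4jPointer deviceChunk = nativeOps.mallocDevice(1024, 0, 0);  // 1 KiB on device 0
*   // ... use the buffers ...
*   nativeOps.freeHost(hostChunk);
*   nativeOps.freeDevice(deviceChunk, 0);
*/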
/**
*
* @return
*/
int ompGetMaxThreads();
/**
*
* @return
*/
int ompGetNumThreads();
/**
*
* @param threads
*/
void setOmpNumThreads(int threads);
/**
*
* @param threads
*/
void setOmpMinThreads(int threads);
/**
*
* @return
*/
Nd4jPointer createContext();
/**
*
* @return
*/
Nd4jPointer createStream();
/**
*
* @return
*/
Nd4jPointer createEvent();
/**
*
* @param event
* @param stream
* @return
*/
int registerEvent(Nd4jPointer event, Nd4jPointer stream);
/**
*
* @param event
* @return
*/
int destroyEvent(Nd4jPointer event);
/**
*
* @param deviceId
* @return
*/
int setDevice(int deviceId);
/**
*
* @return
*/
int getDevice();
/**
*
* @param stream
* @return
*/
int streamSynchronize(Nd4jPointer stream);
/**
*
* @param event
* @return
*/
int eventSynchronize(Nd4jPointer event);
/**
*
* @param deviceId
* @return
*/
Nd4jLong getDeviceFreeMemory(int deviceId);
/**
* Returns amount of free memory for current device
* @return
*/
Nd4jLong getDeviceFreeMemory();
/**
*
* @param deviceId
* @return
*/
Nd4jLong getDeviceTotalMemory(int deviceId);
/**
*
* @param deviceId
* @return
*/
int getDeviceMajor(int deviceId);
/**
* This method returns amount of cached memory
* @param deviceId
* @return
*/
Nd4jLong getCachedMemory(int deviceId);
/**
*
* @param deviceId
* @return
*/
int getDeviceMinor(int deviceId);
/**
*
* @param deviceId
* @return
*/
const char * getDeviceName(int deviceId);
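/*
* A minimal sketch of querying device information with the methods above:
*
*   NativeOps nativeOps;
*   int dev = nativeOps.getDevice();
*   const char *name    = nativeOps.getDeviceName(dev);
*   Nd4jLong freeBytes  = nativeOps.getDeviceFreeMemory(dev);
*   Nd4jLong totalBytes = nativeOps.getDeviceTotalMemory(dev);
*   int major           = nativeOps.getDeviceMajor(dev);
*   int minor           = nativeOps.getDeviceMinor(dev);
*/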
/**
*
* @param dst
* @param src
* @param size
* @param flags
* @param reserved
* @return
*/
int memcpy(Nd4jPointer dst,
Nd4jPointer src,
Nd4jLong size,
int flags,
Nd4jPointer reserved);
/**
*
* @param dst
* @param src
* @param size
* @param flags
* @param reserved
* @return
*/
int memcpyAsync(Nd4jPointer dst,
Nd4jPointer src,
Nd4jLong size,
int flags,
Nd4jPointer reserved);
/**
*
* @param dst
* @param value
* @param size
* @param flags
* @param reserved
* @return
*/
int memset(Nd4jPointer dst,
int value,
Nd4jLong size,
int flags,
Nd4jPointer reserved);
/**
*
* @param dst
* @param value
* @param size
* @param flags
* @param reserved
* @return
*/
int memsetAsync(Nd4jPointer dst,
int value,
Nd4jLong size,
int flags,
Nd4jPointer reserved);
/**
*
* @param dst
* @param src
* @param size
* @param flags
* @param reserved
* @return
*/
int memcpyConstantAsync(Nd4jLong dst,
Nd4jPointer src,
Nd4jLong size,
int flags,
Nd4jPointer reserved);
/**
*
* @return
*/
Nd4jPointer getConstantSpace();
/**
*
* @return
*/
int getAvailableDevices();
/**
*
* @param reallyEnable
*/
void enableDebugMode(bool reallyEnable);
/**
*
* @param reallyEnable
*/
void enableVerboseMode(bool reallyEnable);
/**
*
* @param gridSize
*/
void setGridLimit(int gridSize);
/**
*
* @param xShapeInfo
* @param dimension
* @param dimensionLength
* @return a TadPack holding the TAD-only shape info and TAD offsets
*/
nd4j::TadPack* tadOnlyShapeInfo(Nd4jLong *xShapeInfo,
int *dimension,
int dimensionLength);
/*
* PullRow special op
*/
/**
*
* @param extraPointers
* @param x
* @param xShapeInfo
* @param z
* @param zShapeInfo
* @param n
* @param indexes
* @param tadShapeInfo
* @param tadOffsets
* @param zTadShapeInfo
* @param zTadOffsets
*/
void pullRows(Nd4jPointer *extraPointers,
void *x, Nd4jLong *xShapeInfo,
void *dx, Nd4jLong *dxShapeInfo,
void *z, Nd4jLong *zShapeInfo,
void *dz, Nd4jLong *dzShapeInfo,
Nd4jLong n,
Nd4jLong *indexes,
Nd4jLong *tadShapeInfo,
Nd4jLong *tadOffsets,
Nd4jLong *zTadShapeInfo,
Nd4jLong *zTadOffsets);
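/*
* A minimal sketch combining tadOnlyShapeInfo() and pullRows(), assuming nd4j::TadPack
* exposes primaryShapeInfo()/primaryOffsets() accessors, that hX/hZ and their shape-info
* buffers exist, and that device pointers may be nullptr on the CPU backend (assumptions):
*
*   int dim = 1;
*   auto xTads = nativeOps.tadOnlyShapeInfo(hXShapeInfo, &dim, 1);
*   auto zTads = nativeOps.tadOnlyShapeInfo(hZShapeInfo, &dim, 1);
*   Nd4jLong indexes[] = {0, 2, 5};   // rows to pull
*   nativeOps.pullRows(nullptr,
*                      hX, hXShapeInfo, nullptr, nullptr,
*                      hZ, hZShapeInfo, nullptr, nullptr,
*                      3, indexes,
*                      xTads->primaryShapeInfo(), xTads->primaryOffsets(),
*                      zTads->primaryShapeInfo(), zTads->primaryOffsets());
*/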
/**
*
* @param extras
* @param dx
* @param dz
* @param n
* @param length
* @param propagate
*/
void average(Nd4jPointer *extras,
Nd4jPointer *x, Nd4jLong *xShapeInfo,
Nd4jPointer *dx, Nd4jLong *dxShapeInfo,
void *z, Nd4jLong *zShapeInfo,
void *dz, Nd4jLong *dzShapeInfo,
int n,
Nd4jLong length,
bool propagate);
void accumulate(Nd4jPointer *extras,
Nd4jPointer *x, Nd4jLong *xShapeInfo,
Nd4jPointer *dx, Nd4jLong *dxShapeInfo,
void *z, Nd4jLong *zShapeInfo,
void *dz, Nd4jLong *dzShapeInfo,
int n,
Nd4jLong length);
/**
* P2P enabler
*/
/**
*
* @param enable
*/
void enableP2P(bool enable);
/**
*
*/
void checkP2P();
/**
*
* @return
*/
bool isP2PAvailable();
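/*
* A minimal sketch for the P2P helpers above:
*
*   NativeOps nativeOps;
*   nativeOps.checkP2P();              // probe peer-to-peer support
*   if (nativeOps.isP2PAvailable())
*       nativeOps.enableP2P(true);     // allow direct device-to-device transfers
*/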
/**
* Shuffle methods
*/
/**
*
* @param extras
* @param dx
* @param xShapeInfo
* @param dz
* @param zShapeInfo
* @param N
* @param shuffleMap
* @param tadShapeInfo
* @param tadOffsets
*/
void shuffle(Nd4jPointer *extras,
Nd4jPointer *x, Nd4jPointer *xShapeInfo,
Nd4jPointer *dx, Nd4jPointer *dxShapeInfo,
Nd4jPointer *z, Nd4jPointer *zShapeInfo,
Nd4jPointer *dz, Nd4jPointer *dzShapeInfo,
int N,
int *shuffleMap,
Nd4jPointer *tadShapeInfo,
Nd4jPointer *tadOffsets);
/**
* Type Conversions
*/
/**
*
* @param extras
* @param srcType
* @param x
* @param N
* @param dstType
* @param z
*/
void convertTypes(Nd4jPointer *extras, int srcType, Nd4jPointer x, Nd4jLong N, int dstType, Nd4jPointer z);
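/*
* A minimal sketch for convertTypes(); the srcType/dstType integers follow libnd4j's
* type-code convention, and the zero values below are placeholders, not real codes:
*
*   int srcType = 0; // placeholder: code of the source type (e.g. float32)
*   int dstType = 0; // placeholder: code of the destination type (e.g. float16)
*   nativeOps.convertTypes(nullptr, srcType, xBuffer, N, dstType, zBuffer);
*/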
/**
*
* @return
*/
bool isExperimentalEnabled();
/**
* Aggregate
*/
/**
*
* @param extraPointers
* @param opNum
* @param arguments
* @param numArguments
* @param shapeArguments
* @param numShapeArguments
* @param indexArguments
* @param numIndexArguments
* @param intArrays
* @param numIntArrays
* @param realArguments
* @param numRealArguments
*/
void execAggregate(Nd4jPointer *extraPointers,
int opNum,
void **arguments,
int numArguments,
Nd4jLong **shapeArguments,
int numShapeArguments,
int *indexArguments,
int numIndexArguments,
int **intArrays,
int numIntArrays,
void *realArguments,
int numRealArguments,
nd4j::DataType dtype);
template <typename T>
void _batchExecutor(Nd4jPointer *extraPointers,
int numAggregates,
int opNum,
int maxArgs,
int maxShapes,
int maxIntArrays,
int maxIntArraySize,
int maxIdx,
int maxReals,
void *ptrToArguments,
nd4j::DataType dtype);
void execAggregateBatch(Nd4jPointer *extraPointers,
int numAggregates,
int opNum,
int maxArgs,
int maxShapes,
int maxIntArrays,
int maxIntArraySize,
int maxIdx,
int maxReals,
void *ptrToArguments,
nd4j::DataType dtype);
/**
* Random operations
*/
/**
*
* @param extraPointers
* @param opNum
* @param state
* @param z
* @param zShapeBuffer
* @param extraArguments
*/
void execRandom(Nd4jPointer *extraPointers,
int opNum,
Nd4jPointer state,
void *hZ, Nd4jLong *hZShapeBuffer,
void *dZ, Nd4jLong *dZShapeBuffer,
void *extraArguments);
/**
*
* @param extraPointers
* @param opNum
* @param state
* @param x
* @param xShapeBuffer
* @param y
* @param yShapeBuffer
* @param z
* @param zShapeBuffer
* @param extraArguments
*/
void execRandom(Nd4jPointer *extraPointers,
int opNum,
Nd4jPointer state,
void *hX, Nd4jLong *hXShapeBuffer,
void *dX, Nd4jLong *dXShapeBuffer,
void *hY, Nd4jLong *hYShapeBuffer,
void *dY, Nd4jLong *dYShapeBuffer,
void *hZ, Nd4jLong *hZShapeBuffer,
void *dZ, Nd4jLong *dZShapeBuffer,
void *extraArguments);
/**
*
* @param extraPointers
* @param opNum
* @param state
* @param x
* @param xShapeBuffer
* @param z
* @param zShapeBuffer
* @param extraArguments
*/
void execRandom(Nd4jPointer *extraPointers,
int opNum,
Nd4jPointer state,
void *hX, Nd4jLong *hXShapeBuffer,
void *dX, Nd4jLong *dXShapeBuffer,
void *hZ, Nd4jLong *hZShapeBuffer,
void *dZ, Nd4jLong *dZShapeBuffer,
void *extraArguments);
/**
*
* @param extraPointers
* @param seed
* @param bufferSize
* @param ptrToBuffer
* @return
*/
Nd4jPointer initRandom(Nd4jPointer *extraPointers,
long seed,
long bufferSize,
Nd4jPointer ptrToBuffer);
/**
*
* @param extraPointers
* @param seed
* @param ptrRandom
*/
void refreshBuffer(Nd4jPointer *extraPointers,
long seed,
Nd4jPointer ptrRandom);
/**
*
* @param extraPointers
* @param seed
* @param ptrRandom
*/
void reSeedBuffer(Nd4jPointer *extraPointers,
long seed,
Nd4jPointer ptrRandom);
/**
*
* @param ptrRandom
*/
void destroyRandom(Nd4jPointer ptrRandom);
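/*
* A minimal sketch for the random-number methods above; the buffer size and op number are
* placeholders, and passing nullptr for extraPointers on the CPU backend is an assumption:
*
*   long seed = 119L;
*   Nd4jLong bufferElements = 100000;
*   Nd4jPointer hostBuffer = nativeOps.mallocHost(bufferElements * sizeof(Nd4jLong), 0);
*   Nd4jPointer rng = nativeOps.initRandom(nullptr, seed, bufferElements, hostBuffer);
*
*   int opNum = 0; // placeholder: number of the desired random op (e.g. uniform distribution)
*   nativeOps.execRandom(nullptr, opNum, rng, hZ, hZShapeInfo, nullptr, nullptr, extraArguments);
*
*   nativeOps.destroyRandom(rng);
*   nativeOps.freeHost(hostBuffer);
*/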
/**
* Grid operations
*/
/**
*
* @param extras
* @param opTypeA
* @param opNumA
* @param opTypeB
* @param opNumB
* @param N
* @param dx
* @param xShapeInfo
* @param dy
* @param yShapeInfo
* @param dz
* @param zShapeInfo
* @param extraA
* @param extraB
* @param scalarA
* @param scalarB
*/
/*
void execMetaPredicateShape(Nd4jPointer *extras,
const int opTypeA,
const int opNumA,
const int opTypeB,
const int opNumB,
Nd4jLong N,
void *hX, Nd4jLong *hXShapeBuffer,
void *dX, Nd4jLong *dXShapeBuffer,
void *hY, Nd4jLong *hYShapeBuffer,
void *dY, Nd4jLong *dYShapeBuffer,
void *hZ, Nd4jLong *hZShapeBuffer,
void *dZ, Nd4jLong *dZShapeBuffer,
void *extraA,
void *extraB,
double scalarA,
double scalarB);
*/
/**
*
* @param data
* @param shapeBuffer
* @param wordSize
* @param headerSize
* @return
*/
template <typename T>
static Nd4jPointer _numpyHeaderForNd4j(Nd4jPointer data,Nd4jPointer shapeBuffer,Nd4jLong wordSize,Nd4jLong *headerSize) {
Nd4jLong *shapeBufferCast = reinterpret_cast<Nd4jLong *>(shapeBuffer);
int rank = shape::rank(shapeBufferCast);
Nd4jLong *shape = shape::shapeOf(shapeBufferCast);
unsigned int *npShape = new unsigned int[rank];
for(int i = 0; i < rank; i++) {
npShape[i] = shape[i];
}
Nd4jLong length = shape::prodLong(shape,rank);
auto npHeader = cnpy::createNpyHeader<T>(data,npShape,rank,wordSize);
// the temporary shape copy is no longer needed once the header has been built
delete[] npShape;
char *ret = new char[npHeader.size() + 1];
int count = 0;
for(int i = 0; i < npHeader.size(); i++) {
ret[count] = npHeader[i];
count++;
}
ret[count] = '\0';
count++;
*headerSize = count;
return reinterpret_cast<Nd4jPointer>(ret);
}
Nd4jPointer numpyHeaderForNd4j(Nd4jPointer data,Nd4jPointer shapeBuffer,Nd4jLong wordSize,Nd4jLong *headerSize) {
auto shapeBufferCast = reinterpret_cast<Nd4jLong *>(shapeBuffer);
auto type = nd4j::ArrayOptions::dataType(shapeBufferCast);
BUILD_SINGLE_SELECTOR(type, return _numpyHeaderForNd4j, (data, shapeBuffer, wordSize, headerSize), LIBND4J_TYPES);
}
/**
* Load numpy from a header
* based on the cnpy parse from header method.
* @param data the header data to parse
* @return a pointer to a numpy cnpy:NpyArray struct
*/
Nd4jPointer loadNpyFromHeader(Nd4jPointer data) {
char *header = reinterpret_cast<char *>(data);
cnpy::NpyArray arr = cnpy::loadNpyFromHeader(header);
cnpy::NpyArray *ret = new cnpy::NpyArray();
ret->data = arr.data;
ret->wordSize = arr.wordSize;
ret->shape = arr.shape;
ret->fortranOrder = arr.fortranOrder;
return reinterpret_cast<Nd4jPointer>(ret);
}
/**
* Create a numpy array from an nd4j
* array
* @param data a pointer to the data
* @param shapeBuffer the shapebuffer for the nd4j array
* @param wordSize the word size (4 for float, 8 for doubles)
* @return a pointer to a numpy array
*/
template <typename T>
static Nd4jPointer _numpyFromNd4j(Nd4jPointer data,Nd4jPointer shapeBuffer,Nd4jLong wordSize) {
Nd4jLong *shapeBufferCast = reinterpret_cast<Nd4jLong *>(shapeBuffer);
int rank = shape::rank(shapeBufferCast);
Nd4jLong *shape = shape::shapeOf(shapeBufferCast);
unsigned int *npShape = new unsigned int[rank];
for(int i = 0; i < rank; i++) {
npShape[i] = shape[i];
}
Nd4jLong length = shape::prodLong(shape,rank);
auto npHeader = cnpy::createNpyHeader<T>(data,npShape,rank,wordSize);
char *dataChar = reinterpret_cast<char *>(data);
char *npHeaderData = npHeader.data();
char *ret = new char[(wordSize * length) + npHeader.size()];
char *cursorStart = ret;
// copy the numpy header first
std::memcpy(reinterpret_cast<void *>(cursorStart), reinterpret_cast<void *>(npHeaderData), npHeader.size());
// then advance past the header and copy the raw array data
cursorStart += npHeader.size();
std::memcpy(reinterpret_cast<void *>(cursorStart), reinterpret_cast<void *>(dataChar), length * wordSize);
delete[] npShape;
Nd4jPointer retPointer = reinterpret_cast<Nd4jPointer>(ret);
return retPointer;
}
Nd4jPointer numpyFromNd4j(Nd4jPointer data,Nd4jPointer shapeBuffer,Nd4jLong wordSize) {
auto shapeBufferCast = reinterpret_cast<Nd4jLong *>(shapeBuffer);
auto type = nd4j::ArrayOptions::dataType(shapeBufferCast);
BUILD_SINGLE_SELECTOR(type, return _numpyFromNd4j, (data, shapeBuffer, wordSize), LIBND4J_TYPES);
}
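/*
* A minimal sketch for numpyHeaderForNd4j()/numpyFromNd4j(), assuming `data` and
* `shapeBuffer` describe an existing float32 array (word size 4):
*
*   Nd4jLong headerSize = 0;
*   Nd4jPointer header    = nativeOps.numpyHeaderForNd4j(data, shapeBuffer, 4, &headerSize);
*   Nd4jPointer npyBuffer = nativeOps.numpyFromNd4j(data, shapeBuffer, 4);
*/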
/**
*
* @param npyArray
* @return
*/
Nd4jPointer shapeBufferForNumpy(Nd4jPointer npyArray);
/**
* Get the shape buffer from a
* numpy array.
* **Warning** this allocates memory
* @param npyArray
* @return
*/
Nd4jPointer shapeBufferForNumpyHeader(Nd4jPointer npyArray) {
cnpy::NpyArray arr = cnpy::loadNpyFromHeader(reinterpret_cast<char *>(npyArray));
auto shape = new unsigned int[arr.shape.size()];
for(unsigned int i = 0; i < arr.shape.size(); i++) {
shape[i] = arr.shape[i];
}
auto shapeBuffer = shape::shapeBufferOfNpy(arr.shape.size(), shape, arr.fortranOrder);
delete[] shape;
return reinterpret_cast<Nd4jPointer>(shapeBuffer);
}
/**
*
* @param npyArray
* @return
*/
Nd4jPointer dataPointForNumpyHeader(Nd4jPointer npyArray) {
cnpy::NpyArray arr = cnpy::loadNpyFromHeader(reinterpret_cast<char *>(npyArray));
unsigned char *dataToPrint = reinterpret_cast<unsigned char *>(arr.data);
return dataToPrint;
}
/**
*
* @param npyArray
* @return
*/
Nd4jPointer dataPointForNumpyStruct(Nd4jPointer npyArrayStruct) {
cnpy::NpyArray *arrPointer = reinterpret_cast<cnpy::NpyArray *>(npyArrayStruct);
unsigned char *dataToPrint = reinterpret_cast<unsigned char *>(arrPointer->data);
return reinterpret_cast<Nd4jPointer>(dataToPrint);
}
/**
*
* @param npyArray
* @param fromFile
* @return
*/
Nd4jPointer dataPointForNumpy(Nd4jPointer npyArray) {
char *npyArrayBuffer = reinterpret_cast< char *>(npyArray);
cnpy::NpyArray arr = cnpy::loadNpyFromPointer(npyArrayBuffer);
return dataPointForNumpyStruct(reinterpret_cast<Nd4jPointer>(&arr));
}
/**
* Load a numpy array from a file
* and return it as an Nd4jPointer
* @param path
* @return
*/
Nd4jPointer numpyFromFile(std::string path) {
char *numpyBuffer = cnpy::loadFile(path.data());
return reinterpret_cast<Nd4jPointer >(numpyBuffer);
}
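/*
* A minimal sketch for loading a .npy file with the helpers above; the path is a placeholder:
*
*   Nd4jPointer npy = nativeOps.numpyFromFile(std::string("/path/to/array.npy"));
*   Nd4jPointer dataPtr = nativeOps.dataPointForNumpy(npy);
*   int elemSize = nativeOps.elementSizeForNpyArray(npy);
*   // ... consume the data ...
*   nativeOps.releaseNumpy(npy);
*/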
////// NPZ //////
void* mapFromNpzFile(std::string path){
cnpy::npz_t* mapPtr = new cnpy::npz_t();
cnpy::npz_t map = cnpy::npzLoad(path);
mapPtr->insert(map.begin(), map.end());
return reinterpret_cast<void*>(mapPtr);
}
int getNumNpyArraysInMap(void *map){
cnpy::npz_t* arrays = reinterpret_cast<cnpy::npz_t*>(map);
int n = arrays->size();
return n;
}
const char* getNpyArrayNameFromMap(void *map, int index){
cnpy::npz_t* arrays = reinterpret_cast<cnpy::npz_t*>(map);
cnpy::npz_t::iterator it = arrays->begin();
cnpy::npz_t::iterator end = arrays->end();
int cnt = 0;
for(; it != end; ++it, ++cnt){
if (cnt == index){
// FIXME: @fariz, this is a leak!
return const_cast<const char *>(strdup(it->first.c_str()));
}
}
throw std::runtime_error("No array at index.");
}
void* getNpyArrayFromMap(void *map, int index){
cnpy::npz_t* arrays = reinterpret_cast<cnpy::npz_t*>(map);
cnpy::npz_t::iterator it = arrays->begin();
cnpy::npz_t::iterator end = arrays->end();
cnpy::NpyArray *arr = new cnpy::NpyArray();
int cnt = 0;
for(; it != end; ++it, ++cnt){
if (cnt == index){
*arr = it->second;
return arr;
}
}
throw std::runtime_error("No array at index.");
}
int dataTypeFromNpyHeader(void *header);
void* getNpyArrayData(void *npArray){
cnpy::NpyArray* npyArray2 = reinterpret_cast<cnpy::NpyArray*>(npArray);
return reinterpret_cast<void*>(npyArray2->data);
}
int getNpyArrayRank(void *npArray){
cnpy::NpyArray* arr = reinterpret_cast<cnpy::NpyArray*>(npArray);
int rank = arr->shape.size();
return rank;
}
Nd4jLong* getNpyArrayShape(void *npArray){
cnpy::NpyArray* arr = reinterpret_cast<cnpy::NpyArray*>(npArray);
int ndim = arr->shape.size();
Nd4jLong* shape = new Nd4jLong[ndim];
for (int i=0; i<ndim; i++){
shape[i] = arr->shape.at(i);
}
return shape;
}
char getNpyArrayOrder(void *npArray){
cnpy::NpyArray* arr = reinterpret_cast<cnpy::NpyArray*>(npArray);
return (arr->fortranOrder)?'f':'c';
}
int getNpyArrayElemSize(void *npArray){
cnpy::NpyArray* arr = reinterpret_cast<cnpy::NpyArray*>(npArray);
return arr->wordSize;
}
void deleteNPArrayStruct(void *npArray){
cnpy::NpyArray* arr = reinterpret_cast<cnpy::NpyArray*>(npArray);
delete arr;
}
void deleteNPArrayMap(void *map){
cnpy::npz_t* arrays = reinterpret_cast<cnpy::npz_t*>(map);
delete arrays;
}
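/*
* A minimal sketch for the NPZ helpers above; the path is a placeholder:
*
*   void *npz = nativeOps.mapFromNpzFile(std::string("/path/to/arrays.npz"));
*   int n = nativeOps.getNumNpyArraysInMap(npz);
*   for (int i = 0; i < n; i++) {
*       const char *name = nativeOps.getNpyArrayNameFromMap(npz, i);
*       void *arr = nativeOps.getNpyArrayFromMap(npz, i);
*       int rank = nativeOps.getNpyArrayRank(arr);
*       Nd4jLong *shape = nativeOps.getNpyArrayShape(arr);   // caller owns this buffer
*       void *data = nativeOps.getNpyArrayData(arr);
*       // ... consume the array ...
*       delete[] shape;
*       nativeOps.deleteNPArrayStruct(arr);
*   }
*   nativeOps.deleteNPArrayMap(npz);
*/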
//////
/**
* Get the element size for a numpy array
* @param npyArray the numpy array's address
* to get the element size for
* @return
*/
int elementSizeForNpyArray(Nd4jPointer npyArray) {
cnpy::NpyArray arr = cnpy::loadNpyFromPointer(reinterpret_cast<char *>(npyArray));
cnpy::NpyArray *arrPointer = &arr;
int size = arrPointer->wordSize;
// arrPointer->destruct();
return size;
}
/**
* Get the element size for a numpy array
* @param npyArray the numpy array's address
* to get the element size for
* @return
*/
int elementSizeForNpyArrayHeader(Nd4jPointer npyArray) {
cnpy::NpyArray arr = cnpy::loadNpyFromHeader(reinterpret_cast<char *>(npyArray));
cnpy::NpyArray *arrPointer = &arr;
int size = arrPointer->wordSize;
return size;
}
void releaseNumpy(Nd4jPointer npyArray) {
free(reinterpret_cast<void *>(npyArray));
}
/**
* Return the length of a shape buffer
* based on the pointer
* @param buffer the buffer pointer to check
* @return
*/
int lengthForShapeBufferPointer(Nd4jPointer buffer);
/**
* Returns the pointer corresponding to a given address
*
* @param address the address to get the pointer for
* @return the pointer for the given address
*/
Nd4jPointer pointerForAddress(Nd4jLong address);
/**
* This method takes single N-dimensional tensor, and copies its TADs to target arrays
*
* @param x
* @param xShapeInfo
* @param targets
* @param zShapeInfo
* @return
*/
void tear(Nd4jPointer *extraPointers,
void *x, Nd4jLong *xShapeInfo,
void *dx, Nd4jLong *dxShapeInfo,
Nd4jPointer *targets, Nd4jLong *zShapeInfo,
Nd4jLong *tadShapeInfo,
Nd4jLong *tadOffsets);
Nd4jLong encodeBitmap(Nd4jPointer *extraPointers, void *dx, Nd4jLong *xShapeInfo, Nd4jLong N, int *dz, float threshold);
void decodeBitmap(Nd4jPointer *extraPointers, void *dx, Nd4jLong N, void *dz, Nd4jLong *zShapeInfo);
void encodeThresholdP1(Nd4jPointer *extraPointers, void *dx, Nd4jLong *xShapeInfo, Nd4jLong N, int *dz, float threshold);
void encodeThresholdP2Int(Nd4jPointer *extraPointers, int *dx, Nd4jLong N, int *dz);
void encodeThresholdP3(Nd4jPointer *extraPointers, void *dx, Nd4jLong *xShapeInfo, int *offsets, Nd4jLong N, int *dz);
void decodeThreshold(Nd4jPointer *extraPointers, void *dx, Nd4jLong N, void *dz, Nd4jLong *zShapeInfo);
void sort(Nd4jPointer *extraPointers,
void *x, Nd4jLong *xShapeInfo,
void *dx, Nd4jLong *dxShapeInfo,
bool descending);
void sortByKey(Nd4jPointer *extraPointers,
void *x, Nd4jLong *xShapeInfo,
void *dx, Nd4jLong *dxShapeInfo,
void *y, Nd4jLong *yShapeInfo,
void *dy, Nd4jLong *dyShapeInfo,
bool descending);
void sortByValue(Nd4jPointer *extraPointers,
void *x, Nd4jLong *xShapeInfo,
void *dx, Nd4jLong *dxShapeInfo,
void *y, Nd4jLong *yShapeInfo,
void *dy, Nd4jLong *dyShapeInfo,
bool descending);
void sortTad(Nd4jPointer *extraPointers,
void *x, Nd4jLong *xShapeInfo,
void *dx, Nd4jLong *dxShapeInfo,
int *dimension,
int dimensionLength,
Nd4jLong *tadShapeInfo,
Nd4jLong *tadOffsets,
bool descending);
void sortTadByKey(Nd4jPointer *extraPointers,
void *x, Nd4jLong *xShapeInfo,
void *dx, Nd4jLong *dxShapeInfo,
void *y, Nd4jLong *yShapeInfo,
void *dy, Nd4jLong *dyShapeInfo,
int *dimension,
int dimensionLength,
bool descending);
void sortTadByValue(Nd4jPointer *extraPointers,
void *x, Nd4jLong *xShapeInfo,
void *dx, Nd4jLong *dxShapeInfo,
void *y, Nd4jLong *yShapeInfo,
void *dy, Nd4jLong *dyShapeInfo,
int *dimension,
int dimensionLength,
bool descending);
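/**
 * Usage sketch for the TAD key/value sorts above (illustrative only; assumes a
 * CPU backend where the device-side buffers may be passed as nullptr, and that
 * "keys", "keysShapeInfo", "vals", "valsShapeInfo" are hypothetical,
 * pre-populated host buffers of matching shape):
 * @code
 * NativeOps ops;
 * int dim = 1;                               // sort each TAD along dimension 1
 * ops.sortTadByKey(nullptr,
 *                  keys, keysShapeInfo,      // host keys
 *                  nullptr, nullptr,         // device keys (assumed unused on CPU)
 *                  vals, valsShapeInfo,      // host values, reordered together with the keys
 *                  nullptr, nullptr,         // device values
 *                  &dim, 1,                  // dimension(s) defining the TADs
 *                  false);                   // ascending order
 * @endcode
 */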
// special sort implementation for COO indices and values
void sortCooIndices(Nd4jPointer *extraPointers, Nd4jLong *indices, void *values, Nd4jLong length, int rank);
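/**
 * Usage sketch for sortCooIndices (illustrative only; assumes the indices
 * buffer holds length * rank coordinates in row-major order, one rank-sized
 * row per non-zero element, and that "cooIndices"/"cooValues" are hypothetical
 * host buffers):
 * @code
 * NativeOps ops;
 * Nd4jLong numNonZeros = 4;
 * int rank = 2;
 * // 4 (row, col) pairs plus 4 values, sorted into coordinate order in place
 * ops.sortCooIndices(nullptr, cooIndices, cooValues, numNonZeros, rank);
 * @endcode
 */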
Nd4jLong* mmapFile(Nd4jPointer *extraPointers, const char *fileName, Nd4jLong length);
void munmapFile(Nd4jPointer *extraPointers, Nd4jLong* ptrMap, Nd4jLong length);
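/**
 * Usage sketch for the memory-mapping helpers (illustrative only; the file
 * name and length are hypothetical, and the contents of the returned pointer
 * pair are backend-defined):
 * @code
 * NativeOps ops;
 * Nd4jLong fileLength = 1048576;
 * Nd4jLong *mapped = ops.mmapFile(nullptr, "/tmp/weights.bin", fileLength);
 * // ... read from the mapped region ...
 * ops.munmapFile(nullptr, mapped, fileLength);   // release the mapping
 * @endcode
 */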
// flatbuffers execution
nd4j::graph::ResultWrapper* executeFlatGraph(Nd4jPointer *extraPointers, Nd4jPointer flatBufferPointer);
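/**
 * Usage sketch for FlatBuffers graph execution (illustrative only;
 * "flatBufferPointer" is a hypothetical pointer to a serialized FlatGraph):
 * @code
 * NativeOps ops;
 * nd4j::graph::ResultWrapper *result = ops.executeFlatGraph(nullptr, flatBufferPointer);
 * // ... consume the serialized results ...
 * ops.deleteResultWrapper(result);   // declared below; frees the wrapper
 * @endcode
 */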
const char* getAllCustomOps();
const char* getAllOperations();
// custom op executor
int execCustomOp(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputs, Nd4jPointer* outputBuffers, Nd4jPointer* outputShapes, int numOutputs, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool* bArgs, int numBArgs, bool isInplace);
int execCustomOp(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer opContext);
nd4j::ShapeList* calculateOutputShapes(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputShapes, int numInputShapes, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs);
nd4j::ShapeList* calculateOutputShapes(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputShapes, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool *bArgs, int numBArgs);
void deleteShapeList(Nd4jPointer shapeList);
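/**
 * Usage sketch for the custom-op executor above (illustrative only; the op
 * hash, data buffers and shapeInfo pointers are hypothetical, and a real call
 * would use the hash of a registered custom op):
 * @code
 * NativeOps ops;
 * Nd4jPointer inBuffers[]  = { xBuffer };
 * Nd4jPointer inShapes[]   = { xShapeInfo };
 * Nd4jPointer outBuffers[] = { zBuffer };
 * Nd4jPointer outShapes[]  = { zShapeInfo };
 * int status = ops.execCustomOp(nullptr, opHash,
 *                               inBuffers, inShapes, 1,
 *                               outBuffers, outShapes, 1,
 *                               nullptr, 0,     // no double args
 *                               nullptr, 0,     // no integer args
 *                               nullptr, 0,     // no boolean args
 *                               false);         // not in-place
 *
 * // Output shapes can be pre-computed; the returned list must be released:
 * nd4j::ShapeList *shapes = ops.calculateOutputShapes(nullptr, opHash,
 *                                                     inBuffers, inShapes, 1,
 *                                                     nullptr, 0, nullptr, 0,
 *                                                     nullptr, 0);
 * ops.deleteShapeList(shapes);
 * @endcode
 */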
int registerGraph(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer flatBufferPointer);
nd4j::graph::VariablesSet *executeStoredGraph(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer *inputBuffers, Nd4jPointer *inputShapes, int* inputIndices, int numInputs);
int unregisterGraph(Nd4jPointer *extraPointers, Nd4jLong graphId);
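/**
 * Lifecycle sketch for stored-graph execution (illustrative only; the graph
 * id, FlatBuffers pointer, input buffers and input indices are hypothetical):
 * @code
 * NativeOps ops;
 * Nd4jLong graphId = 119;
 * ops.registerGraph(nullptr, graphId, flatBufferPointer);
 *
 * int inputIndices[]      = { 0 };
 * Nd4jPointer inBuffers[] = { xBuffer };
 * Nd4jPointer inShapes[]  = { xShapeInfo };
 * nd4j::graph::VariablesSet *outputs =
 *         ops.executeStoredGraph(nullptr, graphId, inBuffers, inShapes, inputIndices, 1);
 * // ... read the output variables ...
 * ops.deleteVariablesSet(outputs);    // declared below
 * ops.unregisterGraph(nullptr, graphId);
 * @endcode
 */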
void deleteCharArray(Nd4jPointer pointer);
void deleteIntArray(Nd4jPointer pointer);
void deleteLongArray(Nd4jPointer pointer);
void deletePointerArray(Nd4jPointer pointer);
void deleteVariablesSet(Nd4jPointer pointer);
// GraphState creation
Nd4jPointer getGraphState(Nd4jLong id);
void deleteGraphState(Nd4jPointer state);
void deleteResultWrapper(Nd4jPointer ptr);
int estimateThreshold(Nd4jPointer *extraPointers, Nd4jPointer x, Nd4jLong *xShapeInfo, int N, float threshold);
// this method executes an op that requires scopes to be present: if/while/cond/etc.
Nd4jStatus execCustomOpWithScope(Nd4jPointer *extraPointers, Nd4jPointer state, Nd4jLong opHash, Nd4jLong *scopes, int numScopes, Nd4jPointer *inputBuffers, Nd4jPointer *inputShapes, int numInputs, Nd4jPointer *outputBuffers, Nd4jPointer *outputShapes, int numOutputs);
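/**
 * Lifecycle sketch tying GraphState to scoped op execution (illustrative only;
 * the op hash, scope ids and buffers are hypothetical, and the exact scope
 * semantics depend on the logic op being executed):
 * @code
 * NativeOps ops;
 * Nd4jPointer state = ops.getGraphState(1L);
 * Nd4jLong scopes[] = { 0, 1 };        // e.g. condition scope + body scope
 * Nd4jStatus status = ops.execCustomOpWithScope(nullptr, state, whileOpHash,
 *                                               scopes, 2,
 *                                               inBuffers, inShapes, 1,
 *                                               outBuffers, outShapes, 1);
 * ops.deleteGraphState(state);
 * @endcode
 */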
//void fillUtf8String(Nd4jPointer *extraPointers, const char **string, int numStrings, Nd4jPointer buffer);
Nd4jPointer createUtf8String(Nd4jPointer *extraPointers, const char *string, int length);
void deleteUtf8String(Nd4jPointer *extraPointers, Nd4jPointer ptr);
void scatterUpdate(Nd4jPointer *extraPointers, int opCode, int numOfSubArrs,
void* hX, Nd4jLong* hXShapeInfo, Nd4jLong* hXOffsets,
void* dX, Nd4jLong* dXShapeInfo, Nd4jLong* dXOffsets,
void* hY, Nd4jLong* hYShapeInfo, Nd4jLong* hYOffsets,
void* dY, Nd4jLong* dYShapeInfo, Nd4jLong* dYOffsets,
int* hIindexes, int* dIindexes);
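/**
 * Usage sketch for scatterUpdate (illustrative only; the opCode value, host
 * buffers, TAD offsets and index array are hypothetical, and device-side
 * arguments are assumed to be ignorable as nullptr on a CPU backend):
 * @code
 * NativeOps ops;
 * int indices[] = { 0, 2 };            // sub-arrays of X to update
 * ops.scatterUpdate(nullptr, opCode, 2,
 *                   hX, hXShapeInfo, hXOffsets,
 *                   nullptr, nullptr, nullptr,
 *                   hY, hYShapeInfo, hYOffsets,
 *                   nullptr, nullptr, nullptr,
 *                   indices, nullptr);
 * @endcode
 */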
void inspectArray(Nd4jPointer *extraPointers, Nd4jPointer buffer, Nd4jLong *shapeInfo, Nd4jPointer specialBuffer, Nd4jLong *specialShapeInfo, Nd4jPointer debugInfo);
nd4j::ConstantDataBuffer* shapeBuffer(int rank, Nd4jLong *shape, Nd4jLong *strides, nd4j::DataType dtype, char order, Nd4jLong ews, bool empty);
nd4j::ConstantDataBuffer* constantBuffer(nd4j::DataType dtype, Nd4jLong *data, int length);
nd4j::ConstantDataBuffer* constantBuffer(nd4j::DataType dtype, double *data, int length);
nd4j::ConstantDataBuffer* constantBuffer(nd4j::DataType dtype, nd4j::ConstantDescriptor *descriptor);
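/**
 * Usage sketch for the constant shape-buffer factory (illustrative only; the
 * DataType enumerator name is assumed to be nd4j::DataType::FLOAT32 as defined
 * elsewhere in this library, and releasing via deleteShapeBuffer assumes the
 * caller owns the returned buffer):
 * @code
 * NativeOps ops;
 * Nd4jLong shape[]   = { 2, 3 };
 * Nd4jLong strides[] = { 3, 1 };       // c-order strides for a 2x3 array
 * nd4j::ConstantDataBuffer *shapeInfo =
 *         ops.shapeBuffer(2, shape, strides, nd4j::DataType::FLOAT32, 'c', 1, false);
 * // ... use the returned buffer as cached shape information ...
 * ops.deleteShapeBuffer(shapeInfo);
 * @endcode
 */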
void deleteShapeBuffer(Nd4jPointer ptr);
void deleteTadPack(Nd4jPointer ptr);
const char* runLightBenchmarkSuit(bool printOut);
const char* runFullBenchmarkSuit(bool printOut);
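/**
 * Usage sketch for the benchmark entry points (illustrative only; the printOut
 * flag is assumed to control printing from inside the suite):
 * @code
 * NativeOps ops;
 * const char *report = ops.runLightBenchmarkSuit(false);
 * // "report" holds the formatted benchmark results
 * @endcode
 */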
};
#endif //NATIVEOPERATIONS_NATIVEOPS_H