commit 44394da4b8
@@ -77,7 +77,7 @@ public class PythonObject {
         long address = bp.address();
         long size = bp.capacity();
-        NumpyArray npArr = NumpyArray.builder().address(address).shape(new long[]{size}).strides(new long[]{1}).dtype(DataType.BYTE).build();
+        NumpyArray npArr = NumpyArray.builder().address(address).shape(new long[]{size}).strides(new long[]{1}).dtype(DataType.INT8).build();
         nativePythonObject = Python.memoryview(new PythonObject(npArr)).nativePythonObject;
     }
 
 
@@ -320,20 +320,23 @@ public class PythonObject {
     public NumpyArray toNumpy() throws PythonException{
         PyObject np = PyImport_ImportModule("numpy");
         PyObject ndarray = PyObject_GetAttrString(np, "ndarray");
-        if (PyObject_IsInstance(nativePythonObject, ndarray) == 0){
+        if (PyObject_IsInstance(nativePythonObject, ndarray) != 1){
            throw new PythonException("Object is not a numpy array! Use Python.ndarray() to convert object to a numpy array.");
        }
         Py_DecRef(ndarray);
         Py_DecRef(np);
 
         Pointer objPtr = new Pointer(nativePythonObject);
         PyArrayObject npArr = new PyArrayObject(objPtr);
         Pointer ptr = PyArray_DATA(npArr);
-        SizeTPointer shapePtr = PyArray_SHAPE(npArr);
         long[] shape = new long[PyArray_NDIM(npArr)];
-        shapePtr.get(shape, 0, shape.length);
-        SizeTPointer stridesPtr = PyArray_STRIDES(npArr);
+        SizeTPointer shapePtr = PyArray_SHAPE(npArr);
+        if (shapePtr != null)
+            shapePtr.get(shape, 0, shape.length);
         long[] strides = new long[shape.length];
-        stridesPtr.get(strides, 0, strides.length);
+        SizeTPointer stridesPtr = PyArray_STRIDES(npArr);
+        if (stridesPtr != null)
+            stridesPtr.get(strides, 0, strides.length);
         int npdtype = PyArray_TYPE(npArr);
 
         DataType dtype;
 
@@ -345,28 +348,27 @@ public class PythonObject {
             case NPY_SHORT:
                 dtype = DataType.SHORT; break;
             case NPY_INT:
-                dtype = DataType.INT; break;
+                dtype = DataType.INT32; break;
             case NPY_LONG:
                 dtype = DataType.LONG; break;
             case NPY_UINT:
                 dtype = DataType.UINT32; break;
             case NPY_BYTE:
-                dtype = DataType.BYTE; break;
+                dtype = DataType.INT8; break;
             case NPY_UBYTE:
-                dtype = DataType.UBYTE; break;
+                dtype = DataType.UINT8; break;
             case NPY_BOOL:
                 dtype = DataType.BOOL; break;
             case NPY_HALF:
-                dtype = DataType.HALF; break;
+                dtype = DataType.FLOAT16; break;
             case NPY_LONGLONG:
                 dtype = DataType.INT64; break;
             case NPY_USHORT:
                 dtype = DataType.UINT16; break;
             case NPY_ULONG:
                 dtype = DataType.UINT64; break;
             case NPY_ULONGLONG:
                 dtype = DataType.UINT64; break;
             default:
                 throw new PythonException("Unsupported array data type: " + npdtype);
         }
 
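Note on the instanceof check above: PyObject_IsInstance() in the CPython C API is tri-state — it returns 1 on a match, 0 on no match, and -1 when the check itself fails with a Python exception pending — so comparing against 1 also treats the error case as "not an ndarray", which "== 0" silently let through. A minimal standalone C++ sketch of the same idea (the helper name is illustrative, not part of this codebase):

    #include <Python.h>

    // Treat anything other than 1 as "not a numpy ndarray"; this also covers
    // the -1 error return that an "== 0" comparison would misread as success.
    bool isNumpyNdarray(PyObject* obj, PyObject* ndarrayClass) {
        int rc = PyObject_IsInstance(obj, ndarrayClass);
        if (rc == -1)
            PyErr_Clear();   // sketch only; production code should surface the error
        return rc == 1;
    }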
@@ -0,0 +1,48 @@
+/* ******************************************************************************
+ * Copyright (c) 2020 Konduit K.K.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Apache License, Version 2.0 which is available at
+ * https://www.apache.org/licenses/LICENSE-2.0.
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ ******************************************************************************/
+
+package org.datavec.python;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.factory.Nd4j;
+
+import static junit.framework.TestCase.assertEquals;
+
+@RunWith(Parameterized.class)
+public class ScalarAndArrayTest {
+
+    @Parameterized.Parameters(name = "{index}: Testing with INDArray={0}")
+    public static INDArray[] data() {
+        return new INDArray[]{
+                Nd4j.scalar(10),
+                Nd4j.ones(10, 10, 10, 10)
+        };
+    }
+
+    private INDArray indArray;
+
+    public ScalarAndArrayTest(INDArray indArray) {
+        this.indArray = indArray;
+    }
+
+    @Test
+    public void testINDArray() throws PythonException {
+        assertEquals(indArray, new PythonObject(indArray).toNumpy().getNd4jArray());
+    }
+}
@@ -37,6 +37,11 @@ import static org.junit.Assert.assertEquals;
 @Slf4j
 public class AssertTestsExtendBaseClass extends BaseDL4JTest {
 
+    @Override
+    public long getTimeoutMilliseconds() {
+        return 240000L;
+    }
+
     //Set of classes that are exclusions to the rule (either run manually or have their own logging + timeouts)
     private static final Set<Class<?>> exclusions = new HashSet<>();
 
@@ -79,10 +79,11 @@ if(NOT SD_CUDA)
     if ("${OPENBLAS_PATH}" STREQUAL "")
         #we don't want OpenBLAS on Apple
         if (NOT APPLE)
            # note: this is not a typo
            set(BLA_VENDOR "OpenBLAS")
         endif()
 
-        # look around for system blas instead
+        # look around for system blas instead, see: https://cmake.org/cmake/help/latest/module/FindBLAS.html
         find_package(BLAS REQUIRED)
         if (BLAS_FOUND)
             message("Found external BLAS implementation: ${BLAS_LIBRARIES} ")
 
@@ -91,6 +92,7 @@ if(NOT SD_CUDA)
     else()
+        # if we have externally provided OPENBLAS_PATH - let's use it
         set(HAVE_OPENBLAS 1)
         message("Setting openblas")
         include_directories(${OPENBLAS_PATH}/include/)
         link_directories(${OPENBLAS_PATH} ${OPENBLAS_PATH}/lib/)
         set(OPENBLAS_LIBRARIES openblas)
 
@@ -5,7 +5,7 @@ project(mkldnn-download NONE)
 include(ExternalProject)
 ExternalProject_Add(mkldnn
     GIT_REPOSITORY https://github.com/intel/mkl-dnn.git
-    GIT_TAG v1.2.1
+    GIT_TAG v1.2.2
     SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/mkldnn-src"
     BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/mkldnn-build"
     CONFIGURE_COMMAND ""
 
@@ -49,7 +49,7 @@ if (SD_IOS_BUILD)
     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DSD_IOS_BUILD=true")
 endif()
 
-if(WIN32)
+if(WIN32 AND NOT ANDROID)
     get_property(dirs DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES)
     if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
         set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wa,-mbig-obj")
 
@@ -231,7 +231,11 @@ if(SD_CUDA)
                 ${LOOPS_SOURCES} ${ARRAY_SOURCES} ${TYPES_SOURCES}
                 ${MEMORY_SOURCES} ${GRAPH_SOURCES} ${CUSTOMOPS_SOURCES} ${INDEXING_SOURCES} ${EXCEPTIONS_SOURCES} ${OPS_SOURCES} ${PERF_SOURCES} ${CUSTOMOPS_CUDNN_SOURCES} ${CUSTOMOPS_MKLDNN_SOURCES})
 
-        add_library(${SD_LIBRARY_NAME} SHARED $<TARGET_OBJECTS:nd4jobj>)
+        # Don't output dynamic linked lib when a static lib build is specified unless the tests are built
+        if(NOT SD_STATIC_LIB OR SD_BUILD_TESTS)
+            add_library(${SD_LIBRARY_NAME} SHARED $<TARGET_OBJECTS:nd4jobj>)
+        endif()
 
 
         if (WIN32)
             message("MSVC runtime for library: ${MSVC_RT_LIB}")
 
@@ -241,7 +245,7 @@ if(SD_CUDA)
         if (SD_BUILD_TESTS OR SD_STATIC_LIB)
             add_library(${SD_LIBRARY_NAME}static STATIC $<TARGET_OBJECTS:nd4jobj>)
             set_property(TARGET ${SD_LIBRARY_NAME}static PROPERTY MSVC_RUNTIME_LIBRARY "${MSVC_RT_LIB}$<$<CONFIG:Debug>:Debug>")
-            install(TARGETS ${SD_LIBRARY_NAME}static DESTINATION .)
+            install(TARGETS ${SD_LIBRARY_NAME}static DESTINATION .)
         endif()
 
         # on windows we want to make sure we use MT or MD, but since we use it in one lib, we must use it everywhere to avoid conflicts
 
@@ -320,14 +324,16 @@ elseif(SD_CPU)
                 ${MEMORY_SOURCES} ${GRAPH_SOURCES} ${CUSTOMOPS_SOURCES} ${EXCEPTIONS_SOURCES} ${INDEXING_SOURCES} ${CUSTOMOPS_MKLDNN_SOURCES} ${CUSTOMOPS_GENERIC_SOURCES}
                 ${OPS_SOURCES} ${PERF_SOURCES})
     if(IOS)
         add_library(${SD_LIBRARY_NAME} STATIC $<TARGET_OBJECTS:nd4jobj>)
     else()
         # static library is built only if we're going to build tests, skip otherwise
         if (SD_BUILD_TESTS OR SD_STATIC_LIB)
             add_library(${SD_LIBRARY_NAME}static STATIC $<TARGET_OBJECTS:nd4jobj>)
         endif()
 
-        add_library(${SD_LIBRARY_NAME} SHARED $<TARGET_OBJECTS:nd4jobj>)
+        if(SD_BUILD_TESTS OR NOT SD_STATIC_LIB)
+            add_library(${SD_LIBRARY_NAME} SHARED $<TARGET_OBJECTS:nd4jobj>)
+        endif()
     endif()
 
     # we're including {MKLDNN} here in case of building from sources. in future that'll replace {MKLDNN_LIBRARIES}. same applies to BLAS
 
@@ -21,6 +21,33 @@ set -eu
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 cd "$DIR"
 
+setwindows_msys() {
+    if [[ $KERNEL == *"windows"* ]]; then
+        export CMAKE_COMMAND="$CMAKE_COMMAND -G \"MSYS Makefiles\""
+    fi
+}
+
+setandroid_defaults() {
+    if [[ -z ${ANDROID_NDK:-} ]]; then
+        export ANDROID_NDK=$HOME/Android/android-ndk/
+        echo "No ANDROID_NDK variable set. Setting to default of $ANDROID_NDK"
+    else
+        echo "USING ANDROID NDK $ANDROID_NDK"
+    fi
+
+    if [[ -z ${ANDROID_VERSION:-} ]]; then
+        export ANDROID_VERSION=21
+        echo "No ANDROID_VERSION variable set. Setting to default of $ANDROID_VERSION"
+    else
+        echo "USING ANDROID VERSION $ANDROID_VERSION"
+        # android needs static linking
+    fi
+}
+
 export CMAKE_COMMAND="cmake"
 if which cmake3 &> /dev/null; then
     export CMAKE_COMMAND="cmake3"
 
@@ -57,7 +84,7 @@ VERBOSE_ARG="VERBOSE=1"
 HELPER=
 CHECK_VECTORIZATION="OFF"
 NAME=
-while [[ $# > 0 ]]
+while [[ $# -gt 0 ]]
 do
     key="$1"
     value="${2:-}"
 
@@ -141,7 +168,7 @@ case $key in
     # unknown option
     ;;
 esac
-if [[ $# > 0 ]]; then
+if [[ $# -gt 0 ]]; then
     shift # past argument or value
 fi
 done
 
@@ -190,44 +217,65 @@ case "$OS" in
         if [ -z "$ARCH" ]; then
             ARCH="armv7-a"
         fi
-        export ANDROID_BIN="$ANDROID_NDK/toolchains/arm-linux-androideabi-4.9/prebuilt/$KERNEL/"
+
+        setandroid_defaults
+
+        # Note here for android 32 bit prefix on the binutils is different
+        # See https://developer.android.com/ndk/guides/other_build_systems
+        export ANDROID_BIN="$ANDROID_NDK/toolchains/arm-linux-androideabi/prebuilt/$KERNEL/"
         export ANDROID_CPP="$ANDROID_NDK/sources/cxx-stl/llvm-libc++/"
         export ANDROID_CC="$ANDROID_NDK/toolchains/llvm/prebuilt/$KERNEL/bin/clang"
-        export ANDROID_ROOT="$ANDROID_NDK/platforms/android-21/arch-arm/"
+        export ANDROID_ROOT="$ANDROID_NDK/platforms/android-$ANDROID_VERSION/arch-arm/"
         export CMAKE_COMMAND="$CMAKE_COMMAND -DCMAKE_TOOLCHAIN_FILE=cmake/android-arm.cmake -DSD_ANDROID_BUILD=true"
+        setwindows_msys
         ;;
 
     android-arm64)
         if [ -z "$ARCH" ]; then
            ARCH="armv8-a"
        fi
+
+        setandroid_defaults
+
+        echo "BUILDING ANDROID ARM with KERNEL $KERNEL"
         export ANDROID_BIN="$ANDROID_NDK/toolchains/aarch64-linux-android-4.9/prebuilt/$KERNEL/"
         export ANDROID_CPP="$ANDROID_NDK/sources/cxx-stl/llvm-libc++/"
         export ANDROID_CC="$ANDROID_NDK/toolchains/llvm/prebuilt/$KERNEL/bin/clang"
-        export ANDROID_ROOT="$ANDROID_NDK/platforms/android-21/arch-arm64/"
-        export CMAKE_COMMAND="$CMAKE_COMMAND -DCMAKE_TOOLCHAIN_FILE=cmake/android-arm64.cmake -DSD_ANDROID_BUILD=true"
+        export ANDROID_ROOT="$ANDROID_NDK/platforms/android-$ANDROID_VERSION/arch-arm64/"
+        export CMAKE_COMMAND="$CMAKE_COMMAND -DCMAKE_TOOLCHAIN_FILE=cmake/android-arm64.cmake -DSD_ANDROID_BUILD=true"
+        setwindows_msys
        ;;

    android-x86)
         if [ -z "$ARCH" ]; then
             ARCH="i686"
         fi
-        export ANDROID_BIN="$ANDROID_NDK/toolchains/x86-4.9/prebuilt/$KERNEL/"
+
+        setandroid_defaults
+        export ANDROID_BIN="$ANDROID_NDK/toolchains/arm-linux-androideabi-4.9/prebuilt/$KERNEL/"
         export ANDROID_CPP="$ANDROID_NDK/sources/cxx-stl/llvm-libc++/"
         export ANDROID_CC="$ANDROID_NDK/toolchains/llvm/prebuilt/$KERNEL/bin/clang"
-        export ANDROID_ROOT="$ANDROID_NDK/platforms/android-21/arch-x86/"
+        export ANDROID_ROOT="$ANDROID_NDK/platforms/android-$ANDROID_VERSION/arch-x86/"
         export CMAKE_COMMAND="$CMAKE_COMMAND -DCMAKE_TOOLCHAIN_FILE=cmake/android-x86.cmake -DSD_ANDROID_BUILD=true"
+        setwindows_msys
         ;;
 
     android-x86_64)
 
         if [ -z "$ARCH" ]; then
             ARCH="x86-64"
         fi
-        export ANDROID_BIN="$ANDROID_NDK/toolchains/x86_64-4.9/prebuilt/$KERNEL/"
+        echo "BUILDING ANDROID x86_64"
+
+        setandroid_defaults
+
+        export ANDROID_BIN="$ANDROID_NDK/toolchains/arm-linux-androideabi-4.9/prebuilt/$KERNEL/"
         export ANDROID_CPP="$ANDROID_NDK/sources/cxx-stl/llvm-libc++/"
         export ANDROID_CC="$ANDROID_NDK/toolchains/llvm/prebuilt/$KERNEL/bin/clang"
-        export ANDROID_ROOT="$ANDROID_NDK/platforms/android-21/arch-x86_64/"
+        export ANDROID_ROOT="$ANDROID_NDK/platforms/android-$ANDROID_VERSION/arch-x86_64/"
         export CMAKE_COMMAND="$CMAKE_COMMAND -DCMAKE_TOOLCHAIN_FILE=cmake/android-x86_64.cmake -DSD_ANDROID_BUILD=true"
+        setwindows_msys
         ;;
 
     ios-x86_64)
 
@@ -400,9 +448,9 @@ if [ -z "$NAME" ]; then
 fi
 
 if [ "$LIBTYPE" == "dynamic" ]; then
-    SHARED_LIBS_ARG="-DSD_SHARED_LIB=OFF"
+    SHARED_LIBS_ARG="-DSD_SHARED_LIB=ON -DSD_STATIC_LIB=OFF"
 else
-    SHARED_LIBS_ARG="-DSD_SHARED_LIB=ON"
+    SHARED_LIBS_ARG="-DSD_SHARED_LIB=OFF -DSD_STATIC_LIB=ON"
 fi
 
 if [ "$BUILD" == "release" ]; then
 
@@ -464,7 +512,9 @@ if [ "$CHIP" == "cuda" ] && [ -n "$CHIP_VERSION" ]; then
     esac
 fi
 
+
+[[ -z ${OPENBLAS_PATH:-} ]] && OPENBLAS_PATH=""
 OPENBLAS_PATH="${OPENBLAS_PATH//\\//}"
 
 if [[ -n "${BUILD_PATH:-}" ]]; then
     PREVIFS="$IFS"
 
@@ -537,7 +587,7 @@ echo CHECK_VECTORIZATION = "$CHECK_VECTORIZATION"
 echo HELPERS = "$HELPERS"
 mkbuilddir
 pwd
-eval $CMAKE_COMMAND "$BLAS_ARG" "$ARCH_ARG" "$NAME_ARG" -DSD_CHECK_VECTORIZATION="${CHECK_VECTORIZATION}" $HELPERS "$SHARED_LIBS_ARG" "$MINIFIER_ARG" "$OPERATIONS_ARG" "$BUILD_TYPE" "$PACKAGING_ARG" "$EXPERIMENTAL_ARG" "$TESTS_ARG" "$CUDA_COMPUTE" -DOPENBLAS_PATH="$OPENBLAS_PATH" -DDEV=FALSE -DCMAKE_NEED_RESPONSE=YES -DMKL_MULTI_THREADED=TRUE ../..
+eval "$CMAKE_COMMAND" "$BLAS_ARG" "$ARCH_ARG" "$NAME_ARG" -DSD_CHECK_VECTORIZATION="${CHECK_VECTORIZATION}" "$HELPERS" "$SHARED_LIBS_ARG" "$MINIFIER_ARG" "$OPERATIONS_ARG" "$BUILD_TYPE" "$PACKAGING_ARG" "$EXPERIMENTAL_ARG" "$TESTS_ARG" "$CUDA_COMPUTE" -DOPENBLAS_PATH="$OPENBLAS_PATH" -DDEV=FALSE -DCMAKE_NEED_RESPONSE=YES -DMKL_MULTI_THREADED=TRUE ../..
 
 if [ "$PARALLEL" == "true" ]; then
     MAKE_ARGUMENTS="$MAKE_ARGUMENTS -j $MAKEJ"
 
@@ -551,9 +601,10 @@ if [ "$CHECK_VECTORIZATION" == "ON" ]; then
     if [ "$MAKE_COMMAND" == "make" ]; then
         MAKE_ARGUMENTS="$MAKE_ARGUMENTS --output-sync=target"
     fi
-    exec 3>&1
-    eval $MAKE_COMMAND $MAKE_ARGUMENTS 2>&1 >&3 3>&- | python3 ../../auto_vectorization/auto_vect.py && cd ../../..
+
+    exec 3>&1
+    eval "$MAKE_COMMAND" "$MAKE_ARGUMENTS" 2>&1 >&3 3>&- | python3 ../../auto_vectorization/auto_vect.py && cd ../../..
     exec 3>&-
 else
-    eval $MAKE_COMMAND $MAKE_ARGUMENTS && cd ../../..
+    eval "$MAKE_COMMAND" "$MAKE_ARGUMENTS" && cd ../../..
 fi
 
@@ -1,27 +1,22 @@
 # CMake toolchain to build for Android 5.0 or newer. Sample usage:
 #
-# ANDROID_BIN="/path/to/android-ndk/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/" \
-# ANDROID_CPP="/path/to/android-ndk/sources/cxx-stl/llvm-libc++/" \
-# ANDROID_CC="/path/to/android-ndk/toolchains/llvm/prebuilt/linux-x86_64/bin/clang" \
-# ANDROID_ROOT="/path/to/android-ndk/platforms/android-21/arch-arm/" \
-# cmake -DCMAKE_TOOLCHAIN_FILE=android-arm.cmake -DCMAKE_INSTALL_PREFIX=..
-#
-# If you really need to use libnd4j on a CPU with no FPU, replace "libs/armeabi-v7a" by "libs/armeabi" and
-# "-march=armv7-a -mfloat-abi=softfp -mfpu=vfpv3-d16" with "-march=armv5te -mtune=xscale -msoft-float"
+set(CMAKE_SYSTEM_NAME Android)
+set(CMAKE_ANDROID_ARCH_ABI armeabi-v7a)
+set(CMAKE_ANDROID_NDK "$ENV{ANDROID_NDK}")
+set(CMAKE_ANDROID_STL_TYPE c++_shared)
+set(CMAKE_SYSTEM_VERSION "$ENV{ANDROID_VERSION}")
+set(CMAKE_ANDROID_NDK_TOOLCHAIN_VERSION clang)
 
-set(CMAKE_SYSTEM_NAME UnixPaths)
-set(CMAKE_SYSTEM_PROCESSOR arm)
 set(ANDROID TRUE)
+if (WIN32)
+    set(CMAKE_C_COMPILER "$ENV{ANDROID_CC}.exe")
+    set(CMAKE_CXX_COMPILER "$ENV{ANDROID_CC}++.exe")
+else()
+    set(CMAKE_C_COMPILER "$ENV{ANDROID_CC}")
+    set(CMAKE_CXX_COMPILER "$ENV{ANDROID_CC}++")
+endif (WIN32)
 
-set(CMAKE_C_COMPILER "$ENV{ANDROID_CC}")
-set(CMAKE_CXX_COMPILER "$ENV{ANDROID_CC}++")
-
-set(CMAKE_C_LINK_EXECUTABLE "<CMAKE_C_COMPILER> <FLAGS> <CMAKE_C_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -target armv7-none-linux-androideabi -Wl,--fix-cortex-a8 -Wl,--no-undefined -z text -o <TARGET> <LINK_LIBRARIES> -gcc-toolchain $ENV{ANDROID_BIN} --sysroot=$ENV{ANDROID_ROOT} -lm -lc")
-set(CMAKE_CXX_LINK_EXECUTABLE "<CMAKE_CXX_COMPILER> <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -target armv7-none-linux-androideabi -Wl,--fix-cortex-a8 -Wl,--no-undefined -z text -o <TARGET> <LINK_LIBRARIES> -gcc-toolchain $ENV{ANDROID_BIN} --sysroot=$ENV{ANDROID_ROOT} -L$ENV{ANDROID_CPP}/libs/armeabi-v7a/ -nostdlib++ -lc++_static -lc++abi -landroid_support -lm -lc")
-
-set(CMAKE_C_CREATE_SHARED_LIBRARY "<CMAKE_C_COMPILER> <CMAKE_SHARED_LIBRARY_C_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_C_FLAG><TARGET_SONAME> -target armv7-none-linux-androideabi -Wl,--fix-cortex-a8 -Wl,--no-undefined -z text -o <TARGET> <OBJECTS> <LINK_LIBRARIES> -gcc-toolchain $ENV{ANDROID_BIN} --sysroot=$ENV{ANDROID_ROOT} -lm -lc")
-set(CMAKE_CXX_CREATE_SHARED_LIBRARY "<CMAKE_CXX_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -target armv7-none-linux-androideabi -Wl,--fix-cortex-a8 -Wl,--no-undefined -z text -o <TARGET> <OBJECTS> <LINK_LIBRARIES> -gcc-toolchain $ENV{ANDROID_BIN} --sysroot=$ENV{ANDROID_ROOT} -L$ENV{ANDROID_CPP}/libs/armeabi-v7a/ -nostdlib++ -lc++_static -lc++abi -landroid_support -lm -lc")
+add_definitions(-D__ANDROID_API__=$ENV{ANDROID_VERSION} -DANDROID -fPIC -ffunction-sections -funwind-tables -fstack-protector-strong -target armv7a-linux-androideabi)
 
-add_definitions(-D__ANDROID_API__=21 -DANDROID -fPIC -ffunction-sections -funwind-tables -fstack-protector-strong -target armv7-none-linux-androideabi -march=armv7-a -mfloat-abi=softfp -mfpu=vfpv3-d16)
-
-include_directories("$ENV{ANDROID_CPP}/include/" "$ENV{ANDROID_CPP}/../llvm-libc++abi/include/" "$ENV{ANDROID_NDK}/sources/android/support/include/" "$ENV{ANDROID_CPP}/libs/armeabi-v7a/include/" "$ENV{ANDROID_NDK}/sysroot/usr/include/" "$ENV{ANDROID_NDK}/sysroot/usr/include/arm-linux-androideabi/" "$ENV{ANDROID_ROOT}/usr/include/")
 
@@ -1,24 +1,22 @@
-# CMake toolchain to build for Android 5.0 or newer. Sample usage:
-#
-# ANDROID_BIN="/path/to/android-ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/" \
-# ANDROID_CPP="/path/to/android-ndk/sources/cxx-stl/llvm-libc++/" \
-# ANDROID_CC="/path/to/android-ndk/toolchains/llvm/prebuilt/linux-x86_64/bin/clang" \
-# ANDROID_ROOT="/path/to/android-ndk/platforms/android-21/arch-arm64/" \
-# cmake -DCMAKE_TOOLCHAIN_FILE=android-arm64.cmake -DCMAKE_INSTALL_PREFIX=..
-
-set(CMAKE_SYSTEM_NAME UnixPaths)
-set(CMAKE_SYSTEM_PROCESSOR arm64)
-set(ANDROID TRUE)
-
-set(CMAKE_C_COMPILER "$ENV{ANDROID_CC}")
-set(CMAKE_CXX_COMPILER "$ENV{ANDROID_CC}++")
-
-set(CMAKE_C_LINK_EXECUTABLE "<CMAKE_C_COMPILER> <FLAGS> <CMAKE_C_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -target aarch64-none-linux-android -Wl,--no-undefined -z text -o <TARGET> <LINK_LIBRARIES> -gcc-toolchain $ENV{ANDROID_BIN} --sysroot=$ENV{ANDROID_ROOT} -lm -lc")
-set(CMAKE_CXX_LINK_EXECUTABLE "<CMAKE_CXX_COMPILER> <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -target aarch64-none-linux-android -Wl,--no-undefined -z text -o <TARGET> <LINK_LIBRARIES> -gcc-toolchain $ENV{ANDROID_BIN} --sysroot=$ENV{ANDROID_ROOT} -L$ENV{ANDROID_CPP}/libs/arm64-v8a/ -nostdlib++ -lc++_static -lc++abi -lm -lc")
-
-set(CMAKE_C_CREATE_SHARED_LIBRARY "<CMAKE_C_COMPILER> <CMAKE_SHARED_LIBRARY_C_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_C_FLAG><TARGET_SONAME> -target aarch64-none-linux-android -Wl,--no-undefined -z text -o <TARGET> <OBJECTS> <LINK_LIBRARIES> -gcc-toolchain $ENV{ANDROID_BIN} --sysroot=$ENV{ANDROID_ROOT} -lm -lc")
-set(CMAKE_CXX_CREATE_SHARED_LIBRARY "<CMAKE_CXX_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -target aarch64-none-linux-android -Wl,--no-undefined -z text -o <TARGET> <OBJECTS> <LINK_LIBRARIES> -gcc-toolchain $ENV{ANDROID_BIN} --sysroot=$ENV{ANDROID_ROOT} -L$ENV{ANDROID_CPP}/libs/arm64-v8a/ -nostdlib++ -lc++_static -lc++abi -lm -lc")
-
-add_definitions(-D__ANDROID_API__=21 -DANDROID -fPIC -ffunction-sections -funwind-tables -fstack-protector-strong -target aarch64-none-linux-android -march=armv8-a)
-
-include_directories("$ENV{ANDROID_CPP}/include/" "$ENV{ANDROID_CPP}/../llvm-libc++abi/include/" "$ENV{ANDROID_NDK}/sources/android/support/include/" "$ENV{ANDROID_CPP}/libs/arm64-v8a/include/" "$ENV{ANDROID_NDK}/sysroot/usr/include/" "$ENV{ANDROID_NDK}/sysroot/usr/include/aarch64-linux-android/" "$ENV{ANDROID_ROOT}/usr/include/")
+# CMake toolchain to build for Android 5.0 or newer. Sample usage:
+#
+set(CMAKE_SYSTEM_NAME Android)
+set(CMAKE_ANDROID_ARCH_ABI arm64-v8a)
+set(CMAKE_ANDROID_NDK "$ENV{ANDROID_NDK}")
+set(CMAKE_ANDROID_STL_TYPE c++_shared)
+set(CMAKE_SYSTEM_VERSION "$ENV{ANDROID_VERSION}")
+set(CMAKE_ANDROID_NDK_TOOLCHAIN_VERSION clang)
+
+set(ANDROID TRUE)
+if (WIN32)
+    set(CMAKE_C_COMPILER "$ENV{ANDROID_CC}.exe")
+    set(CMAKE_CXX_COMPILER "$ENV{ANDROID_CC}++.exe")
+else()
+    set(CMAKE_C_COMPILER "$ENV{ANDROID_CC}")
+    set(CMAKE_CXX_COMPILER "$ENV{ANDROID_CC}++")
+endif (WIN32)
+
+
+add_definitions(-D__ANDROID_API__=$ENV{ANDROID_VERSION} -DANDROID -fPIC -ffunction-sections -funwind-tables -fstack-protector-strong -target aarch64-none-linux-android -march=armv8-a)
 
@@ -1,24 +1,22 @@
 # CMake toolchain to build for Android 5.0 or newer. Sample usage:
 #
-# ANDROID_BIN="/path/to/android-ndk/toolchains/x86-4.9/prebuilt/linux-x86_64/" \
-# ANDROID_CPP="/path/to/android-ndk/sources/cxx-stl/llvm-libc++/" \
-# ANDROID_CC="/path/to/android-ndk/toolchains/llvm/prebuilt/linux-x86_64/bin/clang" \
-# ANDROID_ROOT="/path/to/android-ndk/platforms/android-21/arch-x86/" \
-# cmake -DCMAKE_TOOLCHAIN_FILE=android-x86.cmake -DCMAKE_INSTALL_PREFIX=..
+set(CMAKE_SYSTEM_NAME Android)
+set(CMAKE_ANDROID_ARCH_ABI x86)
+set(CMAKE_ANDROID_NDK "$ENV{ANDROID_NDK}")
+set(CMAKE_ANDROID_STL_TYPE c++_shared)
+set(CMAKE_SYSTEM_VERSION "$ENV{ANDROID_VERSION}")
+set(CMAKE_ANDROID_NDK_TOOLCHAIN_VERSION clang)
 
-set(CMAKE_SYSTEM_NAME UnixPaths)
-set(CMAKE_SYSTEM_PROCESSOR atom)
 set(ANDROID TRUE)
+if (WIN32)
+    set(CMAKE_C_COMPILER "$ENV{ANDROID_CC}.exe")
+    set(CMAKE_CXX_COMPILER "$ENV{ANDROID_CC}++.exe")
+else()
+    set(CMAKE_C_COMPILER "$ENV{ANDROID_CC}")
+    set(CMAKE_CXX_COMPILER "$ENV{ANDROID_CC}++")
+endif (WIN32)
 
-set(CMAKE_C_COMPILER "$ENV{ANDROID_CC}")
-set(CMAKE_CXX_COMPILER "$ENV{ANDROID_CC}++")
-
-set(CMAKE_C_LINK_EXECUTABLE "<CMAKE_C_COMPILER> <FLAGS> <CMAKE_C_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -target i686-none-linux-android -Wl,--no-undefined -z text -o <TARGET> <LINK_LIBRARIES> -gcc-toolchain $ENV{ANDROID_BIN} --sysroot=$ENV{ANDROID_ROOT} -lm -lc")
-set(CMAKE_CXX_LINK_EXECUTABLE "<CMAKE_CXX_COMPILER> <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -target i686-none-linux-android -Wl,--no-undefined -z text -o <TARGET> <LINK_LIBRARIES> -gcc-toolchain $ENV{ANDROID_BIN} --sysroot=$ENV{ANDROID_ROOT} -L$ENV{ANDROID_CPP}/libs/x86/ -nostdlib++ -lc++_static -lc++abi -landroid_support -lm -lc")
-
-set(CMAKE_C_CREATE_SHARED_LIBRARY "<CMAKE_C_COMPILER> <CMAKE_SHARED_LIBRARY_C_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_C_FLAG><TARGET_SONAME> -target i686-none-linux-android -Wl,--no-undefined -z text -o <TARGET> <OBJECTS> <LINK_LIBRARIES> -gcc-toolchain $ENV{ANDROID_BIN} --sysroot=$ENV{ANDROID_ROOT} -lm -lc")
-set(CMAKE_CXX_CREATE_SHARED_LIBRARY "<CMAKE_CXX_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -target i686-none-linux-android -Wl,--no-undefined -z text -o <TARGET> <OBJECTS> <LINK_LIBRARIES> -gcc-toolchain $ENV{ANDROID_BIN} --sysroot=$ENV{ANDROID_ROOT} -L$ENV{ANDROID_CPP}/libs/x86/ -nostdlib++ -lc++_static -lc++abi -landroid_support -lm -lc")
+add_definitions(-D__ANDROID_API__=$ENV{ANDROID_VERSION} -DANDROID -fPIC -ffunction-sections -funwind-tables -fstack-protector-strong -target i686-linux-android)
 
-add_definitions(-D__ANDROID_API__=21 -DANDROID -fPIC -ffunction-sections -funwind-tables -fstack-protector-strong -target i686-none-linux-android -march=i686 -mtune=atom -mssse3 -mfpmath=sse)
-
-include_directories("$ENV{ANDROID_CPP}/include/" "$ENV{ANDROID_CPP}/../llvm-libc++abi/include/" "$ENV{ANDROID_NDK}/sources/android/support/include/" "$ENV{ANDROID_CPP}/libs/x86/include/" "$ENV{ANDROID_NDK}/sysroot/usr/include/" "$ENV{ANDROID_NDK}/sysroot/usr/include/i686-linux-android/" "$ENV{ANDROID_ROOT}/usr/include/")
 
@@ -1,24 +1,21 @@
 # CMake toolchain to build for Android 5.0 or newer. Sample usage:
 #
-# ANDROID_BIN="/path/to/android-ndk/toolchains/x86_64-4.9/prebuilt/linux-x86_64/" \
-# ANDROID_CPP="/path/to/android-ndk/sources/cxx-stl/llvm-libc++/" \
-# ANDROID_CC="/path/to/android-ndk/toolchains/llvm/prebuilt/linux-x86_64/bin/clang" \
-# ANDROID_ROOT="/path/to/android-ndk/platforms/android-21/arch-x86_64/" \
-# cmake -DCMAKE_TOOLCHAIN_FILE=android-x86_64.cmake -DCMAKE_INSTALL_PREFIX=..
-
-set(CMAKE_SYSTEM_NAME UnixPaths)
-set(CMAKE_SYSTEM_PROCESSOR atom64)
+set(CMAKE_SYSTEM_NAME Android)
+set(CMAKE_ANDROID_ARCH_ABI x86_64)
+set(CMAKE_ANDROID_NDK "$ENV{ANDROID_NDK}")
+set(CMAKE_ANDROID_STL_TYPE c++_shared)
+set(CMAKE_SYSTEM_VERSION "$ENV{ANDROID_VERSION}")
+set(CMAKE_ANDROID_NDK_TOOLCHAIN_VERSION clang)
 
 set(ANDROID TRUE)
+if (WIN32)
+    set(CMAKE_C_COMPILER "$ENV{ANDROID_CC}.exe")
+    set(CMAKE_CXX_COMPILER "$ENV{ANDROID_CC}++.exe")
+else()
+    set(CMAKE_C_COMPILER "$ENV{ANDROID_CC}")
+    set(CMAKE_CXX_COMPILER "$ENV{ANDROID_CC}++")
+endif (WIN32)
 
-set(CMAKE_C_COMPILER "$ENV{ANDROID_CC}")
-set(CMAKE_CXX_COMPILER "$ENV{ANDROID_CC}++")
-
-set(CMAKE_C_LINK_EXECUTABLE "<CMAKE_C_COMPILER> <FLAGS> <CMAKE_C_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -target x86_64-none-linux-android -Wl,--no-undefined -z text -o <TARGET> <LINK_LIBRARIES> -gcc-toolchain $ENV{ANDROID_BIN} --sysroot=$ENV{ANDROID_ROOT} -lm -lc")
-set(CMAKE_CXX_LINK_EXECUTABLE "<CMAKE_CXX_COMPILER> <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -target x86_64-none-linux-android -Wl,--no-undefined -z text -o <TARGET> <LINK_LIBRARIES> -gcc-toolchain $ENV{ANDROID_BIN} --sysroot=$ENV{ANDROID_ROOT} -L$ENV{ANDROID_CPP}/libs/x86_64/ -nostdlib++ -lc++_static -lc++abi -lm -lc")
-
-set(CMAKE_C_CREATE_SHARED_LIBRARY "<CMAKE_C_COMPILER> <CMAKE_SHARED_LIBRARY_C_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_C_FLAG><TARGET_SONAME> -target x86_64-none-linux-android -Wl,--no-undefined -z text -o <TARGET> <OBJECTS> <LINK_LIBRARIES> -gcc-toolchain $ENV{ANDROID_BIN} --sysroot=$ENV{ANDROID_ROOT} -lm -lc")
-set(CMAKE_CXX_CREATE_SHARED_LIBRARY "<CMAKE_CXX_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG><TARGET_SONAME> -target x86_64-none-linux-android -Wl,--no-undefined -z text -o <TARGET> <OBJECTS> <LINK_LIBRARIES> -gcc-toolchain $ENV{ANDROID_BIN} --sysroot=$ENV{ANDROID_ROOT} -L$ENV{ANDROID_CPP}/libs/x86_64/ -nostdlib++ -lc++_static -lc++abi -lm -lc")
-
-add_definitions(-D__ANDROID_API__=21 -DANDROID -fPIC -ffunction-sections -funwind-tables -fstack-protector-strong -target x86_64-none-linux-android -march=x86-64 -mtune=atom)
-
-include_directories("$ENV{ANDROID_CPP}/include/" "$ENV{ANDROID_CPP}/../llvm-libc++abi/include/" "$ENV{ANDROID_NDK}/sources/android/support/include/" "$ENV{ANDROID_CPP}/libs/x86_64/include/" "$ENV{ANDROID_NDK}/sysroot/usr/include/" "$ENV{ANDROID_NDK}/sysroot/usr/include/x86_64-linux-android/" "$ENV{ANDROID_ROOT}/usr/include/")
+add_definitions(-D__ANDROID_API__=$ENV{ANDROID_VERSION} -DANDROID -fPIC -ffunction-sections -funwind-tables -fstack-protector-strong -target x86_64-none-linux-android)
 
@@ -277,13 +277,13 @@ namespace sd {
         /**
         * constructor creates new NDArray using shape information from "shapeInfo", set all elements in new array to zeros, if copyStrides is true then use stride values from "shapeInfo", else calculate strides independently
         */
-        NDArray(Nd4jLong* shapeInfo, const bool copyStrides = false, sd::LaunchContext* context = sd::LaunchContext::defaultContext());
+        NDArray(Nd4jLong* shapeInfo, const bool copyStrides = false, sd::LaunchContext* context = sd::LaunchContext::defaultContext(), const bool nullify = true);
 
         /**
         * constructor creates new NDArray using shape information from "shapeInfo", set all elements in new array to be zeros, if copyStrides is true then use stride values from "shapeInfo", else calculate strides independently
         * set dtype as array type
         */
-        NDArray(Nd4jLong* shapeInfo, const sd::DataType dtype, const bool copyStrides = false, sd::LaunchContext* context = sd::LaunchContext::defaultContext());
+        NDArray(Nd4jLong* shapeInfo, const sd::DataType dtype, const bool copyStrides = false, sd::LaunchContext* context = sd::LaunchContext::defaultContext(), const bool nullify = true);
 
         /**
         * this constructor creates new array using shape information contained in vector argument
 
@@ -143,7 +143,7 @@ NDArray::NDArray(void* buffer, const char order, const std::vector<Nd4jLong> &sh
 
 ////////////////////////////////////////////////////////////////////////
 // creates new NDArray using shape information from "shapeInfo" array, set all elements in new array to be zeros
-NDArray::NDArray(Nd4jLong* shapeInfo, const sd::DataType dtype, const bool copyStrides, sd::LaunchContext * context) {
+NDArray::NDArray(Nd4jLong* shapeInfo, const sd::DataType dtype, const bool copyStrides, sd::LaunchContext * context, const bool nullify) {
 
    if (shapeInfo == nullptr)
        throw std::runtime_error("NDArray constructor: can't be initalized without shapeinfo");
 
@@ -161,7 +161,9 @@ NDArray::NDArray(Nd4jLong* shapeInfo, const sd::DataType dtype, const bool copyS
 
     if (!isEmpty()) {
         _buffer = std::make_shared<DataBuffer>(lengthOf() * sizeOfT(), dtype, getContext()->getWorkspace());
-        _buffer->setToZeroBuffers();
+
+        if (nullify)
+            _buffer->setToZeroBuffers();
     }
 }
 
@@ -213,7 +215,7 @@ NDArray::NDArray(sd::LaunchContext * context) {
 
 ////////////////////////////////////////////////////////////////////////
 // creates new NDArray using shape information from "shapeInfo" array, set all elements in new array to be zeros, set dtype as array type
-NDArray::NDArray(Nd4jLong* shapeInfo, const bool copyStrides, sd::LaunchContext * context):
+NDArray::NDArray(Nd4jLong* shapeInfo, const bool copyStrides, sd::LaunchContext * context, const bool nullify):
     NDArray(shapeInfo, ArrayOptions::dataType(shapeInfo), copyStrides, context) {
 }
 
@@ -3339,9 +3341,6 @@ void NDArray::nullify() {
     if (isEmpty())
         return;
 
-    if (isS())
-        throw std::runtime_error("NDArray::nullify: can't nullify string array");
-
     if (isView() || ews() != 1)
         assign(0);
     else
 
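The new nullify flag makes zero-initialization of a freshly allocated buffer optional, so callers that are about to overwrite the whole output (see the OUTPUT_NULLIFIED changes further down) don't pay for a redundant fill. A minimal C++ sketch of the pattern, with illustrative names rather than the library's own:

    #include <cstdlib>
    #include <cstring>

    struct ScratchBuffer {
        void*  data;
        size_t bytes;
        // nullify defaults to true to keep the old zero-filled behaviour;
        // passing false skips the memset when the caller fills the buffer anyway.
        explicit ScratchBuffer(size_t n, bool nullify = true) : bytes(n) {
            data = std::malloc(n);
            if (nullify && data != nullptr)
                std::memset(data, 0, n);
        }
        ~ScratchBuffer() { std::free(data); }
    };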
@@ -54,6 +54,8 @@ class ND4J_EXPORT LaunchContext {
     static std::vector<std::shared_ptr<LaunchContext>> _contexts;
     static std::mutex _mutex;
 
+    static MAP_IMPL<int, std::mutex*> _deviceMutexes;
+
     // used for MKLDNN
     void *_engine = nullptr;
 
@@ -93,7 +95,6 @@ class ND4J_EXPORT LaunchContext {
     void setCudaSpecialStream(cudaStream_t* cudaStream);
     void setCublasHandle(void *handle);
 
-
 #endif // JCPP
 
 #endif // CUDA
 
@@ -111,6 +112,12 @@ class ND4J_EXPORT LaunchContext {
     void setDeviceID(int deviceID) { _deviceID = deviceID; }
     sd::ErrorReference* errorReference();
 
+#ifndef __JAVACPP_HACK__
+    // this method returns mutex shared between all threads that use the same device
+    static std::mutex* deviceMutex();
+
+#endif
+
     static bool isInitialized();
     static void releaseBuffers();
 
@@ -19,6 +19,7 @@
 //
 
 #include <execution/LaunchContext.h>
+#include <execution/AffinityManager.h>
 #include <helpers/logger.h>
 #include <exceptions/cuda_exception.h>
 #include <thread>
 
@@ -42,6 +43,8 @@ namespace sd {
     }
 
     std::vector<std::shared_ptr<LaunchContext>> LaunchContext::_contexts = std::vector<std::shared_ptr<LaunchContext>>();
+    MAP_IMPL<int, std::mutex*> LaunchContext::_deviceMutexes;
+    std::mutex LaunchContext::_mutex;
 
     ////////////////////////////////////////////////////////////////////////
     LaunchContext::LaunchContext() {
 
@@ -68,6 +71,10 @@ namespace sd {
         return LaunchContext::_contexts[0].get();
     }
 
+    std::mutex* LaunchContext::deviceMutex() {
+        return &_mutex;
+    }
+
     void LaunchContext::swapContextBuffers(ContextBuffers &buffers) {
         //
     }
 
@@ -31,6 +31,7 @@ namespace sd {
 
    std::vector<std::shared_ptr<LaunchContext>> LaunchContext::_contexts = std::vector<std::shared_ptr<LaunchContext>>();
    std::mutex LaunchContext::_mutex;
+   MAP_IMPL<int, std::mutex*> LaunchContext::_deviceMutexes;
 
 ////////////////////////////////////////////////////////////////////////
 LaunchContext::LaunchContext(cudaStream_t *cudaStream, cudaStream_t& specialCudaStream, void* reductionPointer, void* scalarPointer, int* allocationPointer) {
 
@@ -44,6 +45,11 @@ LaunchContext::LaunchContext(cudaStream_t *cudaStream, cudaStream_t& specialCuda
     _isAllocated = false;
 }
 
+std::mutex* LaunchContext::deviceMutex() {
+    auto deviceId = AffinityManager::currentDeviceId();
+    return _deviceMutexes[deviceId];
+}
+
 LaunchContext::~LaunchContext() {
     if (_isAllocated) {
 
@@ -85,6 +91,8 @@ LaunchContext::LaunchContext() {
 
         _contexts.resize(numDevices);
         for (int e = 0; e < numDevices; e++) {
+            _deviceMutexes[e] = new std::mutex();
+
             AffinityManager::setCurrentNativeDevice(e);
 
             LaunchContext::_contexts[e] = std::make_shared<LaunchContext>();
 
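The per-device mutex introduced here serializes access to handles that are not safe to share across threads (such as the cuBLAS handle used by MmulHelper in the next hunks). One mutex per GPU is created once at startup, and every thread targeting that device then locks the same mutex. A standalone C++ sketch of the idea, assuming a fixed device count known at init time:

    #include <map>
    #include <mutex>

    static std::map<int, std::mutex*> deviceMutexes;   // filled once during init

    void initDeviceMutexes(int numDevices) {
        for (int e = 0; e < numDevices; e++)
            deviceMutexes[e] = new std::mutex();       // lives for the whole process
    }

    std::mutex* deviceMutex(int deviceId) {
        return deviceMutexes.at(deviceId);
    }

    // usage at a call site, mirroring the MmulHelper hunks below:
    //   std::lock_guard<std::mutex> lock(*deviceMutex(currentDeviceId));
    //   ... issue cuBLAS calls on this device's handle ...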
@@ -252,6 +252,8 @@ NDArray* MmulHelper::mmulMxM(const NDArray* A, const NDArray* B, NDArray* C, dou
     const bool typeIntFloat = AB && aType == DataType::INT8 && cType == DataType::FLOAT32 && major >= 6;
     const bool typeHalfFloat = AB && aType == DataType::HALF && cType == DataType::FLOAT32 && major >= 6;
 
+    std::lock_guard<std::mutex> lock(*LaunchContext::deviceMutex());
+
     auto handle = reinterpret_cast<cublasHandle_t *>(A->getContext()->getCublasHandle());
     auto stream = A->getContext()->getCudaStream();
 
@@ -394,6 +396,8 @@ NDArray* MmulHelper::mmulMxV(const NDArray* A, const NDArray* X, sd::NDArray* Y,
     const bool typeDouble = AXY && aType == DataType::DOUBLE;
     const bool typeFloat = AXY && aType == DataType::FLOAT32;
 
+    std::lock_guard<std::mutex> lock(*LaunchContext::deviceMutex());
+
     auto handle = reinterpret_cast<cublasHandle_t *>(A->getContext()->getCublasHandle());
     auto stream = A->getContext()->getCudaStream();
 
@@ -4076,7 +4076,7 @@ INLINEDEF _CUDA_HD bool reshapeC(const Nd4jLong* oldShapeInfo, Nd4jLong* newShap
 
     // *** FIRST STAGE - exclude unity dimensions from oldShapeInfo and newShapeInfo (if such are present of course), since they don't affect on strides evaluation, however they complicate code
 
-    // FIXME - indeed we don't need to allocate so large memory amount (2*MAX_RANK), sufficient amount is (2*oldNumOfNonUnities + 2*newNumOfNonUnities)
+    // FIXME - indeed we don't need to allocate so large memory amount (4*MAX_RANK), sufficient amount is (2*oldNumOfNonUnities + 2*newNumOfNonUnities)
     Nd4jLong tempBuffer[4*MAX_RANK];
     Nd4jLong *oldShape = tempBuffer, *newShape = tempBuffer + 2*MAX_RANK, *oldStrides, *newStrides;
 
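The corrected comment reflects the buffer's actual layout: tempBuffer is split into two 2*MAX_RANK halves, the first for the old shape/strides pair and the second (starting at tempBuffer + 2*MAX_RANK) for the new pair, hence 4*MAX_RANK total. A compact C++ sketch of that arithmetic (the MAX_RANK value is illustrative, and the exact offsets of the strides pointers are an assumption; the diff only fixes the size named in the comment):

    #include <cstdint>

    constexpr int MAX_RANK = 32;                        // illustrative value
    int64_t tempBuffer[4 * MAX_RANK];
    int64_t* oldShape = tempBuffer;                     // old half: shape then strides
    int64_t* newShape = tempBuffer + 2 * MAX_RANK;      // new half: shape then strides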
@@ -45,6 +45,7 @@
 #include <ops/declarable/headers/util.h>
 #include <ops/declarable/headers/BarnesHutTsne.h>
 #include <ops/declarable/headers/images.h>
+#include <ops/declarable/headers/updaters.h>
 #include <system/dll.h>
 #include <helpers/shape.h>
 #include <helpers/TAD.h>
 
@@ -106,6 +106,7 @@ namespace sd {
             void storeResult(Context &block, int outputNumber, NDArray& array);
             void storeResult(Context &block, int outputNumber, NDArray* array);
             sd::NDArray* getZ(Context& block, int inputId = 0);
+            sd::NDArray* getNullifiedZ(Context& block, int inputId = 0);
 
             /**
             * This method pre-allocates NDArrays for Op output, in case they are not available at op execution time
 
@@ -77,7 +77,15 @@ namespace sd {
             * @param inputId
             * @return
             */
-            sd::NDArray *getZ(graph::Context &ctx, int inputId);
+            sd::NDArray* getZ(graph::Context &ctx, int inputId);
+
+            /**
+            * Helper method, needed for compatibility with DeclarableOp macros
+            * @param ctx
+            * @param inputId
+            * @return
+            */
+            sd::NDArray* getNullifiedZ(graph::Context &ctx, int inputId);
         };
     }
 }
 
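getNullifiedZ() pairs with the new OUTPUT_NULLIFIED macro used by the op implementations below: it behaves like getZ() but hands back an output array whose buffer has been zeroed, so ops that only write selected elements start from a clean slate. A plausible C++ sketch of the helper in terms of getZ() and NDArray::nullify() (this is an assumption about the implementation, not a quote of it):

    // a minimal sketch, assuming getZ() as declared above
    sd::NDArray* getNullifiedZ(graph::Context& ctx, int inputId) {
        auto z = getZ(ctx, inputId);
        if (z != nullptr)
            z->nullify();   // zero the buffer before the op writes into it
        return z;
    }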
@@ -30,7 +30,7 @@ namespace sd {
         CUSTOM_OP_IMPL(bits_hamming_distance, 2, 1, true, 0, 0) {
             auto x = INPUT_VARIABLE(0);
             auto y = INPUT_VARIABLE(1);
-            auto output = OUTPUT_VARIABLE(0);
+            auto output = OUTPUT_NULLIFIED(0);
 
             REQUIRE_TRUE(x->lengthOf() == y->lengthOf(), 0, "bits_hamming_distance: both arguments must have the same length");
             REQUIRE_TRUE(x->dataType() == y->dataType(), 0, "bits_hamming_distance: both arguments must have the same data type");
 
@@ -32,7 +32,7 @@ namespace sd {
             auto values = INPUT_VARIABLE(2);
             NDArray *def = nullptr;
 
-            auto output = OUTPUT_VARIABLE(0);
+            auto output = OUTPUT_NULLIFIED(0);
 
             if (block.width() > 3)
                 def = INPUT_VARIABLE(3);
 
@@ -30,7 +30,7 @@ namespace sd {
             auto input = INPUT_VARIABLE(0);
             auto delim = INPUT_VARIABLE(1);
 
-            auto indices = OUTPUT_VARIABLE(0);
+            auto indices = OUTPUT_NULLIFIED(0);
             auto values = OUTPUT_VARIABLE(1);
 
             auto d = delim->e<std::string>(0);
 
@@ -30,7 +30,7 @@ namespace sd {
         CUSTOM_OP_IMPL(lstsq, 2, 1, false, 0, 0) {
             auto a = INPUT_VARIABLE(0);
             auto b = INPUT_VARIABLE(1);
-            auto z = OUTPUT_VARIABLE(0);
+            auto z = OUTPUT_NULLIFIED(0);
             bool fastFlag = true;
             double l2_factor = 0.;
             if (block.numB() > 0) {
 
@@ -56,7 +56,7 @@ namespace sd {
         CUSTOM_OP_IMPL(solve_ls, 2, 1, false, 0, 0) {
             auto a = INPUT_VARIABLE(0);
             auto b = INPUT_VARIABLE(1);
-            auto z = OUTPUT_VARIABLE(0);
+            auto z = OUTPUT_NULLIFIED(0);
             bool fastFlag = true;
             double l2_factor = 0.;
             if (block.numB() > 0) {
@@ -114,7 +114,7 @@ namespace sd {
 
         CUSTOM_OP_IMPL(logdet, 1, 1, false, 0, 0) {
             auto input = INPUT_VARIABLE(0);
-            auto output = OUTPUT_VARIABLE(0);
+            auto output = OUTPUT_NULLIFIED(0);
 
             REQUIRE_TRUE(input->rankOf() >=2, 0, "logdet: The rank of input array should not less than 2, but %i is given", input->rankOf());
             REQUIRE_TRUE(input->sizeAt(-1) == input->sizeAt(-2), 0, "logdet: The last two dimmensions should be equal, but %i and %i are given", input->sizeAt(-1), input->sizeAt(-2));
@@ -28,7 +28,7 @@ namespace sd {
     namespace ops {
         CUSTOM_OP_IMPL(col2im, 1, 1, false, 0, 9) {
             auto x = INPUT_VARIABLE(0);
-            auto z = OUTPUT_VARIABLE(0);
+            auto z = OUTPUT_NULLIFIED(0);
 
             REQUIRE_TRUE(x->rankOf() == 6, 0, "col2im input should be 6D, but got %i instead", x->rankOf());
             REQUIRE_TRUE(z->rankOf() == 4, 0, "col2im output should be 4D, but got %i instead", z->rankOf());
 
@@ -45,8 +45,6 @@ namespace sd {
             LaunchContext* ctx = block.launchContext();
             helpers::col2im(*ctx, *x, *z, strideY, strideX, padHeight, padWidth, imgHeight, imgWidth, dY, dX);
 
-            STORE_RESULT(*z);
-
             return ND4J_STATUS_OK;
         }
         DECLARE_SHAPE_FN(col2im) {
 
@@ -34,10 +34,10 @@ namespace ops {
 CUSTOM_OP_IMPL(conv1d, 2, 1, false, 0, 5) {
 
     auto input   = INPUT_VARIABLE(0);                               // [bS, iW, iC] (NWC) or [bS, iC, iW] (NCW)
-    auto weights = INPUT_VARIABLE(1);                               // [kW, iC, oC] always
+    auto weights = INPUT_VARIABLE(1);                               // [kW, iC, oC], [oC, iC, kW], [oC, kW, iC]
     auto bias    = block.width() > 2 ? INPUT_VARIABLE(2) : nullptr; // [oC]
 
-    auto output = OUTPUT_VARIABLE(0);                               // [bS, oW, oC] (NWC) or [bS, oC, oW] (NCW)
+    auto output = OUTPUT_NULLIFIED(0);                              // [bS, oW, oC] (NWC) or [bS, oC, oW] (NCW)
 
     int kW = INT_ARG(0) > 0 ? INT_ARG(0) : static_cast<int>(weights->sizeAt(0));// filter(kernel) width
     int sW = INT_ARG(1);                                                        // strides width
 
@@ -45,12 +45,13 @@ CUSTOM_OP_IMPL(conv1d, 2, 1, false, 0, 5) {
     int dW = INT_ARG(3);                                                        // dilations width
     int paddingMode = INT_ARG(4);                                               // 0-VALID, 1-SAME, 2-CAUSAL
     int isNCW = block.getIArguments()->size() > 5 ? !INT_ARG(5) : 1;            // INT_ARG(4): 0-NCW, 1-NWC
+    int wFormat = block.getIArguments()->size() > 6 ? INT_ARG(6) : 0;           // 0 - [kW, iC, oC], 1 - [oC, iC, kW], 2 - [oC, kW, iC]
 
     const int rank = 3;
     REQUIRE_TRUE(input->rankOf() == rank, 0, "CUSTOM CONV1D OP: rank of input array must be equal to %i, but got %i instead !", rank, input->rankOf());
     REQUIRE_TRUE(weights->rankOf() == rank, 0, "CUSTOM CONV1D OP: rank of weights array must be equal to %i, but got %i instead !", rank, weights->rankOf());
 
-    int indIOioC, indIiW, indWoC(2);
+    int indIOioC, indIiW, indWoC(0 == wFormat ? 2 : 0);
     if(!isNCW) {
         indIOioC = 2; indIiW = 1;
     }
 
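The new wFormat integer argument selects the weights layout, as spelled out in the comments above: 0 keeps the legacy [kW, iC, oC] layout, 1 means [oC, iC, kW], and 2 means [oC, kW, iC]; indWoC therefore points at index 2 for format 0 and index 0 otherwise. The nested ternaries used for expectedWeightsShape in the following hunks are equivalent to this small C++ helper (the function name is illustrative; in the conv2d hunks further down, ConvolutionUtils::expectWeightsShape plays the same role):

    #include <cstdint>
    #include <vector>

    std::vector<int64_t> expectedConv1dWeightsShape(int wFormat, int kW, int iC, int oC) {
        switch (wFormat) {
            case 0:  return {kW, iC, oC};   // legacy layout
            case 1:  return {oC, iC, kW};
            default: return {oC, kW, iC};   // wFormat == 2
        }
    }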
@@ -63,7 +64,7 @@ CUSTOM_OP_IMPL(conv1d, 2, 1, false, 0, 5) {
     int iC = input->sizeAt(indIOioC);                        // input channels
     int oC = weights->sizeAt(indWoC);                        // output channels
 
-    std::vector<Nd4jLong> expectedWeightsShape = {kW, iC, oC};
+    std::vector<Nd4jLong> expectedWeightsShape = 0 == wFormat ? std::vector<Nd4jLong>({kW, iC, oC}) : (1 == wFormat ? std::vector<Nd4jLong>({oC, iC, kW}) : std::vector<Nd4jLong>({oC, kW, iC}));
     REQUIRE_TRUE(weights->isSameShape(expectedWeightsShape), 0, "CUSTOM CONV1D OP: wrong shape of weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsShape).c_str(), ShapeUtils::shapeAsString(weights).c_str());
     if (bias)
         REQUIRE_TRUE(bias->rankOf() <= 2 && oC == bias->lengthOf(), 0, "CUSTOM CONV1D OP: wrong shape of array with biases, expected rank, length: <=2, %i, but got %i, %i instead !", oC, bias->rankOf(), bias->lengthOf());
 
@@ -83,11 +84,11 @@ CUSTOM_OP_IMPL(conv1d, 2, 1, false, 0, 5) {
     auto weightsReshaped = weights->reshape(weights->ordering(), {1, weights->sizeAt(0), weights->sizeAt(1), weights->sizeAt(2)});  // [kW, iC, oC] -> [1, kW, iC, oC]
 
     sd::ops::conv2d conv2d;
-    const Nd4jStatus status = conv2d.execute({&inputReshaped, &weightsReshaped, bias}, {&outputReshaped}, {}, {1,kW, 1,sW, 0,pW, 1,dW, paddingMode, !isNCW}, {});
+    const Nd4jStatus status = conv2d.execute({&inputReshaped, &weightsReshaped, bias}, {&outputReshaped}, {}, {1,kW, 1,sW, 0,pW, 1,dW, paddingMode, !isNCW, wFormat}, {});
     if (status != ND4J_STATUS_OK)
         return status;
 
-    // ConvolutionUtils::conv2d(block, &inputReshaped, &weightsReshaped, bias, &outputReshaped, 1,kW, 1,sW, 0,pW, 1,dW, paddingMode, isNCW);
+    // ConvolutionUtils::conv2d(block, &inputReshaped, &weightsReshaped, bias, &outputReshaped, 1,kW, 1,sW, 0,pW, 1,dW, paddingMode, isNCW, wFormat);
 
     return Status::OK();
 }
 
@@ -105,8 +106,9 @@ DECLARE_SHAPE_FN(conv1d) {
     int dW = INT_ARG(3);                                                        // dilations width
     int paddingMode = INT_ARG(4);                                               // 0-VALID, 1-SAME
     int isNCW = block.getIArguments()->size() > 5 ? !INT_ARG(5) : 1;            // INT_ARG(4): 1-NWC, 0-NCW
+    int wFormat = block.getIArguments()->size() > 6 ? INT_ARG(6) : 0;           // 0 - [kW, iC, oC], 1 - [oC, iC, kW], 2 - [oC, kW, iC]
 
-    int indIOioC, indIiW, indWoC(2);
+    int indIOioC, indIiW, indWoC(0 == wFormat ? 2 : 0);
     if(!isNCW) {
         indIOioC = 2; indIiW = 1;
     }
 
@@ -123,7 +125,7 @@ DECLARE_SHAPE_FN(conv1d) {
     int iC = inputShapeInfo[indIOioC+1];                     // input channels
     int oC = weightsShapeInfo[indWoC+1];                     // output channels
 
-    std::vector<Nd4jLong> expectedWeightsShape = {kW, iC, oC};
+    std::vector<Nd4jLong> expectedWeightsShape = 0 == wFormat ? std::vector<Nd4jLong>({kW, iC, oC}) : (1 == wFormat ? std::vector<Nd4jLong>({oC, iC, kW}) : std::vector<Nd4jLong>({oC, kW, iC}));
     REQUIRE_TRUE(ShapeUtils::areShapesEqual(weightsShapeInfo, expectedWeightsShape), 0, "CUSTOM CONV1D OP: wrong shape of weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsShape).c_str(), ShapeUtils::shapeAsString(weightsShapeInfo).c_str());
     if (biasShapeInfo)
         REQUIRE_TRUE(biasShapeInfo[0] <= 2 && oC == shape::length(biasShapeInfo), 0, "CUSTOM CONV1D OP: wrong shape of array with biases, expected rank, length: <=2, %i, but got %i, %i instead !", oC, biasShapeInfo[0], shape::length(biasShapeInfo));
 
@@ -163,13 +165,13 @@ DECLARE_TYPES(conv1d) {
 CUSTOM_OP_IMPL(conv1d_bp, 3, 2, false, 0, 5) {
 
     auto input   = INPUT_VARIABLE(0);                                        // [bS, iW, iC] (NWC) or [bS, iC, iW] (NCW)
-    auto weights = INPUT_VARIABLE(1);                                        // [kW, iC, oC] always
+    auto weights = INPUT_VARIABLE(1);                                        // [kW, iC, oC], [oC, iC, kW], [oC, kW, iC]
     auto bias    = block.width() > 3 ? INPUT_VARIABLE(2) : nullptr;          // [oC]
     auto gradO   = block.width() > 3 ? INPUT_VARIABLE(3) : INPUT_VARIABLE(2);// [bS, oW, oC] (NWC) or [bS, oC, oW] (NCW), epsilon_next
 
-    auto gradI = OUTPUT_VARIABLE(0);                                         // [bS, iW, iC] (NWC) or [bS, iC, iW] (NCW), epsilon
-    auto gradW = OUTPUT_VARIABLE(1);                                         // [kW, iC, oC] always
-    auto gradB = block.width() > 3 ? OUTPUT_VARIABLE(2) : nullptr;           // [oC]
+    auto gradI = OUTPUT_NULLIFIED(0);                                        // [bS, iW, iC] (NWC) or [bS, iC, iW] (NCW), epsilon
+    auto gradW = OUTPUT_NULLIFIED(1);                                        // [kW, iC, oC], [oC, iC, kW], [oC, kW, iC]
+    auto gradB = block.width() > 3 ? OUTPUT_NULLIFIED(2) : nullptr;          // [oC]
 
     int kW = INT_ARG(0) > 0 ? INT_ARG(0) : static_cast<int>(weights->sizeAt(0));// filter(kernel) width
     int sW = INT_ARG(1);                                                        // strides width
 
@@ -177,12 +179,14 @@ CUSTOM_OP_IMPL(conv1d_bp, 3, 2, false, 0, 5) {
     int dW = INT_ARG(3);                                                        // dilations width
     int paddingMode = INT_ARG(4);                                               // 0-VALID, 1-SAME, 2-CAUSAL
     int isNCW = block.getIArguments()->size() > 5 ? !INT_ARG(5) : 1;            // INT_ARG(4): 1-NWC, 0-NCW
+    int wFormat = block.getIArguments()->size() > 6 ? INT_ARG(6) : 0;           // 0 - [kW, iC, oC], 1 - [oC, iC, kW], 2 - [oC, kW, iC]
 
     const int rank = 3;
     REQUIRE_TRUE(input->rankOf() == rank, 0, "CUSTOM CONV1D_BP OP: rank of input array must be equal to %i, but got %i instead !", rank, input->rankOf());
     REQUIRE_TRUE(weights->rankOf() == rank, 0, "CUSTOM CONV1D_BP OP: rank of weights array must be equal to %i, but got %i instead !", rank, weights->rankOf());
     REQUIRE_TRUE(gradO->rankOf() == rank, 0, "CUSTOM CONV1D_BP OP: rank of output gradients (next epsilon) array must be equal to %i, but got %i instead !", rank, gradO->rankOf());
-    int indIOioC, indIiW, indWoC(2);
+
+    int indIOioC, indIiW, indWoC(0 == wFormat ? 2 : 0);
     if(!isNCW) {
         indIOioC = 2; indIiW = 1;
     }
 
@@ -199,7 +203,7 @@ CUSTOM_OP_IMPL(conv1d_bp, 3, 2, false, 0, 5) {
     ConvolutionUtils::calcOutSizePool2D(trueoH,trueoW, 1,kW, 1,sW, 0,pW, 1,dW, 1,iW, paddingMode);
 
     std::vector<Nd4jLong> expectedGradOShape = ShapeUtils::composeShapeUsingDimsAndIdx({bS,oC,trueoW, 0,indIOioC,indIiW});
-    std::vector<Nd4jLong> expectedWeightsShape = {kW, iC, oC};
+    std::vector<Nd4jLong> expectedWeightsShape = 0 == wFormat ? std::vector<Nd4jLong>({kW, iC, oC}) : (1 == wFormat ? std::vector<Nd4jLong>({oC, iC, kW}) : std::vector<Nd4jLong>({oC, kW, iC}));
     REQUIRE_TRUE(gradO->isSameShape(expectedGradOShape), 0, "CUSTOM CONV1D_BP OP: wrong shape of output gradients (next epsilon) array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedGradOShape).c_str(), ShapeUtils::shapeAsString(gradO).c_str());
     REQUIRE_TRUE(weights->isSameShape(expectedWeightsShape), 0, "CUSTOM CONV1D_BP OP: wrong shape of weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsShape).c_str(), ShapeUtils::shapeAsString(weights).c_str());
     if(bias)
 
@@ -222,11 +226,11 @@ CUSTOM_OP_IMPL(conv1d_bp, 3, 2, false, 0, 5) {
     auto gradWReshaped = gradW ->reshape(gradW->ordering(), {1, weights->sizeAt(0), weights->sizeAt(1), weights->sizeAt(2)}, false);// [kW, iC, oC] -> [1, kW, iC, oC]
 
     sd::ops::conv2d_bp conv2dBP;
-    auto status = conv2dBP.execute({&inputReshaped, &weightsReshaped, bias, &gradOReshaped}, {&gradIReshaped, &gradWReshaped, gradB}, {}, {1,kW, 1,sW, 0,pW, 1,dW, paddingMode, !isNCW}, {});
+    auto status = conv2dBP.execute({&inputReshaped, &weightsReshaped, bias, &gradOReshaped}, {&gradIReshaped, &gradWReshaped, gradB}, {}, {1,kW, 1,sW, 0,pW, 1,dW, paddingMode, !isNCW, wFormat}, {});
     if (status != ND4J_STATUS_OK)
         return status;
 
-    // ConvolutionUtils::conv2dBP(block, &inputReshaped, &weightsReshaped, bias, &gradOReshaped, &gradIReshaped, &gradWReshaped, gradB, 1,kW, 1,sW, 0,pW, 1,dW, paddingMode, isNCW);
+    // ConvolutionUtils::conv2dBP(block, &inputReshaped, &weightsReshaped, bias, &gradOReshaped, &gradIReshaped, &gradWReshaped, gradB, 1,kW, 1,sW, 0,pW, 1,dW, paddingMode, isNCW, wFormat);
 
     return Status::OK();
 }
 
@@ -235,7 +239,7 @@ CUSTOM_OP_IMPL(conv1d_bp, 3, 2, false, 0, 5) {
 DECLARE_SHAPE_FN(conv1d_bp) {
 
     auto inputShapeInfo   = inputShape->at(0);                                        // [bS, iW, iC] (NWC) or [bS, iC, iW] (NCW)
-    auto weightsShapeInfo = inputShape->at(1);                                        // [kW, iC, oC] always
+    auto weightsShapeInfo = inputShape->at(1);                                        // [kW, iC, oC], [oC, iC, kW], [oC, kW, iC]
     Nd4jLong* biasShapeInfo  = block.width() > 3 ? inputShape->at(2) : nullptr;       // [oC]
     Nd4jLong* gradOShapeInfo = block.width() > 3 ? inputShape->at(3) : inputShape->at(2); // [bS, oW, oC] (NWC) or [bS, oC, oW] (NCW), epsilon_next
 
@@ -250,8 +254,9 @@ DECLARE_SHAPE_FN(conv1d_bp) {
     int dW = INT_ARG(3);                                                        // dilations width
     int paddingMode = INT_ARG(4);                                               // 0-VALID, 1-SAME
     int isNCW = block.getIArguments()->size() > 5 ? !INT_ARG(5) : 1;            // INT_ARG(4): 1-NWC, 0-NCW
+    int wFormat = block.getIArguments()->size() > 6 ? INT_ARG(6) : 0;           // 0 - [kW, iC, oC], 1 - [oC, iC, kW], 2 - [oC, kW, iC]
 
-    int indIOioC, indIiW, indWoC(2);
+    int indIOioC, indIiW, indWoC(0 == wFormat ? 2 : 0);
     if(!isNCW) {
         indIOioC = 2; indIiW = 1;
     }
 
@@ -268,7 +273,7 @@ DECLARE_SHAPE_FN(conv1d_bp) {
     ConvolutionUtils::calcOutSizePool2D(trueoH,trueoW, 1,kW, 1,sW, 0,pW, 1,dW, 1,iW, paddingMode);
 
     std::vector<Nd4jLong> expectedGradOShape = ShapeUtils::composeShapeUsingDimsAndIdx({bS,oC,trueoW, 0,indIOioC,indIiW});
-    std::vector<Nd4jLong> expectedWeightsShape = {kW, iC, oC};
+    std::vector<Nd4jLong> expectedWeightsShape = 0 == wFormat ? std::vector<Nd4jLong>({kW, iC, oC}) : (1 == wFormat ? std::vector<Nd4jLong>({oC, iC, kW}) : std::vector<Nd4jLong>({oC, kW, iC}));
     REQUIRE_TRUE(ShapeUtils::areShapesEqual(gradOShapeInfo, expectedGradOShape), 0, "CUSTOM CONV1D_BP OP: wrong shape of output gradients (next epsilon) array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedGradOShape).c_str(), ShapeUtils::shapeAsString(gradOShapeInfo).c_str());
     REQUIRE_TRUE(ShapeUtils::areShapesEqual(weightsShapeInfo, expectedWeightsShape), 0, "CUSTOM CONV1D_BP OP: wrong shape of weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsShape).c_str(), ShapeUtils::shapeAsString(weightsShapeInfo).c_str());
     if(biasShapeInfo)
 
@@ -37,10 +37,10 @@ namespace ops {

CUSTOM_OP_IMPL(conv2d, 2, 1, false, 0, 9) {

    auto input   = INPUT_VARIABLE(0);                                  // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW)
-   auto weights = INPUT_VARIABLE(1);                                  // [kH, kW, iC, oC] always
+   auto weights = INPUT_VARIABLE(1);                                  // [kH, kW, iC, oC], [oC, iC, kH, kW], [oC, kH, kW, iC]
    auto bias    = block.width() > 2 ? INPUT_VARIABLE(2) : nullptr;    // [oC]

-   auto output  = OUTPUT_VARIABLE(0);                                 // [bS, oH, oW, oC] (NHWC) or [bS, oC, oH, oW] (NCHW)
+   auto output  = OUTPUT_NULLIFIED(0);                                // [bS, oH, oW, oC] (NHWC) or [bS, oC, oH, oW] (NCHW)

    int sH = INT_ARG(2);                                               // strides height
    int sW = INT_ARG(3);                                               // strides width

@@ -49,21 +49,22 @@ CUSTOM_OP_IMPL(conv2d, 2, 1, false, 0, 9) {

    int dH = INT_ARG(6);                                               // dilations height
    int dW = INT_ARG(7);                                               // dilations width
    int isSameMode = INT_ARG(8);                                       // 0-VALID, 1-SAME
-   bool isNCHW = block.getIArguments()->size() > 9 ? !INT_ARG(9) : 1;  // INT_ARG(9): 0-NCHW, 1-NHWC
+   int isNCHW  = block.getIArguments()->size() > 9 ? !INT_ARG(9) : 1;  // INT_ARG(9): 0-NCHW, 1-NHWC
+   int wFormat = block.getIArguments()->size() > 10 ? INT_ARG(10) : 0; // 0 - [kH, kW, iC, oC], 1 - [oC, iC, kH, kW], 2 - [oC, kH, kW, iC]

    int kH = INT_ARG(0) > 0 ? INT_ARG(0) : static_cast<int>(weights->sizeAt(0)); // filter(kernel) height
    int kW = INT_ARG(1) > 0 ? INT_ARG(1) : static_cast<int>(weights->sizeAt(1)); // filter(kernel) width

    int bS, iC, iH, iW, oC, oH, oW;                       // batch size, input channels, input height/width, output channels, output height/width;
    int indIOioC, indIiH, indWoC, indWiC, indWkH, indOoH; // corresponding indexes
-   ConvolutionUtils::getSizesAndIndexesConv2d(isNCHW, *input, *output, bS, iC, iH, iW, oC, oH, oW, indIOioC, indIiH, indWiC, indWoC, indWkH, indOoH);
+   ConvolutionUtils::getSizesAndIndexesConv2d(isNCHW, wFormat, *input, *output, bS, iC, iH, iW, oC, oH, oW, indIOioC, indIiH, indWiC, indWoC, indWkH, indOoH);

-   std::vector<Nd4jLong> expectedWeightsShape = {kH, kW, iC, oC};
+   std::vector<Nd4jLong> expectedWeightsShape = ConvolutionUtils::expectWeightsShape(wFormat, kH, kW, iC, oC);
    REQUIRE_TRUE(weights->isSameShape(expectedWeightsShape), 0, "CUSTOM CONV2D OP: wrong shape of weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsShape).c_str(), ShapeUtils::shapeAsString(weights).c_str());
    if (bias)
        REQUIRE_TRUE(bias->rankOf() <= 2 && oC == bias->lengthOf(), 0, "CUSTOM CONV2D OP: wrong shape of array with biases, expected rank, length: <=2, %i, but got %i, %i instead !", oC, bias->rankOf(), bias->lengthOf());

-   ConvolutionUtils::conv2d(block, input, weights, bias, output, kH,kW,sH,sW,pH,pW,dH,dW,isSameMode,isNCHW);
+   ConvolutionUtils::conv2d(block, input, weights, bias, output, kH,kW,sH,sW,pH,pW,dH,dW,isSameMode,isNCHW,wFormat);

    return Status::OK();
}

@@ -73,7 +74,7 @@ CUSTOM_OP_IMPL(conv2d, 2, 1, false, 0, 9) {

DECLARE_SHAPE_FN(conv2d) {

    auto inputShapeInfo   = inputShape->at(0);                                 // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW)
-   auto weightsShapeInfo = inputShape->at(1);                                 // [kH, kW, iC, oC] always
+   auto weightsShapeInfo = inputShape->at(1);                                 // [kH, kW, iC, oC], [oC, iC, kH, kW], [oC, kH, kW, iC]
    auto biasShapeInfo    = block.width() > 2 ? inputShape->at(2) : nullptr;   // [oC]

    // output [bS, oH, oW, oC] (NHWC) or [bS, oC, oH, oW] (NCHW)

@@ -86,6 +87,7 @@ DECLARE_SHAPE_FN(conv2d) {

    int dW = INT_ARG(7);                                                // dilations width
    int isSameMode = INT_ARG(8);                                        // 0-VALID, 1-SAME
    int isNCHW  = block.getIArguments()->size() > 9 ? !INT_ARG(9) : 1;  // INT_ARG(9): 0-NCHW, 1-NHWC
+   int wFormat = block.getIArguments()->size() > 10 ? INT_ARG(10) : 0; // 0 - [kH, kW, iC, oC], 1 - [oC, iC, kH, kW], 2 - [oC, kH, kW, iC]

    int kH = INT_ARG(0) > 0 ? INT_ARG(0) : static_cast<int>(shape::sizeAt(weightsShapeInfo, 0)); // filter(kernel) height
    int kW = INT_ARG(1) > 0 ? INT_ARG(1) : static_cast<int>(shape::sizeAt(weightsShapeInfo, 1)); // filter(kernel) width

@@ -95,7 +97,7 @@ DECLARE_SHAPE_FN(conv2d) {

    REQUIRE_TRUE(inputShapeInfo[0]   == rank, 0, "CUSTOM CONV2D OP: rank of input array must be equal to %i, but got %i instead !", rank, inputShapeInfo[0]);
    REQUIRE_TRUE(weightsShapeInfo[0] == rank, 0, "CUSTOM CONV2D OP: rank of weights array must be equal to %i, but got %i instead !", rank, weightsShapeInfo[0]);

-   int indIOioC, indIiH, indWoC(3);
+   int indIOioC, indIiH, indWoC(0 == wFormat ? 3 : 0);
    if(!isNCHW) {
        indIOioC = 3; indIiH = 1;
    }

@@ -109,7 +111,7 @@ DECLARE_SHAPE_FN(conv2d) {

    const int iC = inputShapeInfo[indIOioC+1];     // input channels
    const int oC = weightsShapeInfo[indWoC+1];     // output channels

-   std::vector<Nd4jLong> expectedWeightsShape = {kH, kW, iC, oC};
+   std::vector<Nd4jLong> expectedWeightsShape = ConvolutionUtils::expectWeightsShape(wFormat, kH, kW, iC, oC);
    REQUIRE_TRUE(ShapeUtils::areShapesEqual(weightsShapeInfo, expectedWeightsShape), 0, "CUSTOM CONV2D OP: wrong shape of weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsShape).c_str(), ShapeUtils::shapeAsString(weightsShapeInfo).c_str());
    if (biasShapeInfo)
        REQUIRE_TRUE(biasShapeInfo[0] <= 2 && oC == shape::length(biasShapeInfo), 0, "CUSTOM CONV2D OP: wrong shape of array with biases, expected rank, length: <=2, %i, but got %i, %i instead !", oC, biasShapeInfo[0], shape::length(biasShapeInfo));
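The conv2d changes follow one pattern: every optional integer argument is read behind a size guard, so old call sites keep working. A self-contained sketch of that decoding convention (the struct and function names here are assumptions for illustration, not the library's API):

#include <cassert>
#include <vector>

struct Conv2dArgs {
    int kH, kW, sH, sW, pH, pW, dH, dW, isSameMode;
    int isNCHW;   // derived from optional INT_ARG(9): 0-NCHW, 1-NHWC
    int wFormat;  // optional INT_ARG(10), defaults to 0 ([kH, kW, iC, oC])
};

// Mirrors the pattern `block.getIArguments()->size() > N ? INT_ARG(N) : default`.
Conv2dArgs decodeConv2dArgs(const std::vector<int>& ia) {
    Conv2dArgs a{ia[0], ia[1], ia[2], ia[3], ia[4], ia[5], ia[6], ia[7], ia[8], 1, 0};
    a.isNCHW  = ia.size() > 9  ? !ia[9]  : 1;   // absent flag means NCHW
    a.wFormat = ia.size() > 10 ?  ia[10] : 0;   // absent format means kernel-first layout
    return a;
}

int main() {
    // Old 10-arg call sites keep working: wFormat silently defaults to 0.
    std::vector<int> legacy{3,3, 1,1, 0,0, 1,1, 1, 0};
    assert(decodeConv2dArgs(legacy).wFormat == 0);

    std::vector<int> withFormat{3,3, 1,1, 0,0, 1,1, 1, 0, 2};
    assert(decodeConv2dArgs(withFormat).wFormat == 2);
    return 0;
}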
@@ -157,13 +159,13 @@ DECLARE_SHAPE_FN(conv2d) {

CUSTOM_OP_IMPL(conv2d_bp, 3, 2, false, 0, 9) {

    auto input   = INPUT_VARIABLE(0);                                          // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW)
-   auto weights = INPUT_VARIABLE(1);                                          // [kH, kW, iC, oC] always
+   auto weights = INPUT_VARIABLE(1);                                          // [kH, kW, iC, oC], [oC, iC, kH, kW], [oC, kH, kW, iC]
    auto bias    = block.width() > 3 ? INPUT_VARIABLE(2) : nullptr;            // [oC]
    auto gradO   = block.width() > 3 ? INPUT_VARIABLE(3) : INPUT_VARIABLE(2);  // [bS, oH, oW, oC] (NHWC) or [bS, oC, oH, oW] (NCHW), epsilon_next

-   auto gradI = OUTPUT_VARIABLE(0);                                           // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW), epsilon
-   auto gradW = OUTPUT_VARIABLE(1);                                           // [kH, kW, iC, oC] always
-   auto gradB = block.width() > 3 ? OUTPUT_VARIABLE(2) : nullptr;             // [oC]
+   auto gradI = OUTPUT_NULLIFIED(0);                                          // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW), epsilon
+   auto gradW = OUTPUT_NULLIFIED(1);                                          // [kH, kW, iC, oC], [oC, iC, kH, kW], [oC, kH, kW, iC]
+   auto gradB = block.width() > 3 ? OUTPUT_NULLIFIED(2) : nullptr;            // [oC]

    int kH = INT_ARG(0);  // filter(kernel) height
    int kW = INT_ARG(1);  // filter(kernel) width

@@ -175,6 +177,7 @@ CUSTOM_OP_IMPL(conv2d_bp, 3, 2, false, 0, 9) {

    int dW = INT_ARG(7);                                                // dilations width
    int isSameMode = INT_ARG(8);                                        // 0-VALID, 1-SAME
    int isNCHW  = block.getIArguments()->size() > 9 ? !INT_ARG(9) : 1;  // INT_ARG(9): 0-NCHW, 1-NHWC
+   int wFormat = block.getIArguments()->size() > 10 ? INT_ARG(10) : 0; // 0 - [kH, kW, iC, oC], 1 - [oC, iC, kH, kW], 2 - [oC, kH, kW, iC]

    REQUIRE_TRUE(input->rankOf()   == 4, 0, "CUSTOM CONV2D_BP OP: rank of input array must be equal to 4, but got %i instead !", input->rankOf());
    REQUIRE_TRUE(weights->rankOf() == 4, 0, "CUSTOM CONV2D_BP OP: rank of weights array must be equal to 4, but got %i instead !", weights->rankOf());

@@ -182,19 +185,19 @@ CUSTOM_OP_IMPL(conv2d_bp, 3, 2, false, 0, 9) {

    int bS, iC, iH, iW, oC, oH, oW;                       // batch size, input channels, input height/width, output channels, output height/width;
    int indIOioC, indIiH, indWoC, indWiC, indWkH, indOoH; // corresponding indexes
-   ConvolutionUtils::getSizesAndIndexesConv2d(isNCHW, *input, *gradO, bS, iC, iH, iW, oC, oH, oW, indIOioC, indIiH, indWiC, indWoC, indWkH, indOoH);
+   ConvolutionUtils::getSizesAndIndexesConv2d(isNCHW, wFormat, *input, *gradO, bS, iC, iH, iW, oC, oH, oW, indIOioC, indIiH, indWiC, indWoC, indWkH, indOoH);

    int trueoH, trueoW; // true output height, width
    ConvolutionUtils::calcOutSizePool2D(trueoH, trueoW, kH, kW, sH, sW, pH, pW, dH, dW, iH, iW, isSameMode);

    std::vector<Nd4jLong> expectedGradOShape = ShapeUtils::composeShapeUsingDimsAndIdx({bS,oC,trueoH,trueoW, 0,indIOioC,indOoH,indOoH+1});
-   std::vector<Nd4jLong> expectedWeightsShape = {kH, kW, iC, oC};
+   std::vector<Nd4jLong> expectedWeightsShape = ConvolutionUtils::expectWeightsShape(wFormat, kH, kW, iC, oC);
    REQUIRE_TRUE(gradO->isSameShape(expectedGradOShape), 0, "CUSTOM CONV2D_BP OP: wrong shape of output gradients (next epsilon) array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedGradOShape).c_str(), ShapeUtils::shapeAsString(gradO).c_str());
    REQUIRE_TRUE(weights->isSameShape(expectedWeightsShape), 0, "CUSTOM CONV2D_BP OP: wrong shape of weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsShape).c_str(), ShapeUtils::shapeAsString(weights).c_str());
    if(bias)
        REQUIRE_TRUE(bias->rankOf() <= 2 && oC == bias->lengthOf(), 0, "CUSTOM CONV2D_BP OP: wrong shape of array with biases, expected rank, length: <=2, %i, but got %i, %i instead !", oC, bias->rankOf(), bias->lengthOf());

-   ConvolutionUtils::conv2dBP(block, input, weights, bias, gradO, gradI, gradW, gradB, kH,kW,sH,sW,pH,pW,dH,dW,isSameMode,isNCHW);
+   ConvolutionUtils::conv2dBP(block, input, weights, bias, gradO, gradI, gradW, gradB, kH,kW,sH,sW,pH,pW,dH,dW,isSameMode,isNCHW,wFormat);

    return Status::OK();
}

@@ -204,7 +207,7 @@ CUSTOM_OP_IMPL(conv2d_bp, 3, 2, false, 0, 9) {

DECLARE_SHAPE_FN(conv2d_bp) {

    auto inputShapeInfo   = inputShape->at(0);                                          // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW)
-   auto weightsShapeInfo = inputShape->at(1);                                          // [kH, kW, iC, oC] always
+   auto weightsShapeInfo = inputShape->at(1);                                          // [kH, kW, iC, oC], [oC, iC, kH, kW], [oC, kH, kW, iC]
    auto biasShapeInfo    = block.width() > 3 ? inputShape->at(2) : nullptr;            // [oC]
    auto gradOShapeInfo   = block.width() > 3 ? inputShape->at(3) : inputShape->at(2);  // [bS, oH, oW, oC] (NHWC) or [bS, oC, oH, oW] (NCHW), epsilon_next

@@ -224,8 +227,9 @@ DECLARE_SHAPE_FN(conv2d_bp) {

    const int dW = INT_ARG(7);                                                // dilations width
    const int isSameMode = INT_ARG(8);                                        // 0-VALID, 1-SAME
    const int isNCHW  = block.getIArguments()->size() > 9 ? !INT_ARG(9) : 1;  // INT_ARG(9): 0-NCHW, 1-NHWC
+   const int wFormat = block.getIArguments()->size() > 10 ? INT_ARG(10) : 0; // 0 - [kH, kW, iC, oC], 1 - [oC, iC, kH, kW], 2 - [oC, kH, kW, iC]

-   int indIOioC, indIiH, indOoH, indWoC(3);
+   int indIOioC, indIiH, indOoH, indWoC(0 == wFormat ? 3 : 0);
    if(!isNCHW) {
        indIOioC = 3; indIiH = 1; indOoH = 1;
    }

@@ -243,7 +247,7 @@ DECLARE_SHAPE_FN(conv2d_bp) {

    ConvolutionUtils::calcOutSizePool2D(trueoH, trueoW, kH, kW, sH, sW, pH, pW, dH, dW, iH, iW, isSameMode);

    std::vector<Nd4jLong> expectedGradOShape = ShapeUtils::composeShapeUsingDimsAndIdx({bS,oC,trueoH,trueoW, 0,indIOioC,indOoH,indOoH+1});
-   std::vector<Nd4jLong> expectedWeightsShape = {kH, kW, iC, oC};
+   std::vector<Nd4jLong> expectedWeightsShape = ConvolutionUtils::expectWeightsShape(wFormat, kH, kW, iC, oC);
    REQUIRE_TRUE(ShapeUtils::areShapesEqual(gradOShapeInfo, expectedGradOShape), 0, "CUSTOM CONV2D_BP OP: wrong shape of output gradients (next epsilon) array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedGradOShape).c_str(), ShapeUtils::shapeAsString(gradOShapeInfo).c_str());
    REQUIRE_TRUE(ShapeUtils::areShapesEqual(weightsShapeInfo, expectedWeightsShape), 0, "CUSTOM CONV2D_BP OP: wrong shape of weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsShape).c_str(), ShapeUtils::shapeAsString(weightsShapeInfo).c_str());
    if(biasShapeInfo)
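ShapeUtils::composeShapeUsingDimsAndIdx appears in every gradO check above. Judging from its call sites, the first half of the argument list holds dimension values and the second half the positions they occupy in the result; that reading is an inference from usage, not the library source. A sketch:

#include <cassert>
#include <vector>

// Inferred behaviour: the first half of `dimsAndIdx` holds dimension values,
// the second half the index each value occupies in the resulting shape.
std::vector<long long> composeShape(const std::vector<long long>& dimsAndIdx) {
    const size_t n = dimsAndIdx.size() / 2;
    std::vector<long long> shape(n);
    for (size_t i = 0; i < n; ++i)
        shape[dimsAndIdx[n + i]] = dimsAndIdx[i];
    return shape;
}

int main() {
    // NHWC gradO check: bS=2, oC=16, trueoH=8, trueoW=8 with
    // indIOioC=3, indOoH=1 -> expected [bS, oH, oW, oC].
    auto nhwc = composeShape({2, 16, 8, 8,  0, 3, 1, 2});
    assert((nhwc == std::vector<long long>{2, 8, 8, 16}));

    // NCHW: indIOioC=1, indOoH=2 -> expected [bS, oC, oH, oW].
    auto nchw = composeShape({2, 16, 8, 8,  0, 1, 2, 3});
    assert((nchw == std::vector<long long>{2, 16, 8, 8}));
    return 0;
}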
@@ -264,10 +268,10 @@ DECLARE_SHAPE_FN(conv2d_bp) {

CUSTOM_OP_IMPL(conv2d_input_bp, 3, 1, false, 0, 9) {

    auto gradIShape = INPUT_VARIABLE(0);  // [4]
-   auto weights    = INPUT_VARIABLE(1);  // [kH, kW, iC, oC] always
+   auto weights    = INPUT_VARIABLE(1);  // [kH, kW, iC, oC], [oC, iC, kH, kW], [oC, kH, kW, iC]
    auto gradO      = INPUT_VARIABLE(2);  // [bS, oH, oW, oC] (NHWC) or [bS, oC, oH, oW] (NCHW), epsilon_next

-   auto gradI = OUTPUT_VARIABLE(0);      // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW), epsilon
+   auto gradI = OUTPUT_NULLIFIED(0);     // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW), epsilon

    int kH = INT_ARG(0);  // filter(kernel) height
    int kW = INT_ARG(1);  // filter(kernel) width

@@ -279,6 +283,7 @@ CUSTOM_OP_IMPL(conv2d_input_bp, 3, 1, false, 0, 9) {

    int dW = INT_ARG(7);                                                // dilations width
    int isSameMode = INT_ARG(8);                                        // 0-VALID, 1-SAME
    int isNCHW  = block.getIArguments()->size() > 9 ? !INT_ARG(9) : 1;  // INT_ARG(9): 0-NCHW, 1-NHWC
+   int wFormat = block.getIArguments()->size() > 10 ? INT_ARG(10) : 0; // 0 - [kH, kW, iC, oC], 1 - [oC, iC, kH, kW], 2 - [oC, kH, kW, iC]

    const int rank = gradO->rankOf();

@@ -295,17 +300,17 @@ CUSTOM_OP_IMPL(conv2d_input_bp, 3, 1, false, 0, 9) {

    int bS, iC, iH, iW, oC, oH, oW;                       // batch size, input channels, input height/width, output channels, output height/width;
    int indIOioC, indIiH, indWoC, indWiC, indWkH, indOoH; // corresponding indexes
-   ConvolutionUtils::getSizesAndIndexesConv2d(isNCHW, input, *gradO, bS, iC, iH, iW, oC, oH, oW, indIOioC, indIiH, indWiC, indWoC, indWkH, indOoH);
+   ConvolutionUtils::getSizesAndIndexesConv2d(isNCHW, wFormat, input, *gradO, bS, iC, iH, iW, oC, oH, oW, indIOioC, indIiH, indWiC, indWoC, indWkH, indOoH);

    int trueoH, trueoW; // true output height, width
    ConvolutionUtils::calcOutSizePool2D(trueoH, trueoW, kH, kW, sH, sW, pH, pW, dH, dW, iH, iW, isSameMode);

    std::vector<Nd4jLong> expectedGradOShape = ShapeUtils::composeShapeUsingDimsAndIdx({bS,oC,trueoH,trueoW, 0,indIOioC,indOoH,indOoH+1});
-   std::vector<Nd4jLong> expectedWeightsShape = {kH, kW, iC, oC};
+   std::vector<Nd4jLong> expectedWeightsShape = ConvolutionUtils::expectWeightsShape(wFormat, kH, kW, iC, oC);
    REQUIRE_TRUE(gradO->isSameShape(expectedGradOShape), 0, "CUSTOM CONV2D_INPUT_BP OP: wrong shape of output gradients (next epsilon) array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedGradOShape).c_str(), ShapeUtils::shapeAsString(gradO).c_str());
    REQUIRE_TRUE(weights->isSameShape(expectedWeightsShape), 0, "CUSTOM CONV2D_INPUT_BP OP: wrong shape of weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsShape).c_str(), ShapeUtils::shapeAsString(weights).c_str());

-   ConvolutionUtils::conv2dBP(block, &input, weights, nullptr, gradO, gradI, nullptr, nullptr, kH,kW,sH,sW,pH,pW,dH,dW,isSameMode,isNCHW);
+   ConvolutionUtils::conv2dBP(block, &input, weights, nullptr, gradO, gradI, nullptr, nullptr, kH,kW,sH,sW,pH,pW,dH,dW,isSameMode,isNCHW,wFormat);

    return Status::OK();
}

@@ -321,7 +326,7 @@ CUSTOM_OP_IMPL(conv2d_input_bp, 3, 1, false, 0, 9) {

DECLARE_SHAPE_FN(conv2d_input_bp) {

    auto gradIShapeShapeInfo = inputShape->at(0);  // [4]
-   auto weightsShapeInfo    = inputShape->at(1);  // [kH, kW, iC, oC] always
+   auto weightsShapeInfo    = inputShape->at(1);  // [kH, kW, iC, oC], [oC, iC, kH, kW], [oC, kH, kW, iC]
    auto gradOShapeInfo      = inputShape->at(2);  // [bS, oH, oW, oC] (NHWC) or [bS, oC, oH, oW] (NCHW), epsilon_next

    const int rank = 4;

@@ -340,8 +345,9 @@ DECLARE_SHAPE_FN(conv2d_input_bp) {

    const int dW = INT_ARG(7);                                                // dilations width
    const int isSameMode = INT_ARG(8);                                        // 0-VALID, 1-SAME
    const int isNCHW  = block.getIArguments()->size() > 9 ? !INT_ARG(9) : 1;  // INT_ARG(9): 0-NCHW, 1-NHWC
+   const int wFormat = block.getIArguments()->size() > 10 ? INT_ARG(10) : 0; // 0 - [kH, kW, iC, oC], 1 - [oC, iC, kH, kW], 2 - [oC, kH, kW, iC]

-   int indIOioC, indIiH, indWoC(3), indOoH;
+   int indIOioC, indIiH, indWoC(0 == wFormat ? 3 : 0), indOoH;
    if(!isNCHW) {
        indIOioC = 3; indIiH = 1; indOoH = 1;
    }

@@ -361,7 +367,7 @@ DECLARE_SHAPE_FN(conv2d_input_bp) {

    ConvolutionUtils::calcOutSizePool2D(trueoH, trueoW, kH, kW, sH, sW, pH, pW, dH, dW, iH, iW, isSameMode);

    std::vector<Nd4jLong> expectedGradOShape = ShapeUtils::composeShapeUsingDimsAndIdx({bS,oC,trueoH,trueoW, 0,indIOioC,indOoH,indOoH+1});
-   std::vector<Nd4jLong> expectedWeightsShape = {kH, kW, iC, oC};
+   std::vector<Nd4jLong> expectedWeightsShape = ConvolutionUtils::expectWeightsShape(wFormat, kH, kW, iC, oC);
    REQUIRE_TRUE(ShapeUtils::areShapesEqual(gradOShapeInfo, expectedGradOShape), 0, "CUSTOM CONV2D_INPUT_BP OP: wrong shape of output gradients (next epsilon) array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedGradOShape).c_str(), ShapeUtils::shapeAsString(gradOShapeInfo).c_str());
    REQUIRE_TRUE(ShapeUtils::areShapesEqual(weightsShapeInfo, expectedWeightsShape), 0, "CUSTOM CONV2D_INPUT_BP OP: wrong shape of weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsShape).c_str(), ShapeUtils::shapeAsString(weightsShapeInfo).c_str());
@@ -32,7 +32,7 @@ namespace ops {

CUSTOM_OP_IMPL(conv3dnew, 2, 1, false, 0, 13) {

    auto input   = INPUT_VARIABLE(0);                                // [bS, iD, iH, iW, iC] (NDHWC) or [bS, iC, iD, iH, iW] (NCDHW)
-   auto weights = INPUT_VARIABLE(1);                                // [kD, kH, kW, iC, oC] always
+   auto weights = INPUT_VARIABLE(1);                                // [kD, kH, kW, iC, oC], [oC, iC, kD, kH, kW], [oC, kD, kH, kW, iC]
    auto bias    = block.width() > 2 ? INPUT_VARIABLE(2) : nullptr;  // [oC]
    auto output  = OUTPUT_VARIABLE(0);                               // [bS, oD, oH, oW, oC] (NDHWC) or [bS, oC, oD, oH, oW] (NCDHW)

@@ -52,14 +52,15 @@ CUSTOM_OP_IMPL(conv3dnew, 2, 1, false, 0, 13) {

    int dH = INT_ARG(10);                                                // dilations height
    int dW = INT_ARG(11);                                                // dilations width
    int paddingMode = INT_ARG(12);                                       // 1-SAME, 0-VALID
    int isNCDHW = block.getIArguments()->size() > 13 ? !INT_ARG(13) : 1; // INT_ARG(13): 1-NDHWC, 0-NCDHW
+   int wFormat = block.getIArguments()->size() > 14 ? INT_ARG(14) : 0;  // 0-[kD, kH, kW, iC, oC], 1-[oC, iC, kD, kH, kW], 2-[oC, kD, kH, kW, iC]

    int bS, iC, iD, iH, iW, oC, oD, oH, oW;          // batch size, input channels, input depth/height/width, output channels, output depth/height/width;
    int indIOioC, indIOioD, indWoC, indWiC, indWkD;  // corresponding indexes
-   ConvolutionUtils::getSizesAndIndexesConv3d(isNCDHW, *input, *output, bS, iC, iD, iH, iW, oC, oD, oH, oW, indIOioC, indIOioD, indWiC, indWoC, indWkD);
+   ConvolutionUtils::getSizesAndIndexesConv3d(isNCDHW, wFormat, *input, *output, bS, iC, iD, iH, iW, oC, oD, oH, oW, indIOioC, indIOioD, indWiC, indWoC, indWkD);

    REQUIRE_TRUE(paddingMode < 2, 0, "CUSTOM CONV3D OP: causal padding mode (paddingMode = 2) is not allowed for this operation !");
-   std::vector<Nd4jLong> expectedWeightsShape = {kD, kH, kW, iC, oC};
+   std::vector<Nd4jLong> expectedWeightsShape = ConvolutionUtils::expectWeightsShape(wFormat, kD, kH, kW, iC, oC);
    REQUIRE_TRUE(weights->isSameShape(expectedWeightsShape), 0, "CUSTOM CONV3D OP: wrong shape of weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsShape).c_str(), ShapeUtils::shapeAsString(weights).c_str());
    if (bias)
        REQUIRE_TRUE(bias->rankOf() <= 2 && oC == bias->lengthOf(), 0, "CUSTOM CONV3D OP: wrong shape of array with biases, expected rank, length: <=2, %i, but got %i, %i instead !", oC, bias->rankOf(), bias->lengthOf());

@@ -71,14 +72,24 @@ CUSTOM_OP_IMPL(conv3dnew, 2, 1, false, 0, 13) {

    std::vector<int> permutForOutput;

    if (isNCDHW)
        permutForOutput = {0,2,3,4,1};  // [bS, oC, oD, oH, oW] -> [bS, oD, oH, oW, oC]
    else
        input = new NDArray(input->permute({0,4,1,2,3}));

+   std::vector<int> wAxes;
+   if(0 == wFormat)
+       wAxes = {3,0,1,2};
+   else if(1 == wFormat)
+       wAxes = {1,2,3,4};
+   else
+       wAxes = {4,1,2,3};

    NDArray columns(input->ordering(), {bS, iC, kD, kH, kW, oD, oH, oW}, input->dataType(), block.launchContext());
    ConvolutionUtils::vol2col(block, *input, columns, sD, sH, sW, pD, pH, pW, dD, dH, dW);  // [bS, iC, iD, iH, iW] is convoluted to [bS, iC, kD, kH, kW, oD, oH, oW]
    // [bS, iC, kD, kH, kW, oD, oH, oW] x [kD, kH, kW, iC, oC] = [bS, oD, oH, oW, oC]
-   MmulHelper::tensorDot(&columns, weights, output, {1,2,3,4}, {3,0,1,2}, permutForOutput);
+   // [bS, iC, kD, kH, kW, oD, oH, oW] x [oC, iC, kD, kH, kW] = [bS, oD, oH, oW, oC]
+   // [bS, iC, kD, kH, kW, oD, oH, oW] x [oC, kD, kH, kW, iC] = [bS, oD, oH, oW, oC]
+   MmulHelper::tensorDot(&columns, weights, output, {1,2,3,4}, wAxes, permutForOutput);

    if(bias)
        // output->applyBroadcast(broadcast::Add, {indIOioC}, bias);

@@ -101,7 +112,7 @@ CUSTOM_OP_IMPL(conv3dnew, 2, 1, false, 0, 13) {

DECLARE_SHAPE_FN(conv3dnew) {

    auto inputShapeInfo   = inputShape->at(0);                                // [bS, iD, iH, iW, iC] (NDHWC) or [bS, iC, iD, iH, iW] (NCDHW)
-   auto weightsShapeInfo = inputShape->at(1);                                // [kD, kH, kW, iC, oC] always
+   auto weightsShapeInfo = inputShape->at(1);                                // [kD, kH, kW, iC, oC], [oC, iC, kD, kH, kW], [oC, kD, kH, kW, iC]
    auto biasShapeInfo    = block.width() > 2 ? inputShape->at(2) : nullptr;  // [oC]

    int kD = INT_ARG(0) > 0 ? INT_ARG(0) : static_cast<int>(shape::sizeAt(weightsShapeInfo, 0)); // filter(kernel) depth

@@ -118,13 +129,14 @@ DECLARE_SHAPE_FN(conv3dnew) {

    int dW = INT_ARG(11);                                                // dilations width
    int paddingMode = INT_ARG(12);                                       // 1-SAME, 0-VALID
    int isNCDHW = block.getIArguments()->size() > 13 ? !INT_ARG(13) : 1; // INT_ARG(13): 1-NDHWC, 0-NCDHW
+   int wFormat = block.getIArguments()->size() > 14 ? INT_ARG(14) : 0;  // 0-[kD, kH, kW, iC, oC], 1-[oC, iC, kD, kH, kW], 2-[oC, kD, kH, kW, iC]

    const int rank = 5;
    REQUIRE_TRUE(paddingMode < 2, 0, "CUSTOM CONV3D OP: causal padding mode (paddingMode = 2) is not allowed for this operation !");
    REQUIRE_TRUE(inputShapeInfo[0]   == rank, 0, "CUSTOM CONV3D OP: rank of input array must be equal to %i, but got %i instead !", rank, inputShapeInfo[0]);
    REQUIRE_TRUE(weightsShapeInfo[0] == rank, 0, "CUSTOM CONV3D OP: rank of weights array must be equal to %i, but got %i instead !", rank, weightsShapeInfo[0]);

-   int indIOioC, indIiD, indWoC(4);
+   int indIOioC, indIiD, indWoC(0 == wFormat ? 4 : 0);
    if(!isNCDHW) {
        indIOioC = 4; indIiD = 1;
    }

@@ -139,7 +151,7 @@ DECLARE_SHAPE_FN(conv3dnew) {

    int iC = inputShapeInfo[indIOioC+1];   // input channels
    int oC = weightsShapeInfo[indWoC+1];   // output channels

-   std::vector<Nd4jLong> expectedWeightsShape = {kD, kH, kW, iC, oC};
+   std::vector<Nd4jLong> expectedWeightsShape = ConvolutionUtils::expectWeightsShape(wFormat, kD, kH, kW, iC, oC);
    REQUIRE_TRUE(ShapeUtils::areShapesEqual(weightsShapeInfo, expectedWeightsShape), 0, "CUSTOM CONV3D OP: wrong shape of weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsShape).c_str(), ShapeUtils::shapeAsString(weightsShapeInfo).c_str());
    if (biasShapeInfo)
        REQUIRE_TRUE(biasShapeInfo[0] <= 2 && oC == shape::length(biasShapeInfo), 0, "CUSTOM CONV3D OP: wrong shape of array with biases, expected rank, length: <=2, %i, but got %i, %i instead !", oC, biasShapeInfo[0], shape::length(biasShapeInfo));
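The new wAxes in conv3dnew tells tensorDot which weight axes line up with axes {1,2,3,4} of columns, i.e. (iC, kD, kH, kW). A standalone consistency check of that claim for all three layouts (illustrative only, not part of the patch):

#include <cassert>
#include <vector>

int main() {
    const long long kD = 2, kH = 3, kW = 4, iC = 8, oC = 16;

    // The three supported conv3d weight layouts (wFormat 0, 1, 2).
    std::vector<std::vector<long long>> wShapes = {
        {kD, kH, kW, iC, oC}, {oC, iC, kD, kH, kW}, {oC, kD, kH, kW, iC}};
    // wAxes chosen in the op for each format.
    std::vector<std::vector<int>> wAxes = {{3,0,1,2}, {1,2,3,4}, {4,1,2,3}};

    // columns axes {1,2,3,4} carry (iC, kD, kH, kW); each wAxes entry must
    // address the same dimensions in the corresponding weight layout.
    const std::vector<long long> contracted = {iC, kD, kH, kW};
    for (size_t f = 0; f < 3; ++f)
        for (size_t i = 0; i < 4; ++i)
            assert(wShapes[f][wAxes[f][i]] == contracted[i]);
    return 0;
}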
@@ -174,12 +186,12 @@ DECLARE_SHAPE_FN(conv3dnew) {

CUSTOM_OP_IMPL(conv3dnew_bp, 3, 2, false, 0, 13) {

    auto input   = INPUT_VARIABLE(0);                                          // [bS, iD, iH, iW, iC] (NDHWC) or [bS, iC, iD, iH, iW] (NCDHW)
-   auto weights = INPUT_VARIABLE(1);                                          // [kD, kH, kW, iC, oC] always
+   auto weights = INPUT_VARIABLE(1);                                          // [kD, kH, kW, iC, oC], [oC, iC, kD, kH, kW], [oC, kD, kH, kW, iC]
    auto bias    = block.width() > 3 ? INPUT_VARIABLE(2) : nullptr;            // [oC]
    auto gradO   = block.width() > 3 ? INPUT_VARIABLE(3) : INPUT_VARIABLE(2);  // [bS, oD, oH, oW, oC] (NDHWC) or [bS, oC, oD, oH, oW] (NCDHW), epsilon_next

    auto gradI = OUTPUT_VARIABLE(0);                                           // [bS, iD, iH, iW, iC] (NDHWC) or [bS, iC, iD, iH, iW] (NCDHW), epsilon
-   auto gradW = OUTPUT_VARIABLE(1);                                           // [kD, kH, kW, iC, oC] always
+   auto gradW = OUTPUT_VARIABLE(1);                                           // [kD, kH, kW, iC, oC], [oC, iC, kD, kH, kW], [oC, kD, kH, kW, iC]
    auto gradB = block.width() > 3 ? OUTPUT_VARIABLE(2) : nullptr;             // [oC]

    REQUIRE_TRUE(input->rankOf() == 5, 0, "CUSTOM CONV3D_BP OP: rank of input array must be equal to 5, but got %i instead !", input->rankOf());

@@ -200,17 +212,18 @@ CUSTOM_OP_IMPL(conv3dnew_bp, 3, 2, false, 0, 13) {

    int dW = INT_ARG(11);                                                // dilations width
    int paddingMode = INT_ARG(12);                                       // 1-SAME, 0-VALID
    int isNCDHW = block.getIArguments()->size() > 13 ? !INT_ARG(13) : 1; // INT_ARG(13): 1-NDHWC, 0-NCDHW
+   int wFormat = block.getIArguments()->size() > 14 ? INT_ARG(14) : 0;  // 0-[kD, kH, kW, iC, oC], 1-[oC, iC, kD, kH, kW], 2-[oC, kD, kH, kW, iC]

    int bS, iC, iD, iH, iW, oC, oD, oH, oW;          // batch size, input channels, input depth/height/width, output channels, output depth/height/width;
    int indIOioC, indIOioD, indWoC, indWiC, indWkD;  // corresponding indexes
-   ConvolutionUtils::getSizesAndIndexesConv3d(isNCDHW, *input, *gradO, bS, iC, iD, iH, iW, oC, oD, oH, oW, indIOioC, indIOioD, indWiC, indWoC, indWkD);
+   ConvolutionUtils::getSizesAndIndexesConv3d(isNCDHW, wFormat, *input, *gradO, bS, iC, iD, iH, iW, oC, oD, oH, oW, indIOioC, indIOioD, indWiC, indWoC, indWkD);

    int trueoD, trueoH, trueoW;  // true output depth/height/width
    ConvolutionUtils::calcOutSizePool3D(trueoD, trueoH, trueoW, kD, kH, kW, sD, sH, sW, pD, pH, pW, dD, dH, dW, iD, iH, iW, paddingMode);

    REQUIRE_TRUE(paddingMode < 2, 0, "CUSTOM CONV3D_BP OP: causal padding mode (paddingMode = 2) is not allowed for this operation !");
    std::vector<Nd4jLong> expectedGradOShape = ShapeUtils::composeShapeUsingDimsAndIdx({bS,oC,trueoD,trueoH,trueoW, 0,indIOioC,indIOioD,indIOioD+1,indIOioD+2});
-   std::vector<Nd4jLong> expectedWeightsShape = {kD, kH, kW, iC, oC};
+   std::vector<Nd4jLong> expectedWeightsShape = ConvolutionUtils::expectWeightsShape(wFormat, kD, kH, kW, iC, oC);
    REQUIRE_TRUE(gradO->isSameShape(expectedGradOShape), 0, "CUSTOM CONV3D_BP OP: wrong shape of output gradients (next epsilon) array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedGradOShape).c_str(), ShapeUtils::shapeAsString(gradO).c_str());
    REQUIRE_TRUE(weights->isSameShape(expectedWeightsShape), 0, "CUSTOM CONV3D_BP OP: wrong shape of weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsShape).c_str(), ShapeUtils::shapeAsString(weights).c_str());
    if(bias)

@@ -231,10 +244,25 @@ CUSTOM_OP_IMPL(conv3dnew_bp, 3, 2, false, 0, 13) {

        gradOaxesForDot = {0,2,3,4};   // bS, oD, oH, oW
    }

+   std::vector<int> wPermut, colPermut;
+
+   if(0 == wFormat) {
+       wPermut   = {3,0,1,2,4};
+       colPermut = {2,3,4,1,0,5,6,7};
+   }
+   else if(1 == wFormat) {
+       wPermut   = {1,2,3,4,0};
+       colPermut = {1,2,3,4,0,5,6,7};
+   }
+   else {
+       wPermut   = {4,1,2,3,0};
+       colPermut = {2,3,4,1,0,5,6,7};
+   }

    // ----- calculation of gradW and gradB ----- //
    NDArray columns(input->ordering(), {bS, iC, kD, kH, kW, oD, oH, oW}, input->dataType(), block.launchContext());
    ConvolutionUtils::vol2col(block, *input, columns, sD, sH, sW, pD, pH, pW, dD, dH, dW);  // [bS, iC, iD, iH, iW] is convoluted to [bS, iC, kD, kH, kW, oD, oH, oW]
-   MmulHelper::tensorDot(&columns, gradO, gradW, {0,5,6,7}, gradOaxesForDot, {3,0,1,2,4}); // [bS, iC, kD, kH, kW, oD, oH, oW] x [bS, oD, oH, oW, oC]/[bS, oC, oD, oH, oW] = [iC, kD, kH, kW, oC]
+   MmulHelper::tensorDot(&columns, gradO, gradW, {0,5,6,7}, gradOaxesForDot, wPermut);     // [bS, iC, kD, kH, kW, oD, oH, oW] x [bS, oD, oH, oW, oC]/[bS, oC, oD, oH, oW] = [iC, kD, kH, kW, oC]

    // ----- calculation of gradB ----- //
    if(gradB) {

@@ -246,7 +274,10 @@ CUSTOM_OP_IMPL(conv3dnew_bp, 3, 2, false, 0, 13) {

    }

    // ----- calculation of gradI ----- //
-   MmulHelper::tensorDot(weights, gradO, &columns, {indWoC}, {indIOioC}, {2,3,4,1,0,5,6,7});  // [kD, kH, kW, iC, oC] x [bS, oD, oH, oW, oC]/[bS, oC, oD, oH, oW] = [kD, kH, kW, iC, bS, oD, oH, oW]
+   // [kD, kH, kW, iC, oC] x [bS, oD, oH, oW, oC]/[bS, oC, oD, oH, oW] = [kD, kH, kW, iC, bS, oD, oH, oW]
+   // [oC, iC, kD, kH, kW] x [bS, oD, oH, oW, oC]/[bS, oC, oD, oH, oW] = [kD, kH, kW, iC, bS, oD, oH, oW]
+   // [oC, kD, kH, kW, iC] x [bS, oD, oH, oW, oC]/[bS, oC, oD, oH, oW] = [kD, kH, kW, iC, bS, oD, oH, oW]
+   MmulHelper::tensorDot(weights, gradO, &columns, {indWoC}, {indIOioC}, colPermut);
    ConvolutionUtils::col2vol(block, columns, *gradI, sD, sH, sW, pD, pH, pW, dD, dH, dW);      // columns [bS, iC, kD, kH, kW, oD, oH, oW] is de-convoluted to [bS, iC, iD, iH, iW]

    if(!isNCDHW) {

@@ -270,7 +301,7 @@ CUSTOM_OP_IMPL(conv3dnew_bp, 3, 2, false, 0, 13) {

DECLARE_SHAPE_FN(conv3dnew_bp) {

    Nd4jLong* inputShapeInfo   = inputShape->at(0);                                          // [bS, iD, iH, iW, iC] (NDHWC) or [bS, iC, iD, iH, iW] (NCDHW)
-   Nd4jLong* weightsShapeInfo = inputShape->at(1);                                          // [kD, kH, kW, iC, oC] always
+   Nd4jLong* weightsShapeInfo = inputShape->at(1);                                          // [kD, kH, kW, iC, oC], [oC, iC, kD, kH, kW], [oC, kD, kH, kW, iC]
    Nd4jLong* biasShapeInfo    = block.width() > 3 ? inputShape->at(2) : nullptr;            // [oC]
    Nd4jLong* gradOShapeInfo   = block.width() > 3 ? inputShape->at(3) : inputShape->at(2);  // [bS, oD, oH, oW, oC] (NDHWC) or [bS, oC, oD, oH, oW] (NCDHW), epsilon_next

@@ -288,6 +319,7 @@ DECLARE_SHAPE_FN(conv3dnew_bp) {

    int dW = INT_ARG(11);                                                // dilations width
    int paddingMode = INT_ARG(12);                                       // 1-SAME, 0-VALID
    int isNCDHW = block.getIArguments()->size() > 13 ? !INT_ARG(13) : 1; // INT_ARG(13): 1-NDHWC, 0-NCDHW
+   int wFormat = block.getIArguments()->size() > 14 ? INT_ARG(14) : 0;  // 0-[kD, kH, kW, iC, oC], 1-[oC, iC, kD, kH, kW], 2-[oC, kD, kH, kW, iC]

    const int rank = 5;
    REQUIRE_TRUE(paddingMode < 2, 0, "CUSTOM CONV3D OP: causal padding mode (paddingMode = 2) is not allowed for this operation !");

@@ -295,7 +327,7 @@ DECLARE_SHAPE_FN(conv3dnew_bp) {

    REQUIRE_TRUE(weightsShapeInfo[0] == rank, 0, "CUSTOM CONV3D_BP OP: rank of weights array must be equal to %i, but got %i instead !", rank, weightsShapeInfo[0]);
    REQUIRE_TRUE(gradOShapeInfo[0]   == rank, 0, "CUSTOM CONV3D_BP OP: rank of output gradients (next epsilon) array must be equal to %i, but got %i instead !", rank, gradOShapeInfo[0]);

-   int indIOioC, indIiD, indWoC(4);
+   int indIOioC, indIiD, indWoC(0 == wFormat ? 4 : 0);
    if(!isNCDHW) {
        indIOioC = 4; indIiD = 1;
    }

@@ -314,7 +346,7 @@ DECLARE_SHAPE_FN(conv3dnew_bp) {

    ConvolutionUtils::calcOutSizePool3D(trueoD, trueoH, trueoW, kD, kH, kW, sD, sH, sW, pD, pH, pW, dD, dH, dW, iD, iH, iW, paddingMode);

    std::vector<Nd4jLong> expectedGradOShape = ShapeUtils::composeShapeUsingDimsAndIdx({bS,oC,trueoD,trueoH,trueoW, 0,indIOioC,indIiD,indIiD+1,indIiD+2});
-   std::vector<Nd4jLong> expectedWeightsShape = {kD, kH, kW, iC, oC};
+   std::vector<Nd4jLong> expectedWeightsShape = ConvolutionUtils::expectWeightsShape(wFormat, kD, kH, kW, iC, oC);
    REQUIRE_TRUE(ShapeUtils::areShapesEqual(gradOShapeInfo, expectedGradOShape), 0, "CUSTOM CONV3D_BP OP: wrong shape of output gradients (next epsilon) array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedGradOShape).c_str(), ShapeUtils::shapeAsString(gradOShapeInfo).c_str());
    REQUIRE_TRUE(ShapeUtils::areShapesEqual(weightsShapeInfo, expectedWeightsShape), 0, "CUSTOM CONV3D_BP OP: wrong shape of weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsShape).c_str(), ShapeUtils::shapeAsString(weightsShapeInfo).c_str());
    if(biasShapeInfo)
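For the gradI step of conv3dnew_bp, each colPermut variant should land the tensorDot product back on the columns layout [bS, iC, kD, kH, kW, oD, oH, oW] that col2vol expects. A standalone check (the permutation semantics, product axis i goes to position perm[i], is inferred from usage, not taken from the library source):

#include <cassert>
#include <string>
#include <vector>

using Axes = std::vector<std::string>;

// Send product axis i to destination position perm[i].
Axes place(const Axes& in, const std::vector<int>& perm) {
    Axes out(in.size());
    for (size_t i = 0; i < in.size(); ++i) out[perm[i]] = in[i];
    return out;
}

int main() {
    // After contracting oC away, weights contribute their remaining axes in
    // layout order, followed by gradO's (bS, oD, oH, oW).
    const Axes tail = {"bS", "oD", "oH", "oW"};
    const Axes columns = {"bS", "iC", "kD", "kH", "kW", "oD", "oH", "oW"};

    struct Case { Axes wRest; std::vector<int> colPermut; };
    const std::vector<Case> cases = {
        {{"kD", "kH", "kW", "iC"}, {2,3,4,1,0,5,6,7}},  // wFormat 0
        {{"iC", "kD", "kH", "kW"}, {1,2,3,4,0,5,6,7}},  // wFormat 1
        {{"kD", "kH", "kW", "iC"}, {2,3,4,1,0,5,6,7}},  // wFormat 2
    };
    for (const auto& c : cases) {
        Axes prod = c.wRest;
        prod.insert(prod.end(), tail.begin(), tail.end());
        assert(place(prod, c.colPermut) == columns);  // lands on columns layout
    }
    return 0;
}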
@@ -35,10 +35,10 @@ namespace ops {

CUSTOM_OP_IMPL(deconv2d, 2, 1, false, 0, 9) {

    auto input   = INPUT_VARIABLE(0);                                // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW)
-   auto weights = INPUT_VARIABLE(1);                                // [kH, kW, oC, iC] always
+   auto weights = INPUT_VARIABLE(1);                                // [kH, kW, oC, iC], [iC, oC, kH, kW], [iC, kH, kW, oC]
    auto bias    = block.width() > 2 ? INPUT_VARIABLE(2) : nullptr;  // [oC]

-   auto output  = OUTPUT_VARIABLE(0);                               // [bS, oH, oW, oC] (NHWC) or [bS, oC, oH, oW] (NCHW)
+   auto output  = OUTPUT_NULLIFIED(0);                              // [bS, oH, oW, oC] (NHWC) or [bS, oC, oH, oW] (NCHW)

    REQUIRE_TRUE(input->rankOf()   == 4, 0, "CUSTOM DECONV2D OP: rank of input array must be equal to 4, but got %i instead !", input->rankOf());
    REQUIRE_TRUE(weights->rankOf() == 4, 0, "CUSTOM DECONV2D OP: rank of weights array must be equal to 4, but got %i instead !", weights->rankOf());

@@ -53,12 +53,13 @@ CUSTOM_OP_IMPL(deconv2d, 2, 1, false, 0, 9) {

    int dW = INT_ARG(7);                                                // dilations width
    int isSameMode = INT_ARG(8);                                        // 0-VALID, 1-SAME
    int isNCHW  = block.getIArguments()->size() > 9 ? !INT_ARG(9) : 1;  // INT_ARG(9): 1-NHWC, 0-NCHW
+   int wFormat = block.getIArguments()->size() > 10 ? INT_ARG(10) : 0; // 0 - [kH, kW, oC, iC], 1 - [iC, oC, kH, kW], 2 - [iC, kH, kW, oC]

    int bS, iC, iH, iW, oC, oH, oW;                       // batch size, input channels, input height/width, output channels, output height/width;
    int indIOioC, indIiH, indWoC, indWiC, indWkH, indOoH; // corresponding indexes
-   ConvolutionUtils::getSizesAndIndexesConv2d(isNCHW, *input, *output, bS, iC, iH, iW, oC, oH, oW, indIOioC, indIiH, indWoC, indWiC, indWkH, indOoH);
+   ConvolutionUtils::getSizesAndIndexesConv2d(isNCHW, wFormat, *input, *output, bS, iC, iH, iW, oC, oH, oW, indIOioC, indIiH, indWoC, indWiC, indWkH, indOoH);

-   std::vector<Nd4jLong> expectedWeightsShape = {kH, kW, oC, iC};
+   std::vector<Nd4jLong> expectedWeightsShape = ConvolutionUtils::expectWeightsShape(wFormat, kH, kW, oC, iC);
    REQUIRE_TRUE(weights->isSameShape(expectedWeightsShape), 0, "CUSTOM DECONV2D OP: wrong shape of weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsShape).c_str(), ShapeUtils::shapeAsString(weights).c_str());
    if (bias)
        REQUIRE_TRUE(bias->rankOf() <= 2 && oC == bias->lengthOf(), 0, "CUSTOM DECONV2D OP: wrong shape of array with biases, expected rank, length: <=2, %i, but got %i, %i instead !", oC, bias->rankOf(), bias->lengthOf());

@@ -66,6 +67,12 @@ CUSTOM_OP_IMPL(deconv2d, 2, 1, false, 0, 9) {

    if(!isNCHW)
        output = new NDArray(output->permute({0, 3, 1, 2}));  // [bS, oH, oW, oC] -> [bS, oC, oH, oW]

+   std::vector<int> colPermut;
+   if(1 == wFormat)
+       colPermut = {1, 2, 3, 0, 4, 5};
+   else
+       colPermut = {2, 3, 1, 0, 4, 5};

    if(isSameMode)  // Note: we're intentionally swapping iH and oH, to calculate the padding for a "normal" conv (not deconv) forward pass
        ConvolutionUtils::calcPadding2D(pH, pW, iH, iW, oH, oW, kH, kW, sH, sW, dH, dW);

@@ -73,8 +80,9 @@ CUSTOM_OP_IMPL(deconv2d, 2, 1, false, 0, 9) {

    //----- calculation of output -----//
    // NHWC: [kH, kW, oC, iC] x [bS, iH, iW, iC] = [kH, kW, oC, bS, iH, iW]
    // NCHW: [kH, kW, oC, iC] x [bS, iC, iH, iW] = [kH, kW, oC, bS, iH, iW]
-   sd::MmulHelper::tensorDot(weights, input, &columns, {indWiC}, {indIOioC}, {2, 3, 1, 0, 4, 5});
+   // NHWC: [iC, oC, kH, kW] x [bS, iH, iW, iC] = [oC, kH, kW, bS, iH, iW]
+   // NHWC: [iC, kH, kW, oC] x [bS, iH, iW, iC] = [kH, kW, oC, bS, iH, iW]
+   sd::MmulHelper::tensorDot(weights, input, &columns, {indWiC}, {indIOioC}, colPermut);
    LaunchContext* ctx = block.launchContext();
    helpers::col2im(*ctx, columns, *output, sH, sW, pH, pW, oH, oW, dH, dW);  // [bS, oC, kH, kW, iH, iW] is de-convoluted to [bS, oC, oH, oW]

@@ -97,7 +105,7 @@ CUSTOM_OP_IMPL(deconv2d, 2, 1, false, 0, 9) {

DECLARE_SHAPE_FN(deconv2d) {

    auto inputShapeInfo   = inputShape->at(0);                                // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW)
-   auto weightsShapeInfo = inputShape->at(1);                                // [kH, kW, oC, iC] always
+   auto weightsShapeInfo = inputShape->at(1);                                // [kH, kW, oC, iC], [iC, oC, kH, kW], [iC, kH, kW, oC]
    auto biasShapeInfo    = block.width() > 2 ? inputShape->at(2) : nullptr;  // [oC]

    const int rank = 4;

@@ -114,8 +122,9 @@ DECLARE_SHAPE_FN(deconv2d) {

    int dW = INT_ARG(7);                                                // dilations width
    int isSameMode = INT_ARG(8);                                        // 0-VALID, 1-SAME
    int isNCHW  = block.getIArguments()->size() > 9 ? !INT_ARG(9) : 1;  // INT_ARG(9): 1-NHWC, 0-NCHW
+   int wFormat = block.getIArguments()->size() > 10 ? INT_ARG(10) : 0; // 0 - [kH, kW, oC, iC], 1 - [iC, oC, kH, kW], 2 - [iC, kH, kW, oC]

-   int indIOioC, indIiH, indWoC(2);
+   int indIOioC, indIiH, indWoC(0 == wFormat ? 2 : (1 == wFormat ? 1 : 3));
    if(!isNCHW) {
        indIOioC = 3; indIiH = 1;
    }

@@ -129,7 +138,7 @@ DECLARE_SHAPE_FN(deconv2d) {

    const int iC = inputShapeInfo[indIOioC+1];    // input channels
    const int oC = weightsShapeInfo[indWoC+1];    // output channels

-   std::vector<Nd4jLong> expectedWeightsShape = {kH, kW, oC, iC};
+   std::vector<Nd4jLong> expectedWeightsShape = ConvolutionUtils::expectWeightsShape(wFormat, kH, kW, oC, iC);
    REQUIRE_TRUE(shape::shapeEquals(4, expectedWeightsShape.data(), shape::rank(weightsShapeInfo), shape::shapeOf(weightsShapeInfo)), 0, "CUSTOM DECONV2D OP: wrong shape of weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsShape).c_str(), ShapeUtils::shapeAsString(weightsShapeInfo).c_str());
    if (biasShapeInfo)
        REQUIRE_TRUE(shape::rank(biasShapeInfo) <= 2 && oC == shape::length(biasShapeInfo), 0, "CUSTOM DECONV2D OP: wrong shape of array with biases, expected rank, length: <=2, %i, but got %i, %i instead !", oC, biasShapeInfo[0], shape::length(biasShapeInfo));
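On the "intentionally swapping iH and oH" note in deconv2d: SAME padding for a deconvolution is obtained by treating it as the backward pass of a normal convolution that maps the deconv output back down to the deconv input. A sketch using the textbook SAME-padding formula (the library's calcPadding2D may differ in detail; samePad is an illustrative name):

#include <algorithm>
#include <cassert>

// Generic SAME-padding for a normal convolution: input extent x, output
// extent y, kernel k, stride s, dilation d.
int samePad(int x, int y, int k, int s, int d) {
    return std::max((y - 1) * s + ((k - 1) * d + 1) - x, 0) / 2;
}

int main() {
    // Deconv with stride 2: input 4 -> output 8, kernel 3.
    const int iH = 4, oH = 8, kH = 3, sH = 2, dH = 1;
    // Padding is computed for the equivalent *forward* conv that maps the
    // deconv output (8) back to the deconv input (4) - hence the swapped
    // roles: x = oH, y = iH.
    int pH = samePad(/*x=*/oH, /*y=*/iH, kH, sH, dH);
    assert(pH == 0);  // total pad = (4-1)*2 + 3 - 8 = 1, halved (floor) to 0
    return 0;
}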
@@ -163,12 +172,12 @@ DECLARE_SHAPE_FN(deconv2d) {

CUSTOM_OP_IMPL(deconv2d_bp, 3, 2, false, 0, 9) {

    auto input   = INPUT_VARIABLE(0);                                          // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW)
-   auto weights = INPUT_VARIABLE(1);                                          // [kH, kW, oC, iC] always
+   auto weights = INPUT_VARIABLE(1);                                          // [kH, kW, oC, iC], [iC, oC, kH, kW], [iC, kH, kW, oC]
    auto bias    = block.width() > 3 ? INPUT_VARIABLE(2) : nullptr;            // [oC]
    auto gradO   = block.width() > 3 ? INPUT_VARIABLE(3) : INPUT_VARIABLE(2);  // [bS, oH, oW, oC] (NHWC) or [bS, oC, oH, oW] (NCHW), epsilon_next

    auto gradI = OUTPUT_VARIABLE(0);                                           // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW), gradI
-   auto gradW = OUTPUT_VARIABLE(1);                                           // [kH, kW, oC, iC] always
+   auto gradW = OUTPUT_VARIABLE(1);                                           // [kH, kW, oC, iC], [iC, oC, kH, kW], [iC, kH, kW, oC]
    auto gradB = block.width() > 3 ? OUTPUT_VARIABLE(2) : nullptr;             // [oC]

    REQUIRE_TRUE(input->rankOf() == 4, 0, "CUSTOM DECONV2D_BP OP: rank of input array must be equal to 4, but got %i instead !", input->rankOf());

@@ -186,16 +195,17 @@ CUSTOM_OP_IMPL(deconv2d_bp, 3, 2, false, 0, 9) {

    int dW = INT_ARG(7);                                                // dilations width
    int isSameMode = INT_ARG(8);                                        // 0-VALID, 1-SAME
    int isNCHW  = block.getIArguments()->size() > 9 ? !INT_ARG(9) : 1;  // INT_ARG(9): 1-NHWC, 0-NCHW
+   int wFormat = block.getIArguments()->size() > 10 ? INT_ARG(10) : 0; // 0 - [kH, kW, oC, iC], 1 - [iC, oC, kH, kW], 2 - [iC, kH, kW, oC]

    int bS, iC, iH, iW, oC, oH, oW;                       // batch size, input channels, input height/width, output channels, output height/width;
    int indIOioC, indIiH, indWoC, indWiC, indWkH, indOoH; // corresponding indexes
-   ConvolutionUtils::getSizesAndIndexesConv2d(isNCHW, *input, *gradO, bS, iC, iH, iW, oC, oH, oW, indIOioC, indIiH, indWoC, indWiC, indWkH, indOoH);
+   ConvolutionUtils::getSizesAndIndexesConv2d(isNCHW, wFormat, *input, *gradO, bS, iC, iH, iW, oC, oH, oW, indIOioC, indIiH, indWoC, indWiC, indWkH, indOoH);

    int trueoH, trueoW; // true output height, width
    ConvolutionUtils::calcOutSizeDeconv2D(trueoH, trueoW, kH, kW, sH, sW, pH, pW, dH, dW, iH, iW, isSameMode);

    std::vector<Nd4jLong> expectedGradOShape = ShapeUtils::composeShapeUsingDimsAndIdx({bS,oC,trueoH,trueoW, 0,indIOioC,indOoH,indOoH+1});
-   std::vector<Nd4jLong> expectedWeightsShape = {kH, kW, oC, iC};
+   std::vector<Nd4jLong> expectedWeightsShape = ConvolutionUtils::expectWeightsShape(wFormat, kH, kW, oC, iC);
    REQUIRE_TRUE(gradO->isSameShape(expectedGradOShape), 0, "CUSTOM DECONV2D_BP OP: wrong shape of output gradients (next epsilon) array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedGradOShape).c_str(), ShapeUtils::shapeAsString(gradO).c_str());
    REQUIRE_TRUE(weights->isSameShape(expectedWeightsShape), 0, "CUSTOM DECONV2D_BP OP: wrong shape of weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsShape).c_str(), ShapeUtils::shapeAsString(weights).c_str());
    if(bias)

@@ -206,29 +216,34 @@ CUSTOM_OP_IMPL(deconv2d_bp, 3, 2, false, 0, 9) {

        ConvolutionUtils::calcPadding2D(pH, pW, iH, iW, oH, oW, kH, kW, sH, sW, dH, dW);
    }

    // ----- calculation of gradI -> pass it through conv2d_ff ----- //
    sd::ops::conv2d conv2d;
-   const Nd4jStatus status = conv2d.execute({gradO, weights}, {gradI}, {}, {kH,kW, sH,sW, pH,pW, dH,dW, isSameMode, !isNCHW}, {});
+   const Nd4jStatus status = conv2d.execute({gradO, weights}, {gradI}, {}, {kH,kW, sH,sW, pH,pW, dH,dW, isSameMode, !isNCHW, wFormat}, {});
    if (status != ND4J_STATUS_OK)
        return status;

+   // ----- prepare permutation arrays and axes for dot product ----- //
-   std::vector<int> inputAxesForDot;
+   std::vector<int> inputAxes;

    if(!isNCHW) {
        gradO = new NDArray(gradO->permute({0, 3, 1, 2}));  // [bS, oH, oW, oC] -> [bS, oC, oH, oW]
-       inputAxesForDot = {0, 1, 2};  // bS, iH, iW
+       inputAxes = {0, 1, 2};        // bS, iH, iW
    }
    else
-       inputAxesForDot = {0, 2, 3};  // bS, iH, iW
+       inputAxes = {0, 2, 3};        // bS, iH, iW

+   std::vector<int> gradWAxes;  // empty for wFormat = 1
+   if(0 == wFormat)
+       gradWAxes = {3, 2, 0, 1};
+   else if(2 == wFormat)
+       gradWAxes = {0, 3, 1, 2};

    // ----- calculation of gradW ----- //
    NDArray columns(input->ordering(), {bS, oC, kH, kW, iH, iW}, input->dataType(), block.launchContext());

    LaunchContext* ctx = block.launchContext();
    helpers::im2col(*ctx, *gradO, columns, kH, kW, sH, sW, pH, pW, dH, dW, NDArrayFactory::create(0.f, input->getContext()));  // [bS, oC, oH, oW] is convoluted to [bS, oC, kH, kW, iH, iW]
-   MmulHelper::tensorDot(input, &columns, gradW, inputAxesForDot, {0, 4, 5}, {3, 2, 0, 1});  // [bS, iC, iH, iW]/[bS, iH, iW, iC] x [bS, oC, kH, kW, iH, iW] = [iC, oC, kH, kW]
+   MmulHelper::tensorDot(input, &columns, gradW, inputAxes, {0, 4, 5}, gradWAxes);           // [bS, iC, iH, iW]/[bS, iH, iW, iC] x [bS, oC, kH, kW, iH, iW] = [iC, oC, kH, kW]

    // ----- calculation of gradB ----- //
    if(gradB) {

@@ -248,7 +263,7 @@ CUSTOM_OP_IMPL(deconv2d_bp, 3, 2, false, 0, 9) {

DECLARE_SHAPE_FN(deconv2d_bp) {

    auto inputShapeInfo      = inputShape->at(0);                                           // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW)
-   auto weightsShapeInfo    = inputShape->at(1);                                           // [kH, kW, oC, iC] always
+   auto weightsShapeInfo    = inputShape->at(1);                                           // [kH, kW, oC, iC], [iC, oC, kH, kW], [iC, kH, kW, oC]
    Nd4jLong* biasShapeInfo  = block.width() > 3 ? inputShape->at(2) : nullptr;             // [oC]
    Nd4jLong* gradOShapeInfo = block.width() > 3 ? inputShape->at(3) : inputShape->at(2);   // [bS, oH, oW, oC] (NHWC) or [bS, oC, oH, oW] (NCHW), epsilon_next

@@ -267,8 +282,9 @@ DECLARE_SHAPE_FN(deconv2d_bp) {

    int dW = INT_ARG(7);                                                // dilations width
    int isSameMode = INT_ARG(8);                                        // 0-VALID, 1-SAME
    int isNCHW  = block.getIArguments()->size() > 9 ? !INT_ARG(9) : 1;  // INT_ARG(9): 1-NHWC, 0-NCHW
+   int wFormat = block.getIArguments()->size() > 10 ? INT_ARG(10) : 0; // 0 - [kH, kW, oC, iC], 1 - [iC, oC, kH, kW], 2 - [iC, kH, kW, oC]

-   int indIOioC, indIiH, indWoC(2), indOoH;
+   int indIOioC, indIiH, indOoH, indWoC(0 == wFormat ? 2 : (1 == wFormat ? 1 : 3));
    if(!isNCHW) {
        indIOioC = 3; indIiH = 1; indOoH = 1;
    }

@@ -286,7 +302,7 @@ DECLARE_SHAPE_FN(deconv2d_bp) {

    ConvolutionUtils::calcOutSizeDeconv2D(trueoH, trueoW, kH, kW, sH, sW, pH, pW, dH, dW, iH, iW, isSameMode);

    std::vector<Nd4jLong> expectedGradOShape = ShapeUtils::composeShapeUsingDimsAndIdx({bS,oC,trueoH,trueoW, 0,indIOioC,indOoH,indOoH+1});
-   std::vector<Nd4jLong> expectedWeightsShape = {kH, kW, oC, iC};
+   std::vector<Nd4jLong> expectedWeightsShape = ConvolutionUtils::expectWeightsShape(wFormat, kH, kW, oC, iC);
    REQUIRE_TRUE(shape::shapeEquals(4, expectedGradOShape.data(), shape::rank(gradOShapeInfo), shape::shapeOf(gradOShapeInfo)), 0, "CUSTOM DECONV2D_BP OP: wrong shape of output gradients (next epsilon) array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedGradOShape).c_str(), ShapeUtils::shapeAsString(gradOShapeInfo).c_str());
    REQUIRE_TRUE(shape::shapeEquals(4, expectedWeightsShape.data(), shape::rank(weightsShapeInfo), shape::shapeOf(weightsShapeInfo)), 0, "CUSTOM DECONV2D_BP OP: wrong shape of weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsShape).c_str(), ShapeUtils::shapeAsString(weightsShapeInfo).c_str());
    if(biasShapeInfo)
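In deconv2d_bp, the gradW tensorDot leaves axes in the order [iC, oC, kH, kW] (per the trailing comment), and gradWAxes then places each product axis at its position in the declared weights layout. That reading of tensorDot's final permutation argument is inferred from usage; a sketch checking it for wFormat 0 and 2:

#include <cassert>
#include <string>
#include <vector>

// Send product axis i to destination position perm[i]; an empty permutation
// (the wFormat = 1 case) leaves the product order [iC, oC, kH, kW] as-is.
std::vector<std::string> place(const std::vector<std::string>& in,
                               const std::vector<int>& perm) {
    if (perm.empty()) return in;
    std::vector<std::string> out(in.size());
    for (size_t i = 0; i < in.size(); ++i) out[perm[i]] = in[i];
    return out;
}

int main() {
    const std::vector<std::string> prod = {"iC", "oC", "kH", "kW"};
    // wFormat 0 -> [kH, kW, oC, iC]
    assert((place(prod, {3, 2, 0, 1}) ==
            std::vector<std::string>{"kH", "kW", "oC", "iC"}));
    // wFormat 2 -> [iC, kH, kW, oC]
    assert((place(prod, {0, 3, 1, 2}) ==
            std::vector<std::string>{"iC", "kH", "kW", "oC"}));
    return 0;
}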
@@ -32,10 +32,10 @@ namespace ops {

CUSTOM_OP_IMPL(deconv2d_tf, 3, 1, false, 0, 9) {

    auto gradO      = INPUT_VARIABLE(2);  // [bS, oH, oW, oC] (NHWC) or [bS, oC, oH, oW] (NCHW), epsilon_next
-   auto weights    = INPUT_VARIABLE(1);  // [kH, kW, iC, oC] always
+   auto weights    = INPUT_VARIABLE(1);  // [kH, kW, iC, oC], [oC, iC, kH, kW], [oC, kH, kW, iC]
    auto gradIShape = INPUT_VARIABLE(0);  // [4] - shape of input of conv2d (that is shape of gradI)

-   auto gradI = OUTPUT_VARIABLE(0);      // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW), epsilon
+   auto gradI = OUTPUT_NULLIFIED(0);     // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW), epsilon

    int kH = INT_ARG(0) > 0 ? INT_ARG(0) : static_cast<int>(weights->sizeAt(0)); // filter(kernel) height
    int kW = INT_ARG(1) > 0 ? INT_ARG(1) : static_cast<int>(weights->sizeAt(1)); // filter(kernel) width

@@ -47,6 +47,7 @@ CUSTOM_OP_IMPL(deconv2d_tf, 3, 1, false, 0, 9) {

    int dW = INT_ARG(7);                                                // dilations width
    int isSameMode = INT_ARG(8);                                        // 0-VALID, 1-SAME
    int isNCHW  = block.getIArguments()->size() > 9 ? !INT_ARG(9) : 1;  // INT_ARG(9): 1-NHWC, 0-NCHW
+   int wFormat = block.getIArguments()->size() > 10 ? INT_ARG(10) : 0; // 0 - [kH, kW, iC, oC], 1 - [oC, iC, kH, kW], 2 - [oC, kH, kW, iC]

    const int rank = gradO->rankOf();

@@ -57,20 +58,19 @@ CUSTOM_OP_IMPL(deconv2d_tf, 3, 1, false, 0, 9) {

    // create empty conv2d input array
    NDArray input(gradO->ordering(), gradIShape->asVectorT<Nd4jLong>(), gradO->dataType(), block.launchContext());

    int bS, iC, iH, iW, oC, oH, oW;                       // batch size, input channels, input height/width, output channels, output height/width;
    int indIOioC, indIiH, indWoC, indWiC, indWkH, indOoH; // corresponding indexes
-   ConvolutionUtils::getSizesAndIndexesConv2d(isNCHW, input, *gradO, bS, iC, iH, iW, oC, oH, oW, indIOioC, indIiH, indWiC, indWoC, indWkH, indOoH);
+   ConvolutionUtils::getSizesAndIndexesConv2d(isNCHW, wFormat, input, *gradO, bS, iC, iH, iW, oC, oH, oW, indIOioC, indIiH, indWiC, indWoC, indWkH, indOoH);

    int trueoH, trueoW; // true output height, width
    ConvolutionUtils::calcOutSizePool2D(trueoH, trueoW, kH, kW, sH, sW, pH, pW, dH, dW, iH, iW, isSameMode);

    std::vector<Nd4jLong> expectedGradOShape = ShapeUtils::composeShapeUsingDimsAndIdx({bS,oC,trueoH,trueoW, 0,indIOioC,indOoH,indOoH+1});
-   std::vector<Nd4jLong> expectedWeightsShape = {kH, kW, iC, oC};
+   std::vector<Nd4jLong> expectedWeightsShape = ConvolutionUtils::expectWeightsShape(wFormat, kH, kW, iC, oC);
    REQUIRE_TRUE(gradO->isSameShape(expectedGradOShape), 0, "CUSTOM DECONV2D_TF OP: wrong shape of input array, basing on array with output shape expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedGradOShape).c_str(), ShapeUtils::shapeAsString(gradO).c_str());
    REQUIRE_TRUE(weights->isSameShape(expectedWeightsShape), 0, "CUSTOM DECONV2D_TF OP: wrong shape of weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsShape).c_str(), ShapeUtils::shapeAsString(weights).c_str());

-   ConvolutionUtils::conv2dBP(block, &input, weights, nullptr, gradO, gradI, nullptr, nullptr, kH,kW,sH,sW,pH,pW,dH,dW,isSameMode,isNCHW);
+   ConvolutionUtils::conv2dBP(block, &input, weights, nullptr, gradO, gradI, nullptr, nullptr, kH,kW,sH,sW,pH,pW,dH,dW,isSameMode,isNCHW,wFormat);

    return Status::OK();
}

@@ -84,7 +84,7 @@ CUSTOM_OP_IMPL(deconv2d_tf, 3, 1, false, 0, 9) {

DECLARE_SHAPE_FN(deconv2d_tf) {

    auto gradOShapeInfo      = inputShape->at(2);  // [bS, oH, oW, oC] (NHWC) or [bS, oC, oH, oW] (NCHW), epsilon_next
-   auto weightsShapeInfo    = inputShape->at(1);  // [kH, kW, iC, oC] always
+   auto weightsShapeInfo    = inputShape->at(1);  // [kH, kW, iC, oC], [oC, iC, kH, kW], [oC, kH, kW, iC]
    auto gradIShapeShapeInfo = inputShape->at(0);  // [4]

    const int rank = 4;

@@ -103,8 +103,9 @@ DECLARE_SHAPE_FN(deconv2d_tf) {

    const int dW = INT_ARG(7);                                                // dilations width
    const int isSameMode = INT_ARG(8);                                        // 0-VALID, 1-SAME
    const int isNCHW  = block.getIArguments()->size() > 9 ? !INT_ARG(9) : 1;  // INT_ARG(9): 1-NHWC, 0-NCHW
+   const int wFormat = block.getIArguments()->size() > 10 ? INT_ARG(10) : 0; // 0 - [kH, kW, iC, oC], 1 - [oC, iC, kH, kW], 2 - [oC, kH, kW, iC]

-   int indIOioC, indIiH, indWoC(3), indOoH;
+   int indIOioC, indIiH, indWoC(0 == wFormat ? 3 : 0), indOoH;
    if(!isNCHW) {
        indIOioC = 3; indIiH = 1; indOoH = 1;
    }

@@ -126,7 +127,7 @@ DECLARE_SHAPE_FN(deconv2d_tf) {

    ConvolutionUtils::calcOutSizeDeconv2D(trueiH, trueiW, kH, kW, sH, sW, pH, pW, dH, dW, oH, oW, isSameMode);

    std::vector<Nd4jLong> expectedGradIShape = ShapeUtils::composeShapeUsingDimsAndIdx({bS,iC,trueiH,trueiW, 0,indIOioC,indIiH,indIiH+1});
-   std::vector<Nd4jLong> expectedWeightsShape = {kH, kW, iC, oC};
+   std::vector<Nd4jLong> expectedWeightsShape = ConvolutionUtils::expectWeightsShape(wFormat, kH, kW, iC, oC);
    REQUIRE_TRUE(expectedGradIShape == gradIShape, 0, "CUSTOM DECONV2D_TF OP: wrong shape of array with output shape, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedGradIShape).c_str(), ShapeUtils::shapeAsString(gradIShape).c_str());
    REQUIRE_TRUE(shape::shapeEquals(4, expectedWeightsShape.data(), shape::rank(weightsShapeInfo), shape::shapeOf(weightsShapeInfo)), 0, "CUSTOM DECONV2D_TF OP: wrong shape of weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsShape).c_str(), ShapeUtils::shapeAsString(weightsShapeInfo).c_str());
@@ -32,7 +32,7 @@ namespace ops {

CUSTOM_OP_IMPL(deconv3d, 2, 1, false, 0, 13) {

    auto input   = INPUT_VARIABLE(0);                                // [bS, iD, iH, iW, iC] (NDHWC) or [bS, iC, iD, iH, iW] (NCDHW)
-   auto weights = INPUT_VARIABLE(1);                                // [kD, kH, kW, oC, iC] always
+   auto weights = INPUT_VARIABLE(1);                                // [kD, kH, kW, oC, iC], [iC, oC, kD, kH, kW], [iC, kD, kH, kW, oC]
    auto bias    = block.width() > 2 ? INPUT_VARIABLE(2) : nullptr;  // [oC]

    auto output  = OUTPUT_VARIABLE(0);                               // [bS, oD, oH, oW, oC] (NDHWC) or [bS, oC, oD, oH, oW] (NCDHW)

@@ -53,13 +53,14 @@ CUSTOM_OP_IMPL(deconv3d, 2, 1, false, 0, 13) {

    int dH = INT_ARG(10);                                                // dilations height
    int dW = INT_ARG(11);                                                // dilations width
    int isSameMode = INT_ARG(12);                                        // 0-VALID, 1-SAME
    int isNCDHW = block.getIArguments()->size() > 13 ? !INT_ARG(13) : 1; // INT_ARG(13): 1-NDHWC, 0-NCDHW
+   int wFormat = block.getIArguments()->size() > 14 ? INT_ARG(14) : 0;  // 0 - [kD, kH, kW, oC, iC], 1 - [iC, oC, kD, kH, kW], 2 - [iC, kD, kH, kW, oC]

    int bS, iC, iD, iH, iW, oC, oD, oH, oW;          // batch size, input channels, input depth/height/width, output channels, output depth/height/width;
    int indIOioC, indIOioD, indWoC, indWiC, indWkD;  // corresponding indexes
-   ConvolutionUtils::getSizesAndIndexesConv3d(isNCDHW, *input, *output, bS, iC, iD, iH, iW, oC, oD, oH, oW, indIOioC, indIOioD, indWoC, indWiC, indWkD);
+   ConvolutionUtils::getSizesAndIndexesConv3d(isNCDHW, wFormat, *input, *output, bS, iC, iD, iH, iW, oC, oD, oH, oW, indIOioC, indIOioD, indWoC, indWiC, indWkD);

-   std::vector<Nd4jLong> expectedWeightsShape = {kD, kH, kW, oC, iC};
+   std::vector<Nd4jLong> expectedWeightsShape = ConvolutionUtils::expectWeightsShape(wFormat, kD, kH, kW, oC, iC);
    REQUIRE_TRUE(weights->isSameShape(expectedWeightsShape), 0, "CUSTOM DECONV3D OP: wrong shape of weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsShape).c_str(), ShapeUtils::shapeAsString(weights).c_str());
    if (bias)
        REQUIRE_TRUE(bias->rankOf() <= 2 && oC == bias->lengthOf(), 0, "CUSTOM DECONV3D OP: wrong shape of array with biases, expected rank, length: <=2, %i, but got %i, %i instead !", oC, bias->rankOf(), bias->lengthOf());

@@ -67,16 +68,23 @@ CUSTOM_OP_IMPL(deconv3d, 2, 1, false, 0, 13) {

    if(!isNCDHW)
        output = new NDArray(output->permute({0, 4, 1, 2, 3}));  // [bS, oD, oH, oW, oC] -> [bS, oC, oD, oH, oW]

+   std::vector<int> colPermut;
+   if(1 == wFormat)
+       colPermut = {1,2,3,4,0,5,6,7};
+   else
+       colPermut = {2,3,4,1,0,5,6,7};

    if(isSameMode)  // Note: we're intentionally swapping iH and oH, to calculate the padding for a "normal" conv (not deconv) forward pass
        ConvolutionUtils::calcPadding3D(pD, pH, pW, iD, iH, iW, oD, oH, oW, kD, kH, kW, sD, sH, sW, dD, dH, dW);

    NDArray columns(input->ordering(), {bS, oC, kD, kH, kW, iD, iH, iW}, input->dataType(), block.launchContext());

    //----- calculation of output -----//
-   // NDHWC: [kD, kH, kW, oC, iC] x [bS, iD, iH, iW, iC] = [kD, kH, kW, oC, bS, iD, iH, iW]
-   // NCDHW: [kD, kH, kW, oC, iC] x [bS, iC, iD, iH, iW] = [kD, kH, kW, oC, bS, iD, iH, iW]
-   sd::MmulHelper::tensorDot(weights, input, &columns, {indWiC}, {indIOioC}, {2, 3, 4, 1, 0, 5, 6, 7});  // [bS, oC, kD, kH, kW, iD, iH, iW] -> [kD, kH, kW, oC, bS, iD, iH, iW]
+   // [kD, kH, kW, oC, iC] x [bS, iD, iH, iW, iC] = [kD, kH, kW, oC, bS, iD, iH, iW]
+   // [iC, oC, kD, kH, kW] x [bS, iD, iH, iW, iC] = [oC, kD, kH, kW, bS, iD, iH, iW]
+   // [iC, kD, kH, kW, oC] x [bS, iD, iH, iW, iC] = [kD, kH, kW, oC, bS, iD, iH, iW]
+   sd::MmulHelper::tensorDot(weights, input, &columns, {indWiC}, {indIOioC}, colPermut);
    ConvolutionUtils::col2vol(block, columns, *output, sD, sH, sW, pD, pH, pW, dD, dH, dW);               // [bS, oC, kD, kH, kW, iD, iH, iW] is de-convoluted to [bS, oC, oD, oH, oW]

    //----- add biases if required -----//
    if(bias)

@@ -101,7 +109,7 @@ CUSTOM_OP_IMPL(deconv3d, 2, 1, false, 0, 13) {

DECLARE_SHAPE_FN(deconv3d) {

    auto inputShapeInfo   = inputShape->at(0);                                // [bS, iD, iH, iW, iC] (NDHWC) or [bS, iC, iD, iH, iW] (NCDHW)
-   auto weightsShapeInfo = inputShape->at(1);                                // [kD, kH, kW, oC, iC] always
+   auto weightsShapeInfo = inputShape->at(1);                                // [kD, kH, kW, oC, iC], [iC, oC, kD, kH, kW], [iC, kD, kH, kW, oC]
    auto biasShapeInfo    = block.width() > 2 ? inputShape->at(2) : nullptr;  // [oC]

    const int rank = 5;

@@ -122,8 +130,9 @@ DECLARE_SHAPE_FN(deconv3d) {

    int dW = INT_ARG(11);                                                // dilations width
    int isSameMode = INT_ARG(12);                                        // 0-VALID, 1-SAME
    int isNCDHW = block.getIArguments()->size() > 13 ? !INT_ARG(13) : 1; // INT_ARG(13): 1-NDHWC, 0-NCDHW
+   int wFormat = block.getIArguments()->size() > 14 ? INT_ARG(14) : 0;  // 0 - [kD, kH, kW, oC, iC], 1 - [iC, oC, kD, kH, kW], 2 - [iC, kD, kH, kW, oC]

-   int indIOioC, indIiD, indWoC(3);
+   int indIOioC, indIiD, indWoC(0 == wFormat ? 3 : (1 == wFormat ? 1 : 4));
    if(!isNCDHW) {
        indIOioC = 4; indIiD = 1;
    }

@@ -138,7 +147,7 @@ DECLARE_SHAPE_FN(deconv3d) {

    const int iC = inputShapeInfo[indIOioC+1];    // input channels
    const int oC = weightsShapeInfo[indWoC+1];    // output channels

-   std::vector<Nd4jLong> expectedWeightsShape = {kD, kH, kW, oC, iC};
+   std::vector<Nd4jLong> expectedWeightsShape = ConvolutionUtils::expectWeightsShape(wFormat, kD, kH, kW, oC, iC);
    REQUIRE_TRUE(shape::shapeEquals(5, expectedWeightsShape.data(), shape::rank(weightsShapeInfo), shape::shapeOf(weightsShapeInfo)), 0, "CUSTOM DECONV3D OP: wrong shape of weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsShape).c_str(), ShapeUtils::shapeAsString(weightsShapeInfo).c_str());
    if (biasShapeInfo)
        REQUIRE_TRUE(shape::rank(biasShapeInfo) <= 2 && oC == shape::length(biasShapeInfo), 0, "CUSTOM DECONV3D OP: wrong shape of array with biases, expected rank, length: <=2, %i, but got %i, %i instead !", oC, shape::rank(biasShapeInfo), shape::length(biasShapeInfo));
@ -174,12 +183,12 @@ DECLARE_SHAPE_FN(deconv3d) {
CUSTOM_OP_IMPL(deconv3d_bp, 3, 2, false, 0, 13) {

    auto input   = INPUT_VARIABLE(0);                                           // [bS, iD, iH, iW, iC] (NDHWC) or [bS, iC, iD, iH, iW] (NCDHW)
    auto weights = INPUT_VARIABLE(1);                                           // [kD, kH, kW, oC, iC] always
    auto weights = INPUT_VARIABLE(1);                                           // [kD, kH, kW, oC, iC], [iC, oC, kD, kH, kW], [iC, kD, kH, kW, oC]
    auto bias    = block.width() > 3 ? INPUT_VARIABLE(2) : nullptr;             // [oC]
    auto gradO   = block.width() > 3 ? INPUT_VARIABLE(3) : INPUT_VARIABLE(2);   // [bS, oD, oH, oW, oC] (NDHWC) or [bS, oC, oD, oH, oW] (NCDHW), epsilon_next

    auto gradI = OUTPUT_VARIABLE(0);                                            // [bS, iD, iH, iW, iC] (NDHWC) or [bS, iC, iD, iH, iW] (NCDHW), gradI
    auto gradW = OUTPUT_VARIABLE(1);                                            // [kD, kH, kW, oC, iC] always
    auto gradW = OUTPUT_VARIABLE(1);                                            // [kD, kH, kW, oC, iC], [iC, oC, kD, kH, kW], [iC, kD, kH, kW, oC]
    auto gradB = block.width() > 3 ? OUTPUT_VARIABLE(2) : nullptr;              // [oC]

    REQUIRE_TRUE(input->rankOf() == 5, 0, "CUSTOM DECONV3D_BP OP: rank of input array must be equal to 5, but got %i instead !", input->rankOf());

@ -201,16 +210,17 @@ CUSTOM_OP_IMPL(deconv3d_bp, 3, 2, false, 0, 13) {

    int dW = INT_ARG(11);                                                       // dilations width
    int isSameMode = INT_ARG(12);                                               // 0-SAME, 1-VALID
    int isNCDHW = block.getIArguments()->size() > 13 ? !INT_ARG(13) : 1;        // INT_ARG(13): 1-NDHWC, 0-NCDHW
    int wFormat = block.getIArguments()->size() > 14 ? INT_ARG(14) : 0;         // 0 - [kD, kH, kW, oC, iC], 1 - [iC, oC, kD, kH, kW], 2 - [iC, kD, kH, kW, oC]

    int bS, iC, iD, iH, iW, oC, oD, oH, oW;                                     // batch size, input channels, input depth/height/width, output channels, output depth/height/width
    int indIOioC, indIOioD, indWoC, indWiC, indWkD;                             // corresponding indexes
    ConvolutionUtils::getSizesAndIndexesConv3d(isNCDHW, *input, *gradO, bS, iC, iD, iH, iW, oC, oD, oH, oW, indIOioC, indIOioD, indWoC, indWiC, indWkD);
    ConvolutionUtils::getSizesAndIndexesConv3d(isNCDHW, wFormat, *input, *gradO, bS, iC, iD, iH, iW, oC, oD, oH, oW, indIOioC, indIOioD, indWoC, indWiC, indWkD);

    int trueoD, trueoH, trueoW;                                                 // true output depth/height/width
    ConvolutionUtils::calcOutSizeDeconv3D(trueoD, trueoH, trueoW, kD, kH, kW, sD, sH, sW, pD, pH, pW, dD, dH, dW, iD, iH, iW, isSameMode);

    std::vector<Nd4jLong> expectedGradOShape   = ShapeUtils::composeShapeUsingDimsAndIdx({bS,oC,trueoD,trueoH,trueoW, 0,indIOioC,indIOioD,indIOioD+1,indIOioD+2});
    std::vector<Nd4jLong> expectedWeightsShape = {kD, kH, kW, oC, iC};
    std::vector<Nd4jLong> expectedWeightsShape = ConvolutionUtils::expectWeightsShape(wFormat, kD, kH, kW, oC, iC);
    REQUIRE_TRUE(gradO->isSameShape(expectedGradOShape), 0, "CUSTOM DECONV3D_BP OP: wrong shape of output gradients (next epsilon) array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedGradOShape).c_str(), ShapeUtils::shapeAsString(gradO).c_str());
    REQUIRE_TRUE(weights->isSameShape(expectedWeightsShape), 0, "CUSTOM DECONV3D_BP OP: wrong shape of weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsShape).c_str(), ShapeUtils::shapeAsString(weights).c_str());
    if(bias)
@ -221,7 +231,7 @@ CUSTOM_OP_IMPL(deconv3d_bp, 3, 2, false, 0, 13) {
    // ----- calculation of gradI -> pass it through conv3d_ff ----- //
    sd::ops::conv3dnew conv3d;
    const Nd4jStatus status = conv3d.execute({gradO, weights}, {gradI}, {}, {kD,kH,kW, sD,sH,sW, pD,pH,pW, dD,dH,dW, isSameMode, !isNCDHW}, {});
    const Nd4jStatus status = conv3d.execute({gradO, weights}, {gradI}, {}, {kD,kH,kW, sD,sH,sW, pD,pH,pW, dD,dH,dW, isSameMode, !isNCDHW, wFormat}, {});
    if (status != ND4J_STATUS_OK)
        return status;
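Why gradI falls out of a forward conv3dnew call: deconvolution applies the transpose of the matching convolution's column matrix, so the adjoint needed for backprop is that convolution itself. In the usual im2col matrix view (reasoning sketch, not code from this PR):

    y = C^T x            deconv3d forward, with C the column matrix of the matching conv3d
    dL/dx = C dL/dy      hence: run conv3dnew on gradO with the same weights

The only change in this hunk is forwarding wFormat, so the inner conv3dnew interprets the weights layout exactly as the forward deconv did.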
@ -235,10 +245,16 @@ CUSTOM_OP_IMPL(deconv3d_bp, 3, 2, false, 0, 13) {

    else
        inputAxesForDot = {0, 2, 3, 4};          // bS, iD, iH, iW

    std::vector<int> gradWAxes;                  // empty for wFormat = 1
    if(0 == wFormat)
        gradWAxes = {4,3,0,1,2};
    else if(2 == wFormat)
        gradWAxes = {0,4,1,2,3};

    // ----- calculation of gradW ----- //
    auto columns = NDArrayFactory::create(input->ordering(), {bS, oC, kD, kH, kW, iD, iH, iW}, input->dataType(), block.launchContext());
    ConvolutionUtils::vol2col(block, *gradO, columns, sD, sH, sW, pD, pH, pW, dD, dH, dW);            // [bS, oC, oD, oH, oW] is deconvoluted to [bS, oC, kD, kH, kW, iD, iH, iW]
    MmulHelper::tensorDot(input, &columns, gradW, inputAxesForDot, {0, 5, 6, 7}, {4, 3, 0, 1, 2});    // [bS, iC, iD, iH, iW]/[bS, iD, iH, iW, iC] x [bS, oC, kD, kH, kW, iD, iH, iW] = [iC, oC, kD, kH, kW]
    ConvolutionUtils::vol2col(block, *gradO, columns, sD, sH, sW, pD, pH, pW, dD, dH, dW);            // [bS, oC, oD, oH, oW] is deconvoluted to [bS, oC, kD, kH, kW, iD, iH, iW]
    MmulHelper::tensorDot(input, &columns, gradW, inputAxesForDot, {0, 5, 6, 7}, gradWAxes);          // [bS, iC, iD, iH, iW]/[bS, iD, iH, iW, iC] x [bS, oC, kD, kH, kW, iD, iH, iW] = [iC, oC, kD, kH, kW]

    // ----- calculation of gradB ----- //
    if(gradB) {
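The tensorDot above always yields the weight gradient with axes ordered [iC, oC, kD, kH, kW]; gradWAxes permutes the gradW destination so that product lands in whatever layout the weights are stored in (an empty vector means the layout already matches, hence the wFormat = 1 comment). A small self-contained check of that bookkeeping:

    // Demo only: permuting each stored gradW layout by its gradWAxes must
    // always give [iC, oC, kD, kH, kW], the tensorDot output order.
    #include <cassert>
    #include <string>
    #include <vector>

    std::vector<std::string> pick(const std::vector<std::string>& dims, const std::vector<int>& axes) {
        if (axes.empty()) return dims;
        std::vector<std::string> out;
        for (int a : axes) out.push_back(dims[a]);
        return out;
    }

    int main() {
        const std::vector<std::string> target{"iC", "oC", "kD", "kH", "kW"};
        assert(pick({"kD", "kH", "kW", "oC", "iC"}, {4, 3, 0, 1, 2}) == target);   // wFormat 0
        assert(pick({"iC", "oC", "kD", "kH", "kW"}, {}) == target);                // wFormat 1
        assert(pick({"iC", "kD", "kH", "kW", "oC"}, {0, 4, 1, 2, 3}) == target);   // wFormat 2
        return 0;
    }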
@ -267,7 +283,7 @@ CUSTOM_OP_IMPL(deconv3d_bp, 3, 2, false, 0, 13) {

DECLARE_SHAPE_FN(deconv3d_bp) {

    auto inputShapeInfo   = inputShape->at(0);                                          // [bS, iD, iH, iW, iC] (NDHWC) or [bS, iC, iD, iH, iW] (NCDHW)
    auto weightsShapeInfo = inputShape->at(1);                                          // [kD, kH, kW, oC, iC] always
    auto weightsShapeInfo = inputShape->at(1);                                          // [kD, kH, kW, oC, iC], [iC, oC, kD, kH, kW], [iC, kD, kH, kW, oC]
    Nd4jLong* biasShapeInfo  = block.width() > 3 ? inputShape->at(2) : nullptr;         // [oC]
    Nd4jLong* gradOShapeInfo = block.width() > 3 ? inputShape->at(3) : inputShape->at(2);    // [bS, oD, oH, oW, oC] (NDHWC) or [bS, oC, oD, oH, oW] (NCDHW), epsilon_next

@ -290,8 +306,9 @@ DECLARE_SHAPE_FN(deconv3d_bp) {

    int dW = INT_ARG(11);                                                       // dilations width
    int isSameMode = INT_ARG(12);                                               // 0-SAME, 1-VALID
    int isNCDHW = block.getIArguments()->size() > 13 ? !INT_ARG(13) : 1;        // INT_ARG(13): 1-NDHWC, 0-NCDHW
    int wFormat = block.getIArguments()->size() > 14 ? INT_ARG(14) : 0;         // 0 - [kD, kH, kW, oC, iC], 1 - [iC, oC, kD, kH, kW], 2 - [iC, kD, kH, kW, oC]

    int indIOioC, indIiD, indWoC(3);
    int indIOioC, indIiD, indWoC(0 == wFormat ? 3 : (1 == wFormat ? 1 : 4));
    if(!isNCDHW) {
        indIOioC = 4; indIiD = 1;
    }

@ -310,8 +327,8 @@ DECLARE_SHAPE_FN(deconv3d_bp) {

    ConvolutionUtils::calcOutSizeDeconv3D(trueoD, trueoH, trueoW, kD, kH, kW, sD, sH, sW, pD, pH, pW, dD, dH, dW, iD, iH, iW, isSameMode);

    std::vector<Nd4jLong> expectedGradOShape   = ShapeUtils::composeShapeUsingDimsAndIdx({bS,oC,trueoD,trueoH,trueoW, 0,indIOioC,indIiD,indIiD+1,indIiD+2});
    std::vector<Nd4jLong> expectedWeightsShape = {kD, kH, kW, oC, iC};
    REQUIRE_TRUE(shape::shapeEquals(5, expectedGradOShape.data(), shape::rank(gradOShapeInfo), shape::shapeOf(gradOShapeInfo)), 0, "CUSTOM DECONV3D_BP OP: wrong shape of output gradients next epsilon) array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedGradOShape).c_str(), ShapeUtils::shapeAsString(gradOShapeInfo).c_str());
    std::vector<Nd4jLong> expectedWeightsShape = ConvolutionUtils::expectWeightsShape(wFormat, kD, kH, kW, oC, iC);
    REQUIRE_TRUE(shape::shapeEquals(5, expectedGradOShape.data(), shape::rank(gradOShapeInfo), shape::shapeOf(gradOShapeInfo)), 0, "CUSTOM DECONV3D_BP OP: wrong shape of output gradients (next epsilon) array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedGradOShape).c_str(), ShapeUtils::shapeAsString(gradOShapeInfo).c_str());
    REQUIRE_TRUE(shape::shapeEquals(5, expectedWeightsShape.data(), shape::rank(weightsShapeInfo), shape::shapeOf(weightsShapeInfo)), 0, "CUSTOM DECONV3D_BP OP: wrong shape of weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsShape).c_str(), ShapeUtils::shapeAsString(weightsShapeInfo).c_str());
    if(biasShapeInfo)
        REQUIRE_TRUE(biasShapeInfo[0] <= 2 && oC == shape::length(biasShapeInfo), 0, "CUSTOM DECONV3D_BP OP: wrong shape of array with biases, expected rank, length: <=2, %i, but got %i, %i instead !", oC, biasShapeInfo[0], shape::length(biasShapeInfo));

@ -32,10 +32,10 @@ namespace ops {
CUSTOM_OP_IMPL(depthwise_conv2d, 2, 1, false, 0, 9) {

    auto input   = INPUT_VARIABLE(0);                                    // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW)
    auto weights = INPUT_VARIABLE(1);                                    // [kH, kW, iC, mC] always
    auto weights = INPUT_VARIABLE(1);                                    // [kH, kW, iC, mC], [mC, iC, kH, kW], [mC, kH, kW, iC]
    auto bias    = block.width() > 2 ? INPUT_VARIABLE(2) : nullptr;      // [oC] = iC*mC

    auto output  = OUTPUT_VARIABLE(0);                                   // [bS, oH, oW, iC*mC] (NHWC) or [bS, iC*mC, oH, oW] (NCHW)
    auto output  = OUTPUT_NULLIFIED(0);                                  // [bS, oH, oW, iC*mC] (NHWC) or [bS, iC*mC, oH, oW] (NCHW)

    REQUIRE_TRUE(input->rankOf()   == 4, 0, "CUSTOM DEPTHWISECONV2D OP: rank of input array must be equal to 4, but got %i instead !", input->rankOf());
    REQUIRE_TRUE(weights->rankOf() == 4, 0, "CUSTOM DEPTHWISECONV2D OP: rank of weights array must be equal to 4, but got %i instead !", weights->rankOf());
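This commit also swaps OUTPUT_VARIABLE for OUTPUT_NULLIFIED across these ops. The macro's definition is not part of the diff; judging by its name and by where it appears (ops whose helpers accumulate into the output, such as col2vol and the pooling backprops), it presumably fetches the output array and zeroes its buffer first, roughly along these lines (hypothetical sketch, not the real macro):

    // Hypothetical illustration only; the actual macro lives in the op framework.
    // NDArray::nullify() zero-fills the buffer, so accumulation starts clean
    // even when the executioner reuses workspace memory.
    #define OUTPUT_NULLIFIED(idx) \
        ([&]() { auto z = OUTPUT_VARIABLE(idx); z->nullify(); return z; }())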
@ -50,19 +50,20 @@ CUSTOM_OP_IMPL(depthwise_conv2d, 2, 1, false, 0, 9) {
    int dW = INT_ARG(7);                                                        // dilations width
    int isSameMode = INT_ARG(8);                                                // 0-VALID, 1-SAME
    int isNCHW  = block.getIArguments()->size() > 9 ? !INT_ARG(9) : 1;          // INT_ARG(9): 0-NCHW, 1-NHWC
    int wFormat = block.getIArguments()->size() > 10 ? INT_ARG(10) : 0;         // 0 - [kH, kW, iC, mC], 1 - [mC, iC, kH, kW], 2 - [mC, kH, kW, iC]

    int bS, iC, iH, iW, mC, oC, oH, oW;                                         // batch size, input channels, input height/width, channels multiplier(oC = iC*mC), output channels, output height/width
    int indIOioC, indIiH, indWmC, indWiC, indWkH, indOoH;                       // corresponding indexes
    ConvolutionUtils::getSizesAndIndexesConv2d(isNCHW, *input, *output, bS, iC, iH, iW, oC, oH, oW, indIOioC, indIiH, indWiC, indWmC, indWkH, indOoH);
    ConvolutionUtils::getSizesAndIndexesConv2d(isNCHW, wFormat, *input, *output, bS, iC, iH, iW, oC, oH, oW, indIOioC, indIiH, indWiC, indWmC, indWkH, indOoH);
    mC = weights->sizeAt(indWmC);                                               // channels multiplier

    std::vector<Nd4jLong> expectedWeightsShape = {kH, kW, iC, mC};
    std::vector<Nd4jLong> expectedWeightsShape = ConvolutionUtils::expectWeightsShape(wFormat, kH, kW, iC, mC);
    REQUIRE_TRUE(weights->isSameShape(expectedWeightsShape), 0, "CUSTOM DEPTHWISECONV2D OP: wrong shape of weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsShape).c_str(), ShapeUtils::shapeAsString(weights).c_str());
    REQUIRE_TRUE(output->sizeAt(indIOioC) == iC*mC, 0, "CUSTOM DEPTHWISECONV2D OP: the output_channels must be equal to input_channels * channels_multiplier = %i !", iC*mC);
    if (bias)
        REQUIRE_TRUE(bias->rankOf() <= 2 && oC == bias->lengthOf(), 0, "CUSTOM DEPTHWISECONV2D OP: wrong shape of array with biases, expected rank, length: <=2, %i, but got %i, %i instead !", oC, bias->rankOf(), bias->lengthOf());

    ConvolutionUtils::depthwiseConv2d(block, input, weights, bias, output, kH,kW,sH,sW,pH,pW,dH,dW,isSameMode,isNCHW);
    ConvolutionUtils::depthwiseConv2d(block, input, weights, bias, output, kH,kW,sH,sW,pH,pW,dH,dW,isSameMode,isNCHW,wFormat);

    return Status::OK();
}
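For callers, the layout flag is just one more trailing integer argument. A hedged usage sketch with the iArg order read off INT_ARG(0..10) above; the evaluate() helper and result handling are assumed from the surrounding codebase:

    // Run depthwise_conv2d on NHWC input with PyTorch-style [mC, iC, kH, kW]
    // weights by passing wFormat = 1 as INT_ARG(10).
    auto input   = NDArrayFactory::create<float>('c', {1, 8, 8, 3});    // [bS, iH, iW, iC]
    auto weights = NDArrayFactory::create<float>('c', {2, 3, 3, 3});    // wFormat 1: [mC, iC, kH, kW]

    sd::ops::depthwise_conv2d op;
    // iArgs: kH,kW, sH,sW, pH,pW, dH,dW, isSameMode, INT_ARG(9) (0-NCHW, 1-NHWC), wFormat
    auto results = op.evaluate({&input, &weights}, {}, {3, 3, 1, 1, 0, 0, 1, 1, 1, 1, 1});
    auto output  = results.at(0);                                       // [1, 8, 8, 6], since oC = iC*mC = 6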
@ -75,7 +76,7 @@ CUSTOM_OP_IMPL(depthwise_conv2d, 2, 1, false, 0, 9) {
DECLARE_SHAPE_FN(depthwise_conv2d) {

    Nd4jLong* inputShapeInfo   = inputShape->at(0);                                 // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW)
    Nd4jLong* weightsShapeInfo = inputShape->at(1);                                 // [kH, kW, iC, mC] always
    Nd4jLong* weightsShapeInfo = inputShape->at(1);                                 // [kH, kW, iC, mC], [mC, iC, kH, kW], [mC, kH, kW, iC]
    Nd4jLong* biasShapeInfo    = block.width() > 2 ? inputShape->at(2) : nullptr;   // [oC] = iC*mC

    const int rank = 4;

@ -92,8 +93,9 @@ DECLARE_SHAPE_FN(depthwise_conv2d) {

    int dW = INT_ARG(7);                                                        // dilations width
    int isSameMode = INT_ARG(8);                                                // 0-VALID, 1-SAME
    int isNCHW  = block.getIArguments()->size() > 9 ? !INT_ARG(9) : 1;          // INT_ARG(9): 1-NHWC, 0-NCHW
    int wFormat = block.getIArguments()->size() > 10 ? INT_ARG(10) : 0;         // 0 - [kH, kW, iC, mC], 1 - [mC, iC, kH, kW], 2 - [mC, kH, kW, iC]

    int indIOioC, indIiH, indWmC(3);
    int indIOioC, indIiH, indWmC(0 == wFormat ? 3 : 0);
    if(!isNCHW) {
        indIOioC = 3; indIiH = 1;
    }

@ -109,7 +111,7 @@ DECLARE_SHAPE_FN(depthwise_conv2d) {

    const int oC = iC*mC;                                                       // output channels

    std::vector<Nd4jLong> expectedWeightsShape = {kH, kW, iC, mC};
    std::vector<Nd4jLong> expectedWeightsShape = ConvolutionUtils::expectWeightsShape(wFormat, kH, kW, iC, mC);
    REQUIRE_TRUE(shape::shapeEquals(4, expectedWeightsShape.data(), shape::rank(weightsShapeInfo), shape::shapeOf(weightsShapeInfo)), 0, "DEPTHWISECONV2D OP: wrong shape of weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsShape).c_str(), ShapeUtils::shapeAsString(weightsShapeInfo).c_str());
    if (biasShapeInfo)
        REQUIRE_TRUE(shape::rank(biasShapeInfo) <= 2 && oC == shape::length(biasShapeInfo), 0, "DEPTHWISECONV2D OP: wrong shape of array with biases, expected rank, length: <=2, %i, but got %i, %i instead !", oC, shape::rank(biasShapeInfo), shape::length(biasShapeInfo));
@ -148,13 +150,13 @@ DECLARE_SHAPE_FN(depthwise_conv2d) {
CUSTOM_OP_IMPL(depthwise_conv2d_bp, 3, 2, false, 0, 9) {

    auto input   = INPUT_VARIABLE(0);                                            // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW)
    auto weights = INPUT_VARIABLE(1);                                            // [kH, kW, iC, mC] always
    auto weights = INPUT_VARIABLE(1);                                            // [kH, kW, iC, mC], [mC, iC, kH, kW], [mC, kH, kW, iC]
    auto bias    = block.width() > 3 ? INPUT_VARIABLE(2) : nullptr;              // [oC] = [iC*mC]
    auto gradO   = block.width() > 3 ? INPUT_VARIABLE(3) : INPUT_VARIABLE(2);    // [bS, oH, oW, oC] (NHWC) or [bS, oC, oH, oW] (NCHW), epsilon_next

    auto gradI = OUTPUT_VARIABLE(0);                                             // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW), epsilon
    auto gradW = OUTPUT_VARIABLE(1);                                             // [kH, kW, iC, mC] always
    auto gradB = block.width() > 3 ? OUTPUT_VARIABLE(2) : nullptr;               // [oC]
    auto gradI = OUTPUT_NULLIFIED(0);                                            // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW), epsilon
    auto gradW = OUTPUT_NULLIFIED(1);                                            // [kH, kW, iC, mC], [mC, iC, kH, kW], [mC, kH, kW, iC]
    auto gradB = block.width() > 3 ? OUTPUT_NULLIFIED(2) : nullptr;              // [oC]

    REQUIRE_TRUE(input->rankOf()   == 4, 0, "CUSTOM DEPTHWISECONV2D_BP OP: rank of input array must be equal to 4, but got %i instead !", input->rankOf());
    REQUIRE_TRUE(weights->rankOf() == 4, 0, "CUSTOM DEPTHWISECONV2D_BP OP: rank of weights array must be equal to 4, but got %i instead !", weights->rankOf());
@ -170,23 +172,24 @@ CUSTOM_OP_IMPL(depthwise_conv2d_bp, 3, 2, false, 0, 9) {
    int dW = INT_ARG(7);                                                         // dilations width
    int isSameMode = INT_ARG(8);                                                 // 0-VALID, 1-SAME
    int isNCHW  = block.getIArguments()->size() > 9 ? !INT_ARG(9) : 1;           // INT_ARG(9): 1-NHWC, 0-NCHW
    int wFormat = block.getIArguments()->size() > 10 ? INT_ARG(10) : 0;          // 0 - [kH, kW, iC, mC], 1 - [mC, iC, kH, kW], 2 - [mC, kH, kW, iC]

    int bS, iC, iH, iW, mC, oC, oH, oW;                                          // batch size, input channels, input height/width, channels multiplier(oC = iC*mC), output channels, output height/width
    int indIOioC, indIiH, indWmC, indWiC, indWkH, indOoH;                        // corresponding indexes
    ConvolutionUtils::getSizesAndIndexesConv2d(isNCHW, *input, *gradO, bS, iC, iH, iW, oC, oH, oW, indIOioC, indIiH, indWiC, indWmC, indWkH, indOoH);
    ConvolutionUtils::getSizesAndIndexesConv2d(isNCHW, wFormat, *input, *gradO, bS, iC, iH, iW, oC, oH, oW, indIOioC, indIiH, indWiC, indWmC, indWkH, indOoH);
    mC = weights->sizeAt(indWmC);                                                // channels multiplier

    int trueoH, trueoW;                                                          // correct output height, width
    ConvolutionUtils::calcOutSizePool2D(trueoH, trueoW, kH, kW, sH, sW, pH, pW, dH, dW, iH, iW, isSameMode);

    std::vector<Nd4jLong> expectedGradOShape   = ShapeUtils::composeShapeUsingDimsAndIdx({bS,oC,trueoH,trueoW, 0,indIOioC,indOoH,indOoH+1});
    std::vector<Nd4jLong> expectedWeightsShape = {kH, kW, iC, mC};
    std::vector<Nd4jLong> expectedWeightsShape = ConvolutionUtils::expectWeightsShape(wFormat, kH, kW, iC, mC);
    REQUIRE_TRUE(gradO->isSameShape(expectedGradOShape), 0, "CUSTOM DEPTHWISECONV2D_BP OP: wrong shape of output gradients (next epsilon) array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedGradOShape).c_str(), ShapeUtils::shapeAsString(gradO).c_str());
    REQUIRE_TRUE(weights->isSameShape(expectedWeightsShape), 0, "CUSTOM DEPTHWISECONV2D_BP OP: wrong shape of weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsShape).c_str(), ShapeUtils::shapeAsString(weights).c_str());
    if(bias)
        REQUIRE_TRUE(bias->rankOf() <= 2 && oC == bias->lengthOf(), 0, "CUSTOM DEPTHWISECONV2D_BP OP: wrong shape of array with biases, expected rank, length: <=2, %i, but got %i, %i instead !", oC, bias->rankOf(), bias->lengthOf());

    ConvolutionUtils::depthwiseConv2dBP(block, input, weights, bias, gradO, gradI, gradW, gradB, kH,kW, sH,sW, pH,pW, dH,dW, isSameMode, isNCHW);
    ConvolutionUtils::depthwiseConv2dBP(block, input, weights, bias, gradO, gradI, gradW, gradB, kH,kW, sH,sW, pH,pW, dH,dW, isSameMode, isNCHW, wFormat);

    return Status::OK();
}
@ -214,8 +217,9 @@ DECLARE_SHAPE_FN(depthwise_conv2d_bp) {
    int dW = INT_ARG(7);                                                        // dilations width
    int isSameMode = INT_ARG(8);                                                // 0-VALID, 1-SAME
    int isNCHW  = block.getIArguments()->size() > 9 ? !INT_ARG(9) : 1;          // INT_ARG(9): 1-NHWC, 0-NCHW
    int wFormat = block.getIArguments()->size() > 10 ? INT_ARG(10) : 0;         // 0 - [kH, kW, iC, mC], 1 - [mC, iC, kH, kW], 2 - [mC, kH, kW, iC]

    int indIOioC, indIiH, indWmC(3);
    int indIOioC, indIiH, indWmC(0 == wFormat ? 3 : 0);
    if(!isNCHW) {
        indIOioC = 3; indIiH = 1;
    }

@ -234,7 +238,7 @@ DECLARE_SHAPE_FN(depthwise_conv2d_bp) {

    ConvolutionUtils::calcOutSizePool2D(trueoH, trueoW, kH, kW, sH, sW, pH, pW, dH, dW, iH, iW, isSameMode);

    std::vector<Nd4jLong> expectedGradOShape   = ShapeUtils::composeShapeUsingDimsAndIdx({bS,oC,trueoH,trueoW, 0,indIOioC,indIiH,indIiH+1});
    std::vector<Nd4jLong> expectedWeightsShape = {kH, kW, iC, mC};
    std::vector<Nd4jLong> expectedWeightsShape = ConvolutionUtils::expectWeightsShape(wFormat, kH, kW, iC, mC);
    REQUIRE_TRUE(shape::shapeEquals(4, expectedGradOShape.data(), shape::rank(gradOShapeInfo), shape::shapeOf(gradOShapeInfo)), 0, "CUSTOM DEPTHWISECONV2D_BP OP: wrong shape of output gradients (next epsilon) array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedGradOShape).c_str(), ShapeUtils::shapeAsString(gradOShapeInfo).c_str());
    REQUIRE_TRUE(shape::shapeEquals(4, expectedWeightsShape.data(), shape::rank(weightsShapeInfo), shape::shapeOf(weightsShapeInfo)), 0, "CUSTOM DEPTHWISECONV2D_BP OP: wrong shape of weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsShape).c_str(), ShapeUtils::shapeAsString(weightsShapeInfo).c_str());
    if(biasShapeInfo)

@ -30,8 +30,7 @@ namespace sd {
namespace ops {
    CUSTOM_OP_IMPL(im2col, 1, 1, false, 0, 9) {
        auto x = INPUT_VARIABLE(0);
        auto z = OUTPUT_VARIABLE(0);
        auto z = OUTPUT_NULLIFIED(0);

        REQUIRE_TRUE(x->rankOf() == 4, 0, "im2col input should be 4D, but got %i instead", x->rankOf());
        REQUIRE_TRUE(z->rankOf() == 6, 0, "im2col output should be 6D, but got %i instead", z->rankOf());

@ -53,8 +52,6 @@ namespace sd {

        LaunchContext* ctx = block.launchContext();
        sd::ops::helpers::im2col(*ctx, *x, *z, kernelHeight, kernelWidth, strideY, strideX, padHeight, padWidth, dY, dX, NDArrayFactory::create(zeroPadVal, block.launchContext()));

        STORE_RESULT(*z);

        return Status::OK();
    }

@ -107,7 +104,7 @@ namespace sd {

    CUSTOM_OP_IMPL(im2col_bp, 2, 1, false, 0, 9) {
        auto input = INPUT_VARIABLE(0);
        auto gradAtOutput = INPUT_VARIABLE(1);
        auto z = OUTPUT_VARIABLE(0);
        auto z = OUTPUT_NULLIFIED(0);

        REQUIRE_TRUE(input->rankOf() == 4, 0, "im2col_bp input should be 4D, but got %i instead", input->rankOf());
        REQUIRE_TRUE(gradAtOutput->rankOf() == 6, 0, "im2col_bp gradient at output (input idx 1) should be 6D, but got %i instead", gradAtOutput->rankOf());
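im2col itself only gains the nullified output here, but since the ops above lean on the col2im/vol2col family, a compact reference version of the 2d transform may help fix intuition. Minimal single-image NCHW sketch with unit dilation and zero padding fill; illustrative only, not the optimized helper this op actually calls:

    #include <cstddef>
    #include <vector>

    // input: [C, H, W] flattened; output: [C, kH, kW, oH, oW] flattened.
    // Each output position (oh, ow) receives its kH*kW receptive-field patch.
    void im2colSketch(const std::vector<float>& in, std::vector<float>& out,
                      int C, int H, int W, int kH, int kW, int sH, int sW, int pH, int pW) {
        const int oH = (H + 2 * pH - kH) / sH + 1;
        const int oW = (W + 2 * pW - kW) / sW + 1;
        out.assign((std::size_t)C * kH * kW * oH * oW, 0.f);   // zero is the pad value
        for (int c = 0; c < C; ++c)
            for (int ki = 0; ki < kH; ++ki)
                for (int kj = 0; kj < kW; ++kj)
                    for (int oh = 0; oh < oH; ++oh)
                        for (int ow = 0; ow < oW; ++ow) {
                            const int ih = oh * sH - pH + ki;
                            const int iw = ow * sW - pW + kj;
                            if (ih < 0 || ih >= H || iw < 0 || iw >= W) continue;
                            out[((((std::size_t)c * kH + ki) * kW + kj) * oH + oh) * oW + ow] =
                                in[((std::size_t)c * H + ih) * W + iw];
                        }
    }

The op's 6D output [bS, iC, kH, kW, oH, oW] is this per-image layout with the batch dimension in front.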
@ -29,7 +29,7 @@ namespace ops {
CUSTOM_OP_IMPL(pointwise_conv2d, 2, 1, false, 0, 0) {

    auto input   = INPUT_VARIABLE(0);                                   // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW)
    auto weights = INPUT_VARIABLE(1);                                   // [1, 1, iC, oC] always
    auto weights = INPUT_VARIABLE(1);                                   // [1, 1, iC, oC], [oC, iC, 1, 1], [oC, 1, 1, iC]
    auto bias    = block.width() > 2 ? INPUT_VARIABLE(2) : nullptr;     // [oC]

    auto output = OUTPUT_VARIABLE(0);                                   // [bS, iH, iW, oC] (NHWC) or [bS, oC, iH, iW] (NCHW)

@ -47,18 +47,19 @@ CUSTOM_OP_IMPL(pointwise_conv2d, 2, 1, false, 0, 0) {

    int pW = 0;                                                         // paddings width
    int dH = 1;                                                         // dilations height
    int dW = 1;                                                         // dilations width
    int isNCHW = block.getIArguments()->size() > 0 ? !INT_ARG(0) : 1;   // INT_ARG(0): 0-NCHW, 1-NHWC
    int isNCHW  = block.getIArguments()->size() > 0 ? !INT_ARG(0) : 1;  // INT_ARG(0): 0-NCHW, 1-NHWC
    int wFormat = block.getIArguments()->size() > 1 ? INT_ARG(1) : 0;   // 0 - [1, 1, iC, oC], 1 - [oC, iC, 1, 1], 2 - [oC, 1, 1, iC]

    int bS, iC, iH, iW, oC, oH, oW;                                     // batch size, input channels, input height/width, output channels, output height/width
    int indIOioC, indIiH, indWoC, indWiC, indWkH, indOoH;               // corresponding indexes
    ConvolutionUtils::getSizesAndIndexesConv2d(isNCHW, *input, *output, bS, iC, iH, iW, oC, oH, oW, indIOioC, indIiH, indWiC, indWoC, indWkH, indOoH);
    ConvolutionUtils::getSizesAndIndexesConv2d(isNCHW, wFormat, *input, *output, bS, iC, iH, iW, oC, oH, oW, indIOioC, indIiH, indWiC, indWoC, indWkH, indOoH);

    std::vector<Nd4jLong> expectedWeightsShape = {1, 1, iC, oC};
    std::vector<Nd4jLong> expectedWeightsShape = ConvolutionUtils::expectWeightsShape(wFormat, 1, 1, iC, oC);
    REQUIRE_TRUE(weights->isSameShape(expectedWeightsShape), 0, "CUSTOM POINTWISECONV2D OP: wrong shape of weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsShape).c_str(), ShapeUtils::shapeAsString(weights).c_str());
    if (bias)
        REQUIRE_TRUE(bias->rankOf() <= 2 && oC == bias->lengthOf(), 0, "CUSTOM POINTWISECONV2D OP: wrong shape of array with biases, expected rank, length: <=2, %i, but got %i, %i instead !", oC, bias->rankOf(), bias->lengthOf());

    ConvolutionUtils::conv2d(block, input, weights, bias, output, kH,kW, sH,sW, pH,pW, dH,dW, 1/*isSameMode*/, isNCHW);
    ConvolutionUtils::conv2d(block, input, weights, bias, output, kH,kW, sH,sW, pH,pW, dH,dW, 1/*isSameMode*/, isNCHW, wFormat);

    return Status::OK();
}
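pointwise_conv2d is the degenerate 1x1 case: kernel, stride and dilation are pinned to one and padding to zero, so the spatial extent is preserved and the op forwards straight to ConvolutionUtils::conv2d. Per pixel it reduces to a matrix multiply over channels; written out for wFormat 0 and NHWC:

    out[b, h, w, oc] = bias[oc] + sum over ic of ( in[b, h, w, ic] * weights[0, 0, ic, oc] )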
@ -73,7 +74,7 @@ CUSTOM_OP_IMPL(pointwise_conv2d, 2, 1, false, 0, 0) {
DECLARE_SHAPE_FN(pointwise_conv2d) {

    Nd4jLong* inputShapeInfo   = inputShape->at(0);                                 // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW)
    Nd4jLong* weightsShapeInfo = inputShape->at(1);                                 // [1, 1, iC, oC] always
    Nd4jLong* weightsShapeInfo = inputShape->at(1);                                 // [1, 1, iC, oC], [oC, iC, 1, 1], [oC, 1, 1, iC]
    Nd4jLong* biasShapeInfo    = block.width() > 2 ? inputShape->at(2) : nullptr;   // [oC]

    const int rank = 4;

@ -81,8 +82,9 @@ DECLARE_SHAPE_FN(pointwise_conv2d) {

    REQUIRE_TRUE(weightsShapeInfo[0] == rank, 0, "CUSTOM POINTWISECONV2D OP: rank of weights array must be equal to %i, but got %i instead !", rank, weightsShapeInfo[0]);

    int isNCHW  = block.getIArguments()->size() > 0 ? !INT_ARG(0) : 1;          // INT_ARG(0): 0-NCHW, 1-NHWC
    int wFormat = block.getIArguments()->size() > 1 ? INT_ARG(1) : 0;           // 0 - [1, 1, iC, oC], 1 - [oC, iC, 1, 1], 2 - [oC, 1, 1, iC]

    int indIOioC, indWoC(3);
    int indIOioC, indWoC(0 == wFormat ? 3 : 0);
    if(!isNCHW)
        indIOioC = 3;
    else

@ -92,7 +94,7 @@ DECLARE_SHAPE_FN(pointwise_conv2d) {

    const int iC = inputShapeInfo[indIOioC+1];                                  // input channels
    const int oC = weightsShapeInfo[indWoC+1];                                  // output channels

    std::vector<Nd4jLong> expectedWeightsShape = {1, 1, iC, oC};
    std::vector<Nd4jLong> expectedWeightsShape = ConvolutionUtils::expectWeightsShape(wFormat, 1, 1, iC, oC);
    REQUIRE_TRUE(ShapeUtils::areShapesEqual(weightsShapeInfo, expectedWeightsShape), 0, "POINTWISECONV2D OP: wrong shape of weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsShape).c_str(), ShapeUtils::shapeAsString(weightsShapeInfo).c_str());
    if (biasShapeInfo)
        REQUIRE_TRUE(biasShapeInfo[0] <= 2 && oC == shape::length(biasShapeInfo), 0, "POINTWISECONV2D OP: wrong shape of array with biases, expected rank, length: <=2, %i, but got %i, %i instead !", oC, biasShapeInfo[0], shape::length(biasShapeInfo));

@ -33,11 +33,11 @@ namespace ops {
CUSTOM_OP_IMPL(sconv2d, 2, 1, false, 0, 9) {

    NDArray *input        = INPUT_VARIABLE(0);                          // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW)
    NDArray *weightsDepth = INPUT_VARIABLE(1);                          // [kH, kW, iC, mC] always
    NDArray *weightsPoint = nullptr;                                    // [1, 1, iC*mC, oC] always
    NDArray *weightsDepth = INPUT_VARIABLE(1);                          // [kH, kW, iC, mC], [mC, iC, kH, kW], [mC, kH, kW, iC]
    NDArray *weightsPoint = nullptr;                                    // [1, 1, iC*mC, oC], [oC, iC*mC, 1, 1], [oC, 1, 1, iC*mC]
    NDArray *bias         = nullptr;                                    // [oC], if weightsPoint=nullptr then oC = iC*mC

    NDArray *output = OUTPUT_VARIABLE(0);                               // [bS, oH, oW, oC] (NHWC) or [bS, oC, oH, oW] (NCHW)
    NDArray *output = OUTPUT_NULLIFIED(0);                              // [bS, oH, oW, oC] (NHWC) or [bS, oC, oH, oW] (NCHW)

    if(block.width() == 3) {
        if((INPUT_VARIABLE(2))->rankOf() == 4)

@ -66,17 +66,19 @@ CUSTOM_OP_IMPL(sconv2d, 2, 1, false, 0, 9) {

    int dH = INT_ARG(6);                                                // dilations height
    int dW = INT_ARG(7);                                                // dilations width
    int isSameMode = INT_ARG(8);                                        // 0-VALID, 1-SAME
    int isNCHW = block.getIArguments()->size() > 9 ? !INT_ARG(9) : 1;   // INT_ARG(9): 0-NCHW, 1-NHWC
    int isNCHW  = block.getIArguments()->size() > 9 ? !INT_ARG(9) : 1;  // INT_ARG(9): 0-NCHW, 1-NHWC
    int wFormat = block.getIArguments()->size() > 10 ? INT_ARG(10) : 0; // 0 - [kH, kW, iC, mC], 1 - [mC, iC, kH, kW], 2 - [mC, kH, kW, iC]

    int bS, iC, iH, iW, mC, oC, oH, oW;                                 // batch size, input channels, input height/width, channels multiplier, output channels, output height/width
    int indIOioC, indIiH, indWmC, indWiC, indWkH, indOoH;               // corresponding indexes
    ConvolutionUtils::getSizesAndIndexesConv2d(isNCHW, *input, *output, bS, iC, iH, iW, oC, oH, oW, indIOioC, indIiH, indWiC, indWmC, indWkH, indOoH);
    ConvolutionUtils::getSizesAndIndexesConv2d(isNCHW, wFormat, *input, *output, bS, iC, iH, iW, oC, oH, oW, indIOioC, indIiH, indWiC, indWmC, indWkH, indOoH);
    mC = weightsDepth->sizeAt(indWmC);                                  // channels multiplier

    std::vector<Nd4jLong> expectedWeightsDShape = {kH, kW, iC, mC};
    std::vector<Nd4jLong> expectedWeightsDShape = ConvolutionUtils::expectWeightsShape(wFormat, kH, kW, iC, mC);
    REQUIRE_TRUE(weightsDepth->isSameShape(expectedWeightsDShape), 0, " SCONV2D OP: wrong shape of weightsDepth array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsDShape).c_str(), ShapeUtils::shapeAsString(weightsDepth).c_str());
    if(weightsPoint) {
        std::vector<Nd4jLong> expectedWeightsPShape = {1, 1, iC*mC, oC};
        std::vector<Nd4jLong> expectedWeightsPShape = ConvolutionUtils::expectWeightsShape(wFormat, 1, 1, iC*mC, oC);
        REQUIRE_TRUE(weightsPoint->isSameShape(expectedWeightsPShape), 0, " SCONV2D OP: wrong shape of weightsPoint array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsPShape).c_str(), ShapeUtils::shapeAsString(weightsPoint).c_str());
    }
    if (bias)
@ -84,11 +86,11 @@ CUSTOM_OP_IMPL(sconv2d, 2, 1, false, 0, 9) {

    if (iC == 1) {
        nd4j_debug("SCONV2D OP: for input_channels = 1 this op is equivalent to standard conv2d\n","");
        ConvolutionUtils::conv2d(block, input, weightsDepth, bias, output, kH,kW, sH,sW, pH,pW, dH,dW, isSameMode, isNCHW);
        ConvolutionUtils::conv2d(block, input, weightsDepth, bias, output, kH,kW, sH,sW, pH,pW, dH,dW, isSameMode, isNCHW, wFormat);
        return Status::OK();
    }

    ConvolutionUtils::sconv2d(block, input, weightsDepth, weightsPoint, bias, output, kH,kW, sH,sW, pH,pW, dH,dW, isSameMode, isNCHW);
    ConvolutionUtils::sconv2d(block, input, weightsDepth, weightsPoint, bias, output, kH,kW, sH,sW, pH,pW, dH,dW, isSameMode, isNCHW, wFormat);

    return Status::OK();
}
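When weightsPoint is present, sconv2d is a depthwise pass followed by a 1x1 pointwise pass, and the iC == 1 shortcut above collapses the whole thing to a plain conv2d. A hedged composition sketch using the two ops from this same PR; treat the evaluate() helper and the SAME/NHWC settings as illustrative assumptions rather than the op's actual internals:

    // sconv2d(input, wD, wP) ~ pointwise_conv2d(depthwise_conv2d(input, wD), wP)
    sd::ops::depthwise_conv2d dw;
    sd::ops::pointwise_conv2d pw;
    // depthwise iArgs: kH,kW, sH,sW, pH,pW, dH,dW, isSameMode, dataFormat, wFormat
    auto mid = dw.evaluate({&input, &weightsDepth}, {}, {kH, kW, 1, 1, 0, 0, 1, 1, 1, 1, wFormat});
    // pointwise iArgs: dataFormat, wFormat
    auto out = pw.evaluate({mid.at(0), &weightsPoint}, {}, {1, wFormat});

ConvolutionUtils::sconv2d fuses the same two stages internally, which avoids materializing the intermediate beyond the resultFF buffer used in the backprop path below.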
@ -103,8 +105,8 @@ CUSTOM_OP_IMPL(sconv2d, 2, 1, false, 0, 9) {

DECLARE_SHAPE_FN(sconv2d) {

    auto inputShapeInfo    = inputShape->at(0);                         // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW)
    auto weightsDShapeInfo = inputShape->at(1);                         // [kH, kW, iC, mC] always
    Nd4jLong* weightsPShapeInfo = nullptr;                              // [1, 1, iC*mC, oC] always
    auto weightsDShapeInfo = inputShape->at(1);                         // [kH, kW, iC, mC], [mC, iC, kH, kW], [mC, kH, kW, iC]
    Nd4jLong* weightsPShapeInfo = nullptr;                              // [1, 1, iC*mC, oC], [oC, iC*mC, 1, 1], [oC, 1, 1, iC*mC]
    Nd4jLong* biasShapeInfo     = nullptr;                              // [oC], oC = iC*mC if weightsPoint=nullptr

    if(block.width() == 3)

@ -135,8 +137,9 @@ DECLARE_SHAPE_FN(sconv2d) {

    int dW = INT_ARG(7);                                                // dilations width
    int isSameMode = INT_ARG(8);                                        // 0-VALID, 1-SAME
    int isNCHW  = block.getIArguments()->size() > 9 ? !INT_ARG(9) : 1;  // INT_ARG(9): 1-NHWC, 0-NCHW
    int wFormat = block.getIArguments()->size() > 10 ? INT_ARG(10) : 0; // 0 - [kH, kW, iC, mC], 1 - [mC, iC, kH, kW], 2 - [mC, kH, kW, iC]

    int indIOioC, indIiH, indWmC(3);
    int indIOioC, indIiH, indWmC(0 == wFormat ? 3 : 0);
    if(!isNCHW) {
        indIOioC = 3; indIiH = 1;
    }

@ -148,13 +151,13 @@ DECLARE_SHAPE_FN(sconv2d) {

    const int iH = inputShapeInfo[indIiH+1];                            // input height
    const int iW = inputShapeInfo[indIiH+2];                            // input width
    const int iC = inputShapeInfo[indIOioC+1];                          // input channels
    const int mC = weightsDShapeInfo[indWmC+1];                         // channel multiplier
    const int oC = weightsPShapeInfo ? weightsPShapeInfo[indWmC+1] : iC*mC;   // output channels (oC or iC*mC)
    const int mC = weightsDShapeInfo[indWmC+1];                         // channel multiplier
    const int oC = weightsPShapeInfo ? weightsPShapeInfo[indWmC+1] : iC*mC;   // output channels (oC or iC*mC)

    std::vector<Nd4jLong> expectedWeightsDShape = {kH, kW, iC, mC};
    std::vector<Nd4jLong> expectedWeightsDShape = ConvolutionUtils::expectWeightsShape(wFormat, kH, kW, iC, mC);
    REQUIRE_TRUE(ShapeUtils::areShapesEqual(weightsDShapeInfo, expectedWeightsDShape), 0, "SCONV2D OP: wrong shape of depth weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsDShape).c_str(), ShapeUtils::shapeAsString(weightsDShapeInfo).c_str());
    if(weightsPShapeInfo) {
        std::vector<Nd4jLong> expectedWeightsPShape = {1, 1, iC*mC, oC};
        std::vector<Nd4jLong> expectedWeightsPShape = ConvolutionUtils::expectWeightsShape(wFormat, 1, 1, iC*mC, oC);
        REQUIRE_TRUE(ShapeUtils::areShapesEqual(weightsPShapeInfo, expectedWeightsPShape), 0, "SCONV2D OP: wrong shape of point array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsPShape).c_str(), ShapeUtils::shapeAsString(weightsPShapeInfo).c_str());
    }
    if (biasShapeInfo)
@ -195,30 +198,30 @@ CUSTOM_OP_IMPL(sconv2d_bp, 3, 2, false, 0, 9) {

    NDArray *input        = INPUT_VARIABLE(0);                          // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW)
    NDArray *gradO        = INPUT_VARIABLE(1);                          // [bS, oH, oW, oC] (NHWC) or [bS, oC, oH, oW] (NCHW), epsilon_next
    NDArray *weightsDepth = INPUT_VARIABLE(2);                          // [kH, kW, iC, mC] always
    NDArray *weightsPoint = nullptr;                                    // [1, 1, iC*mC, oC] always
    NDArray *weightsDepth = INPUT_VARIABLE(2);                          // [kH, kW, iC, mC], [mC, iC, kH, kW], [mC, kH, kW, iC]
    NDArray *weightsPoint = nullptr;                                    // [1, 1, iC*mC, oC], [oC, iC*mC, 1, 1], [oC, 1, 1, iC*mC]
    NDArray *bias         = nullptr;                                    // [oC], oC = iC*mC if weightsPoint=nullptr

    NDArray *gradI  = OUTPUT_VARIABLE(0);                               // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW), epsilon
    NDArray *gradWD = OUTPUT_VARIABLE(1);                               // [kH, kW, iC, mC] always
    NDArray *gradWP = nullptr;                                          // [1, 1, iC*mC, oC] always
    NDArray *gradI  = OUTPUT_NULLIFIED(0);                              // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW), epsilon
    NDArray *gradWD = OUTPUT_NULLIFIED(1);                              // [kH, kW, iC, mC], [mC, iC, kH, kW], [mC, kH, kW, iC]
    NDArray *gradWP = nullptr;                                          // [1, 1, iC*mC, oC], [oC, iC*mC, 1, 1], [oC, 1, 1, iC*mC]
    NDArray *gradB  = nullptr;                                          // [oC]

    if(block.width() == 4) {
        if((INPUT_VARIABLE(3))->rankOf() == 4) {
            weightsPoint = INPUT_VARIABLE(3);
            gradWP = OUTPUT_VARIABLE(2);
            gradWP = OUTPUT_NULLIFIED(2);
        }
        else {
            bias = INPUT_VARIABLE(3);
            gradB = OUTPUT_VARIABLE(2);
            gradB = OUTPUT_NULLIFIED(2);
        }
    }
    else if(block.width() == 5) {
        weightsPoint = INPUT_VARIABLE(3);
        bias = INPUT_VARIABLE(4);
        gradWP = OUTPUT_VARIABLE(2);
        gradB = OUTPUT_VARIABLE(3);
        gradWP = OUTPUT_NULLIFIED(2);
        gradB = OUTPUT_NULLIFIED(3);
    }
@ -244,17 +247,18 @@ CUSTOM_OP_IMPL(sconv2d_bp, 3, 2, false, 0, 9) {

    int dW = INT_ARG(7);                                                // dilations width
    int isSameMode = INT_ARG(8);                                        // 0-VALID, 1-SAME
    int isNCHW  = block.getIArguments()->size() > 9 ? !INT_ARG(9) : 1;  // INT_ARG(9): 0-NCHW, 1-NHWC
    int wFormat = block.getIArguments()->size() > 10 ? INT_ARG(10) : 0; // 0 - [kH, kW, iC, mC], 1 - [mC, iC, kH, kW], 2 - [mC, kH, kW, iC]

    int bS, iC, iH, iW, mC, oC, oH, oW;                                 // batch size, input channels, input height/width, channels multiplier, output channels, output height/width
    int indIOioC, indIiH, indWmC, indWiC, indWkH, indOoH;               // corresponding indexes
    ConvolutionUtils::getSizesAndIndexesConv2d(isNCHW, *input, *gradO, bS, iC, iH, iW, oC, oH, oW, indIOioC, indIiH, indWiC, indWmC, indWkH, indOoH);
    ConvolutionUtils::getSizesAndIndexesConv2d(isNCHW, wFormat, *input, *gradO, bS, iC, iH, iW, oC, oH, oW, indIOioC, indIiH, indWiC, indWmC, indWkH, indOoH);
    mC = weightsDepth->sizeAt(indWmC);                                  // channels multiplier

    std::vector<Nd4jLong> expectedWeightsDShape = {kH, kW, iC, mC};
    std::vector<Nd4jLong> expectedWeightsDShape = ConvolutionUtils::expectWeightsShape(wFormat, kH, kW, iC, mC);
    REQUIRE_TRUE(weightsDepth->isSameShape(expectedWeightsDShape), 0, " SCONV2D_BP OP: wrong shape of weightsDepth array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsDShape).c_str(), ShapeUtils::shapeAsString(weightsDepth).c_str());
    REQUIRE_TRUE(gradWD->isSameShape(expectedWeightsDShape), 0, " SCONV2D_BP OP: wrong shape of gradWD array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsDShape).c_str(), ShapeUtils::shapeAsString(gradWD).c_str());
    if(weightsPoint) {
        std::vector<Nd4jLong> expectedWeightsPShape = {1, 1, iC*mC, oC};
        std::vector<Nd4jLong> expectedWeightsPShape = ConvolutionUtils::expectWeightsShape(wFormat, 1, 1, iC*mC, oC);
        REQUIRE_TRUE(weightsPoint->isSameShape(expectedWeightsPShape), 0, " SCONV2D_BP OP: wrong shape of weightsPoint array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsPShape).c_str(), ShapeUtils::shapeAsString(weightsPoint).c_str());
        REQUIRE_TRUE(gradWP->isSameShape(expectedWeightsPShape), 0, " SCONV2D_BP OP: wrong shape of gradWP array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsPShape).c_str(), ShapeUtils::shapeAsString(gradWP).c_str());
    }
@ -274,12 +278,12 @@ CUSTOM_OP_IMPL(sconv2d_bp, 3, 2, false, 0, 9) {

    auto resultFFShape = isNCHW ? std::vector<Nd4jLong>({bS, mC*iC, oH, oW}) : std::vector<Nd4jLong>({bS, oH, oW, mC*iC});
    auto resultFF = NDArrayFactory::create_(input->ordering(), resultFFShape, input->dataType(), block.launchContext());
    ConvolutionUtils::sconv2d(block, input, weightsDepth, nullptr, nullptr, resultFF, kH,kW, sH,sW, pH,pW, dH,dW, isSameMode, isNCHW);
    ConvolutionUtils::sconv2d(block, input, weightsDepth, nullptr, nullptr, resultFF, kH,kW, sH,sW, pH,pW, dH,dW, isSameMode, isNCHW, wFormat);

    auto gradIDepthShape = ShapeUtils::composeShapeUsingDimsAndIdx({bS,iC*mC,oH,oW, 0,indIOioC,indIiH,indIiH+1});
    auto gradIDepth = NDArrayFactory::create_(resultFF->ordering(), gradIDepthShape, resultFF->dataType(), block.launchContext());        // [bS, oH, oW, iC*mC] (NHWC) or [bS, iC*mC, oH, oW] (NCHW)

    ConvolutionUtils::conv2dBP(block, resultFF, weightsPoint, bias, gradO, gradIDepth, gradWP, gradB, 1,1, 1,1, 0,0, 1,1, isSameMode, isNCHW);            // in this case oH=iH and oW=iW
    ConvolutionUtils::conv2dBP(block, resultFF, weightsPoint, bias, gradO, gradIDepth, gradWP, gradB, 1,1, 1,1, 0,0, 1,1, isSameMode, isNCHW, wFormat);   // in this case oH=iH and oW=iW

    gradO = gradIDepth;
    bias = gradB = nullptr;        // if pointwise backprop was done then don't calculate gradB at depthwise_conv2d_bp step
@ -288,7 +292,7 @@ CUSTOM_OP_IMPL(sconv2d_bp, 3, 2, false, 0, 9) {

    }

    // ----- apply depthwise_conv2d_bp ----- //
    ConvolutionUtils::depthwiseConv2dBP(block, input, weightsDepth, bias, gradO, gradI, gradWD, gradB, kH,kW, sH,sW, pH,pW, dH,dW, isSameMode, isNCHW);
    ConvolutionUtils::depthwiseConv2dBP(block, input, weightsDepth, bias, gradO, gradI, gradWD, gradB, kH,kW, sH,sW, pH,pW, dH,dW, isSameMode, isNCHW, wFormat);

    if(weightsPoint)
        delete gradO;
@ -301,8 +305,8 @@ DECLARE_SHAPE_FN(sconv2d_bp) {

    auto inputShapeInfo    = inputShape->at(0);                         // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW)
    auto gradOShapeInfo    = inputShape->at(1);                         // [bS, oH, oW, oC] (NHWC) or [bS, oC, oH, oW] (NCHW), epsilon_next
    auto weightsDShapeInfo = inputShape->at(2);                         // [kH, kW, iC, mC] always
    Nd4jLong* weightsPShapeInfo = nullptr;                              // [1, 1, iC*mC, oC] always
    auto weightsDShapeInfo = inputShape->at(2);                         // [kH, kW, iC, mC], [mC, iC, kH, kW], [mC, kH, kW, iC]
    Nd4jLong* weightsPShapeInfo = nullptr;                              // [1, 1, iC*mC, oC], [oC, iC*mC, 1, 1], [oC, 1, 1, iC*mC]
    Nd4jLong* biasShapeInfo     = nullptr;                              // [oC], oC = iC*mC if weightsPoint=nullptr

    if(block.width() == 4) {

@ -335,8 +339,9 @@ DECLARE_SHAPE_FN(sconv2d_bp) {

    int dW = INT_ARG(7);                                                // dilations width
    int isSameMode = INT_ARG(8);                                        // 0-VALID, 1-SAME
    int isNCHW  = block.getIArguments()->size() > 9 ? !INT_ARG(9) : 1;  // INT_ARG(9): 0-NCHW, 1-NHWC
    int wFormat = block.getIArguments()->size() > 10 ? INT_ARG(10) : 0; // 0 - [kH, kW, iC, mC], 1 - [mC, iC, kH, kW], 2 - [mC, kH, kW, iC]

    int indIOioC, indIiH, indWmC(3);
    int indIOioC, indIiH, indWmC(0 == wFormat ? 3 : 0);
    if(!isNCHW) {
        indIOioC = 3; indIiH = 1;
    }
@ -356,10 +361,10 @@ DECLARE_SHAPE_FN(sconv2d_bp) {

    std::vector<Nd4jLong> expectedGradOShapeInfo = ShapeUtils::composeShapeUsingDimsAndIdx({bS,oC,trueoH,trueoW, 0,indIOioC,indIiH,indIiH+1});
    REQUIRE_TRUE(ShapeUtils::areShapesEqual(gradOShapeInfo, expectedGradOShapeInfo), 0, "SCONV2D_BP OP: wrong shape of output gradients (next epsilon) array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedGradOShapeInfo).c_str(), ShapeUtils::shapeAsString(gradOShapeInfo).c_str());
    std::vector<Nd4jLong> expectedWeightsDShape = {kH, kW, iC, mC};
    std::vector<Nd4jLong> expectedWeightsDShape = ConvolutionUtils::expectWeightsShape(wFormat, kH, kW, iC, mC);
    REQUIRE_TRUE(ShapeUtils::areShapesEqual(weightsDShapeInfo, expectedWeightsDShape), 0, "SCONV2D_BP OP: wrong shape of depth weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsDShape).c_str(), ShapeUtils::shapeAsString(weightsDShapeInfo).c_str());
    if(weightsPShapeInfo) {
        std::vector<Nd4jLong> expectedWeightsPShape = {1, 1, iC*mC, oC};
        std::vector<Nd4jLong> expectedWeightsPShape = ConvolutionUtils::expectWeightsShape(wFormat, 1, 1, iC*mC, oC);
        REQUIRE_TRUE(ShapeUtils::areShapesEqual(weightsPShapeInfo, expectedWeightsPShape), 0, "SCONV2D_BP OP: wrong shape of point array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedWeightsPShape).c_str(), ShapeUtils::shapeAsString(weightsPShapeInfo).c_str());
    }
    if (biasShapeInfo)

@ -32,7 +32,7 @@ namespace ops {
//////////////////////////////////////////////////////////////////////
CUSTOM_OP_IMPL(upsampling2d, 1, 1, false, 0, 2) {

    auto input  = INPUT_VARIABLE(0);              // [bS, iC, iH, iW] (NCHW) or [bS, iH, iW, iC] (NHWC)
    auto output = OUTPUT_VARIABLE(0);             // [bS, iC, factorH*iH, factorW*iW ] (NCHW) or [bS, factorH*iH, factorW*iW, iC] (NHWC)
    auto output = OUTPUT_NULLIFIED(0);            // [bS, iC, factorH*iH, factorW*iW ] (NCHW) or [bS, factorH*iH, factorW*iW, iC] (NHWC)

    const int factorH = INT_ARG(0);
    const int factorW = INT_ARG(1);

@ -97,7 +97,7 @@ CUSTOM_OP_IMPL(upsampling2d_bp, 2, 1, false, 0, 0) {

    // NDArray<T>* input = INPUT_VARIABLE(0);     // [bS, iC, iH, iW] (NCHW) or [bS, iH, iW, iC] (NHWC)
    auto gradO = INPUT_VARIABLE(1);               // [bS, iC, factorH*iH, factorW*iW ] (NCHW) or [bS, factorH*iH, factorW*iW, iC] (NHWC)
    auto gradI = OUTPUT_VARIABLE(0);              // [bS, iC, iH, iW] (NCHW) or [bS, iH, iW, iC] (NHWC)
    auto gradI = OUTPUT_NULLIFIED(0);             // [bS, iC, iH, iW] (NCHW) or [bS, iH, iW, iC] (NHWC)

    const int isNCHW = block.getIArguments()->size() > 0 ? INT_ARG(0) : 0;      // INT_ARG(0): 0-NCHW, 1-NHWC

@ -31,7 +31,7 @@ namespace ops {
//////////////////////////////////////////////////////////////////////
CUSTOM_OP_IMPL(upsampling3d, 1, 1, false, 0, 3) {

    auto input  = INPUT_VARIABLE(0);              // [bS, iC, iD, iH, iW] (NCDHW) or [bS, iD, iH, iW, iC] (NDHWC)
    auto output = OUTPUT_VARIABLE(0);             // [bS, iC, factorD*iD, factorH*iH, factorW*iW ] (NCDHW) or [bS, factorD*iD, factorH*iH, factorW*iW, iC] (NDHWC)
    auto output = OUTPUT_NULLIFIED(0);            // [bS, iC, factorD*iD, factorH*iH, factorW*iW ] (NCDHW) or [bS, factorD*iD, factorH*iH, factorW*iW, iC] (NDHWC)

    const int factorD = INT_ARG(0);
    const int factorH = INT_ARG(1);

@ -97,7 +97,7 @@ DECLARE_SHAPE_FN(upsampling3d) {

CUSTOM_OP_IMPL(upsampling3d_bp, 2, 1, false, 0, 0) {
    // NDArray<T>* input = INPUT_VARIABLE(0);     // [bS, iC, iD, iH, iW] (NCDHW) or [bS, iD, iH, iW, iC] (NDHWC)
    auto gradO = INPUT_VARIABLE(1);               // [bS, iC, factorD*iD, factorH*iH, factorW*iW ] (NCDHW) or [bS, factorD*iD, factorH*iH, factorW*iW, iC] (NDHWC)
    auto gradI = OUTPUT_VARIABLE(0);              // [bS, iC, iD, iH, iW] (NCDHW) or [bS, iD, iH, iW, iC] (NDHWC)
    auto gradI = OUTPUT_NULLIFIED(0);             // [bS, iC, iD, iH, iW] (NCDHW) or [bS, iD, iH, iW, iC] (NDHWC)

    const int isNCDHW = block.getIArguments()->size() > 0 ? INT_ARG(0) : 0;     // INT_ARG(0): 0-NCDHW, 1-NDHWC

@ -31,7 +31,7 @@ namespace ops {
CUSTOM_OP_IMPL(avgpool2d, 1, 1, false, 0, 10) {

    auto input  = INPUT_VARIABLE(0);
    auto output = OUTPUT_VARIABLE(0);
    auto output = OUTPUT_NULLIFIED(0);

    // 0,1 - kernel Height/Width; 2,3 - stride Height/Width; 4,5 - pad Height/Width; 6,7 - dilation Height/Width; 8 - same mode;

@ -147,7 +147,7 @@ CUSTOM_OP_IMPL(avgpool2d_bp, 2, 1, false, 0, 10) {

    auto input = INPUT_VARIABLE(0);               // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW)
    auto gradO = INPUT_VARIABLE(1);               // [bS, oH, oW, oC] (NHWC) or [bS, oC, oH, oW] (NCHW), epsilon_next
    auto gradI = OUTPUT_VARIABLE(0);              // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW), epsilon
    auto gradI = OUTPUT_NULLIFIED(0);             // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW), epsilon

    int kH = INT_ARG(0);                          // filter(kernel) height
    int kW = INT_ARG(1);                          // filter(kernel) width

@ -166,7 +166,7 @@ CUSTOM_OP_IMPL(avgpool2d_bp, 2, 1, false, 0, 10) {

    int bS, iC, iH, iW, oC, oH, oW;                          // batch size, input channels, input height/width, output channels, output height/width
    int indIOioC, indIiH, indWoC, indWiC, indWkH, indOoH;    // corresponding indexes
    ConvolutionUtils::getSizesAndIndexesConv2d(isNCHW, *input, *gradO, bS, iC, iH, iW, oC, oH, oW, indIOioC, indIiH, indWiC, indWoC, indWkH, indOoH);
    ConvolutionUtils::getSizesAndIndexesConv2d(isNCHW, 0, *input, *gradO, bS, iC, iH, iW, oC, oH, oW, indIOioC, indIiH, indWiC, indWoC, indWkH, indOoH);

    std::vector<Nd4jLong> expectedGradOShape = ShapeUtils::composeShapeUsingDimsAndIdx({bS,iC,oH,oW, 0,indIOioC,indIiH,indIiH+1});
    std::vector<Nd4jLong> expectedGradIShape = ShapeUtils::composeShapeUsingDimsAndIdx({bS,iC,iH,iW, 0,indIOioC,indIiH,indIiH+1});
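One detail worth noting in this and the remaining pooling diffs (avgpool3dnew, maxpool2d): pooling ops have no weights input, so they pass a literal 0 for the new wFormat parameter of getSizesAndIndexesConv2d/3d, and the weights-related index outputs (indWiC, indWoC, indWkH/indWkD) are simply unused by these ops.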
@ -32,7 +32,7 @@ namespace ops {
CUSTOM_OP_IMPL(avgpool3dnew, 1, 1, false, 0, 14) {

    auto input  = INPUT_VARIABLE(0);              // [bS, iD, iH, iW, iC] (NDHWC) or [bS, iC, iD, iH, iW] (NCDHW)
    auto output = OUTPUT_VARIABLE(0);             // [bS, oD, oH, oW, iC] (NDHWC) or [bS, iC, oD, oH, oW] (NCDHW)
    auto output = OUTPUT_NULLIFIED(0);            // [bS, oD, oH, oW, iC] (NDHWC) or [bS, iC, oD, oH, oW] (NCDHW)

    int kD = INT_ARG(0);                          // filter(kernel) depth
    int kH = INT_ARG(1);                          // filter(kernel) height

@ -55,7 +55,7 @@ CUSTOM_OP_IMPL(avgpool3dnew, 1, 1, false, 0, 14) {

    int bS, iC, iD, iH, iW, oC, oD, oH, oW;          // batch size, input channels, input depth/height/width, output channels, output depth/height/width
    int indIOioC, indIOioD, indWoC, indWiC, indWkD;  // corresponding indexes
    ConvolutionUtils::getSizesAndIndexesConv3d(isNCDHW, *input, *output, bS, iC, iD, iH, iW, oC, oD, oH, oW, indIOioC, indIOioD, indWiC, indWoC, indWkD);
    ConvolutionUtils::getSizesAndIndexesConv3d(isNCDHW, 0, *input, *output, bS, iC, iD, iH, iW, oC, oD, oH, oW, indIOioC, indIOioD, indWiC, indWoC, indWkD);

    std::vector<Nd4jLong> expectedOutputShape = ShapeUtils::composeShapeUsingDimsAndIdx({bS,iC,oD,oH,oW, 0,indIOioC,indIOioD,indIOioD+1,indIOioD+2});
    REQUIRE_TRUE(output->isSameShape(expectedOutputShape), 0, "AVGPOOL3DNEW OP: wrong shape of output array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expectedOutputShape).c_str(), ShapeUtils::shapeAsString(output).c_str());

@ -149,7 +149,7 @@ CUSTOM_OP_IMPL(avgpool3dnew_bp, 2, 1, false, 0, 14) {

    auto input = INPUT_VARIABLE(0);               // [bS, iD, iH, iW, iC] (NDHWC) or [bS, iC, iD, iH, iW] (NCDHW)
    auto gradO = INPUT_VARIABLE(1);               // [bS, oD, oH, oW, oC] (NDHWC) or [bS, oC, oD, oH, oW] (NCDHW), epsilon_next
    auto gradI = OUTPUT_VARIABLE(0);              // [bS, iD, iH, iW, iC] (NDHWC) or [bS, iC, iD, iH, iW] (NCDHW), epsilon
    auto gradI = OUTPUT_NULLIFIED(0);             // [bS, iD, iH, iW, iC] (NDHWC) or [bS, iC, iD, iH, iW] (NCDHW), epsilon

    const int kD = INT_ARG(0);                    // filter(kernel) depth
    const int kH = INT_ARG(1);                    // filter(kernel) height

@ -172,7 +172,7 @@ CUSTOM_OP_IMPL(avgpool3dnew_bp, 2, 1, false, 0, 14) {

    int bS, iC, iD, iH, iW, oC, oD, oH, oW;          // batch size, input channels, input depth/height/width, output channels, output depth/height/width
    int indIOioC, indIOioD, indWoC, indWiC, indWkD;  // corresponding indexes
    ConvolutionUtils::getSizesAndIndexesConv3d(isNCDHW, *input, *gradO, bS, iC, iD, iH, iW, oC, oD, oH, oW, indIOioC, indIOioD, indWiC, indWoC, indWkD);
    ConvolutionUtils::getSizesAndIndexesConv3d(isNCDHW, 0, *input, *gradO, bS, iC, iD, iH, iW, oC, oD, oH, oW, indIOioC, indIOioD, indWiC, indWoC, indWkD);

    std::vector<Nd4jLong> expectedGradOShape = ShapeUtils::composeShapeUsingDimsAndIdx({bS,iC,oD,oH,oW, 0,indIOioC,indIOioD,indIOioD+1,indIOioD+2});
    std::vector<Nd4jLong> expectedGradIShape = ShapeUtils::composeShapeUsingDimsAndIdx({bS,iC,iD,iH,iW, 0,indIOioC,indIOioD,indIOioD+1,indIOioD+2});
@ -38,7 +38,7 @@ CUSTOM_OP_IMPL(maxpool2d, 1, 1, false, 0, 9) {
    REQUIRE_TRUE(input->rankOf() == 4, 0, "MAXPOOL2D OP: input array should have rank of 4, but got %i instead", input->rankOf());

    // 0,1 - kernel Height/Width; 2,3 - stride Height/Width; 4,5 - pad Height/Width; 6,7 - dilation Height/Width; 8 - same mode;
    auto output = OUTPUT_VARIABLE(0);
    auto output = OUTPUT_NULLIFIED(0);

    const int kH = INT_ARG(0);
    const int kW = INT_ARG(1);

@ -150,7 +150,7 @@ CUSTOM_OP_IMPL(maxpool2d_bp, 2, 1, false, 0, 10) {

    auto input = INPUT_VARIABLE(0);               // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW)
    auto gradO = INPUT_VARIABLE(1);               // [bS, oH, oW, oC] (NHWC) or [bS, oC, oH, oW] (NCHW), epsilon_next
    auto gradI = OUTPUT_VARIABLE(0);              // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW), epsilon
    auto gradI = OUTPUT_NULLIFIED(0);             // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW), epsilon

    int kH = INT_ARG(0);                          // filter(kernel) height
    int kW = INT_ARG(1);                          // filter(kernel) width

@ -168,7 +168,7 @@ CUSTOM_OP_IMPL(maxpool2d_bp, 2, 1, false, 0, 10) {

    int bS, iC, iH, iW, oC, oH, oW;                          // batch size, input channels, input height/width, output channels, output height/width
    int indIOioC, indIiH, indWoC, indWiC, indWkH, indOoH;    // corresponding indexes
    ConvolutionUtils::getSizesAndIndexesConv2d(isNCHW, *input, *gradO, bS, iC, iH, iW, oC, oH, oW, indIOioC, indIiH, indWiC, indWoC, indWkH, indOoH);
    ConvolutionUtils::getSizesAndIndexesConv2d(isNCHW, 0, *input, *gradO, bS, iC, iH, iW, oC, oH, oW, indIOioC, indIiH, indWiC, indWoC, indWkH, indOoH);

    std::vector<Nd4jLong> expectedGradOShape = ShapeUtils::composeShapeUsingDimsAndIdx({bS,iC,oH,oW, 0,indIOioC,indIiH,indIiH+1});
    std::vector<Nd4jLong> expectedGradIShape = ShapeUtils::composeShapeUsingDimsAndIdx({bS,iC,iH,iW, 0,indIOioC,indIiH,indIiH+1});