More fixes (#148)
* Small batch norm fix (cuda/no-mkldnn)
  Signed-off-by: Alex Black <blacka101@gmail.com>

* Dropout fix for RnnOutputLayer
  Signed-off-by: Alex Black <blacka101@gmail.com>

* Allow block size < 2 in batch_to_space_nd and space_to_batch_nd for import, in spite of what the TF docs say
  Signed-off-by: AlexDBlack <blacka101@gmail.com>
parent 9c2bfc9863
commit e855e47f73
@@ -245,8 +245,8 @@ public class BatchNormalization extends BaseLayer<org.deeplearning4j.nn.conf.lay
         }
 
         //TODO: handle fixed beta/gamma case...
-        INDArray dBeta = epsilon.sum(0); //dL/dBeta = sum_examples dL/dOut
-        INDArray dGamma = epsilon.mul(xHat).sum(0); //dL/dGamma = sum_examples dL/dOut .* xHat
+        INDArray dBeta = epsilon.sum(true, 0); //dL/dBeta = sum_examples dL/dOut
+        INDArray dGamma = epsilon.mul(xHat).sum(true, 0); //dL/dGamma = sum_examples dL/dOut .* xHat
         INDArray dxhat;
         if (layerConf.isLockGammaBeta()) {
             dxhat = epsilon.mul(layerConf.getGamma());
@@ -257,11 +257,11 @@ public class BatchNormalization extends BaseLayer<org.deeplearning4j.nn.conf.lay
 
 
         //dL/dVariance
-        INDArray dLdVar = dxhat.mul(xMu).sum(0).muli(-0.5).muli(Transforms.pow(std, -3.0, true)); //Shape: [1, miniBatch]
+        INDArray dLdVar = dxhat.mul(xMu).sum(true, 0).muli(-0.5).muli(Transforms.pow(std, -3.0, true)); //Shape: [1, miniBatch]
 
         //dL/dmu
-        INDArray dxmu1 = dxhat.sum(0).divi(std).negi();
-        INDArray dxmu2 = xMu.sum(0).muli(-2.0 / batchSize).muli(dLdVar);
+        INDArray dxmu1 = dxhat.sum(true, 0).divi(std).negi();
+        INDArray dxmu2 = xMu.sum(true, 0).muli(-2.0 / batchSize).muli(dLdVar);
 
         INDArray dLdmu = dxmu1.addi(dxmu2); //Shape: [1, nOut]
 
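For context on the sum(0) -> sum(true, 0) change above: on ND4J versions from around this time, reducing a rank-2 array along dimension 0 without keepDims yields a rank-1 result, whereas the surrounding batch norm backprop code (see the "Shape: [1, nOut]" comments) works with rank-2 row vectors; passing keepDims = true preserves that shape. A minimal sketch of the difference; the [32, 10] sizes are illustrative only, not taken from the source:

    import org.nd4j.linalg.api.ndarray.INDArray;
    import org.nd4j.linalg.factory.Nd4j;
    import java.util.Arrays;

    public class KeepDimsSumDemo {
        public static void main(String[] args) {
            // Stand-in for epsilon with shape [minibatch, nOut] = [32, 10]
            INDArray epsilon = Nd4j.rand(32, 10);

            INDArray withoutKeepDims = epsilon.sum(0);       // reduces away dim 0 -> shape [10]
            INDArray withKeepDims    = epsilon.sum(true, 0); // keeps dim 0 as size 1 -> shape [1, 10]

            System.out.println(Arrays.toString(withoutKeepDims.shape())); // [10]
            System.out.println(Arrays.toString(withKeepDims.shape()));    // [1, 10]
        }
    }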
@@ -52,7 +52,6 @@ public class RnnOutputLayer extends BaseOutputLayer<org.deeplearning4j.nn.conf.l
     @Override
     public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
         assertInputSet(true);
-        applyDropOutIfNecessary(true, workspaceMgr); //Edge case: we skip OutputLayer forward pass during training as this isn't required to calculate gradients
         if (input.rank() != 3) {
             throw new UnsupportedOperationException(
                     "Input is not rank 3. RnnOutputLayer expects rank 3 input with shape [minibatch, layerInSize, sequenceLength]." +
@@ -65,6 +64,8 @@ public class RnnOutputLayer extends BaseOutputLayer<org.deeplearning4j.nn.conf.l
         INDArray inputTemp = input;
         this.input = TimeSeriesUtils.reshape3dTo2d(input, workspaceMgr, ArrayType.BP_WORKING_MEM);
 
+        applyDropOutIfNecessary(true, workspaceMgr); //Edge case: we skip OutputLayer forward pass during training as this isn't required to calculate gradients
+
         Pair<Gradient, INDArray> gradAndEpsilonNext = super.backpropGradient(epsilon, workspaceMgr); //Also applies dropout
         this.input = inputTemp;
         INDArray epsilon2d = gradAndEpsilonNext.getSecond();
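The RnnOutputLayer fix is purely a reordering: dropout was previously applied to the raw rank-3 input before the rank check, and is now applied after the input has been reshaped to 2d, so the dropout mask is generated on the same 2d activations that super.backpropGradient(...) consumes. A simplified excerpt of the corrected ordering, reassembled from the two hunks above (not a standalone program):

    assertInputSet(true);
    // ... rank-3 input check omitted ...

    INDArray inputTemp = input;
    this.input = TimeSeriesUtils.reshape3dTo2d(input, workspaceMgr, ArrayType.BP_WORKING_MEM);

    // Dropout is now applied to the reshaped 2d activations rather than to the
    // raw rank-3 input, immediately before the parent class uses that same 2d view
    applyDropOutIfNecessary(true, workspaceMgr);

    Pair<Gradient, INDArray> gradAndEpsilonNext = super.backpropGradient(epsilon, workspaceMgr);
    this.input = inputTemp;
    INDArray epsilon2d = gradAndEpsilonNext.getSecond();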
@@ -61,12 +61,6 @@ CUSTOM_OP_IMPL(batch_to_space_nd, 3, 1, false, 0, 0) {
     const auto product = blockShape->reduceNumber(nd4j::reduce::Prod).e<Nd4jLong>(0);
     REQUIRE_TRUE(input->sizeAt(0) % product == 0, 0, "BatchToSpaceND: first dimension of input array must be divisible by product of blockShape array elements (= %lld), but got first dimension equal to %i", product, input->sizeAt(0));
 
-    // FIXME - should we use this time-consuming validation ?
-    for (uint i = 0; i < numOfSpatialDims; ++i) {
-        const Nd4jLong blockSize = blockShape->e<Nd4jLong>(i);
-        REQUIRE_TRUE(blockSize >= 2, 0, "BatchToSpaceND: all elements of blockShape array must be >= 2, but got value of %i for element number %i !", blockSize, i);
-    }
-
     if(crop->sizeAt(0) != numOfSpatialDims || crop->sizeAt(1) != 2) {
         const std::string expectedCropShape = "[" + std::to_string(numOfSpatialDims) + ", 2]"; // [numOfSpatialDims, 2]
         REQUIRE_TRUE(false, 0, "BatchToSpaceND: operation expects padding shape to be %s, but got %s instead", expectedCropShape.c_str(), ShapeUtils::shapeAsString(crop).c_str());
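The removed loop rejected any blockShape element smaller than 2, following the wording of the TF documentation; in practice TensorFlow accepts a block size of 1, which simply leaves that spatial dimension untouched, so imported graphs can legitimately contain such values. A rough sketch of the shape arithmetic in Java (the concrete sizes are illustrative, not from the source):

    public class BatchToSpaceShapes {
        public static void main(String[] args) {
            // batch_to_space_nd: batch is divided by prod(blockShape); each spatial dim is
            // multiplied by its block size, then the crops are subtracted. A block size of 1
            // is a no-op for that dimension, which is why such graphs should still import.
            long[] inputShape = {4, 2, 3, 1};  // [batch, H, W, C]
            long[] blockShape = {1, 2};        // block size 1 for H, 2 for W

            long blockProd = blockShape[0] * blockShape[1];    // 2
            System.out.println(inputShape[0] / blockProd);      // out batch: 4 / 2 = 2
            System.out.println(inputShape[1] * blockShape[0]);  // out H: 2 * 1 = 2 (unchanged)
            System.out.println(inputShape[2] * blockShape[1]);  // out W: 3 * 2 = 6 (before crops)
        }
    }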
@@ -43,12 +43,6 @@ CUSTOM_OP_IMPL(space_to_batch_nd, 3, 1, false, 0, 0) {
 
     REQUIRE_TRUE(input->rankOf() == output->rankOf(), 0, "SpaceToBatchND: rank of input and output array must be the same, but got %i and %i correspondingly !", input->rankOf(), output->rankOf());
 
-    // FIXME - should we use this time-consuming validation ?
-    for (uint i = 0; i < numOfSpatialDims; ++i) {
-        const Nd4jLong blockSize = blockShape->e<Nd4jLong>(i);
-        REQUIRE_TRUE(blockSize >= 2, 0, "SpaceToBatchND: all elements of blockShape array must be >= 2, but got value of %i for element number %i !", blockSize, i);
-    }
-
     if(padding->sizeAt(0) != numOfSpatialDims || padding->sizeAt(1) != 2) {
         const std::string expectedpaddingShape = "[" + std::to_string(numOfSpatialDims) + ", 2]"; // [numOfSpatialDims, 2]
         REQUIRE_TRUE(false, 0, "SpaceToBatchND: operation expects padding shape to be %s, but got %s instead", expectedpaddingShape.c_str(), ShapeUtils::shapeAsString(padding).c_str());
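The same relaxation is applied here for space_to_batch_nd. Since it is the inverse of batch_to_space_nd, the reasoning mirrors the example above: a block size of 1 leaves the corresponding (padded) spatial dimension unchanged, with the batch dimension multiplied rather than divided by prod(blockShape).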