More fixes (#148)
* Small batch norm fix (cuda/no-mkldnn)

  Signed-off-by: Alex Black <blacka101@gmail.com>

* Dropout fix for RnnOutputLayer

  Signed-off-by: Alex Black <blacka101@gmail.com>

* Allow block size < 2 in batch_to_space_nd and space_to_batch_nd for import, in spite of what TF docs say

  Signed-off-by: AlexDBlack <blacka101@gmail.com>
parent: 9c2bfc9863
commit: e855e47f73
@@ -245,8 +245,8 @@ public class BatchNormalization extends BaseLayer<org.deeplearning4j.nn.conf.lay
         }
 
         //TODO: handle fixed beta/gamma case...
-        INDArray dBeta = epsilon.sum(0); //dL/dBeta = sum_examples dL/dOut
-        INDArray dGamma = epsilon.mul(xHat).sum(0); //dL/dGamma = sum_examples dL/dOut .* xHat
+        INDArray dBeta = epsilon.sum(true, 0); //dL/dBeta = sum_examples dL/dOut
+        INDArray dGamma = epsilon.mul(xHat).sum(true, 0); //dL/dGamma = sum_examples dL/dOut .* xHat
         INDArray dxhat;
         if (layerConf.isLockGammaBeta()) {
             dxhat = epsilon.mul(layerConf.getGamma());
@@ -257,11 +257,11 @@ public class BatchNormalization extends BaseLayer<org.deeplearning4j.nn.conf.lay
 
         //dL/dVariance
-        INDArray dLdVar = dxhat.mul(xMu).sum(0).muli(-0.5).muli(Transforms.pow(std, -3.0, true)); //Shape: [1, miniBatch]
+        INDArray dLdVar = dxhat.mul(xMu).sum(true, 0).muli(-0.5).muli(Transforms.pow(std, -3.0, true)); //Shape: [1, miniBatch]
 
         //dL/dmu
-        INDArray dxmu1 = dxhat.sum(0).divi(std).negi();
-        INDArray dxmu2 = xMu.sum(0).muli(-2.0 / batchSize).muli(dLdVar);
+        INDArray dxmu1 = dxhat.sum(true, 0).divi(std).negi();
+        INDArray dxmu2 = xMu.sum(true, 0).muli(-2.0 / batchSize).muli(dLdVar);
 
         INDArray dLdmu = dxmu1.addi(dxmu2); //Shape: [1, nOut]
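The only change in these two hunks is the switch from sum(0) to the keep-dimensions overload sum(true, 0). Without keepDims the reduced dimension is dropped, so the result no longer has the [1, nOut] row shape that the comments above (and the broadcast operations that follow) expect; with sum(true, 0) the reduced dimension is kept with size 1. A minimal ND4J sketch of the shape difference, using a hypothetical 4x3 stand-in for epsilon (the exact rank returned by sum(0) depends on the ND4J version):

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class KeepDimsSketch {
    public static void main(String[] args) {
        // Stand-in for epsilon in the backprop above: [minibatch, nOut] = [4, 3]
        INDArray epsilon = Nd4j.rand(4, 3);

        INDArray dBetaNoKeep = epsilon.sum(0);       // reduced dimension dropped
        INDArray dBetaKeep   = epsilon.sum(true, 0); // reduced dimension kept as size 1

        System.out.println(java.util.Arrays.toString(dBetaNoKeep.shape())); // [3]
        System.out.println(java.util.Arrays.toString(dBetaKeep.shape()));   // [1, 3]
    }
}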
@@ -52,7 +52,6 @@ public class RnnOutputLayer extends BaseOutputLayer<org.deeplearning4j.nn.conf.l
     @Override
     public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
         assertInputSet(true);
-        applyDropOutIfNecessary(true, workspaceMgr); //Edge case: we skip OutputLayer forward pass during training as this isn't required to calculate gradients
         if (input.rank() != 3) {
             throw new UnsupportedOperationException(
                     "Input is not rank 3. RnnOutputLayer expects rank 3 input with shape [minibatch, layerInSize, sequenceLength]." +
@@ -65,6 +64,8 @@ public class RnnOutputLayer extends BaseOutputLayer<org.deeplearning4j.nn.conf.l
         INDArray inputTemp = input;
         this.input = TimeSeriesUtils.reshape3dTo2d(input, workspaceMgr, ArrayType.BP_WORKING_MEM);
+
+        applyDropOutIfNecessary(true, workspaceMgr); //Edge case: we skip OutputLayer forward pass during training as this isn't required to calculate gradients
 
         Pair<Gradient, INDArray> gradAndEpsilonNext = super.backpropGradient(epsilon, workspaceMgr); //Also applies dropout
         this.input = inputTemp;
         INDArray epsilon2d = gradAndEpsilonNext.getSecond();
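The dropout fix moves applyDropOutIfNecessary from the top of backpropGradient to after the input has been reshaped from rank 3 to rank 2, so dropout is applied to the same 2d activations that super.backpropGradient actually consumes. For orientation, TimeSeriesUtils.reshape3dTo2d flattens the time dimension into the minibatch dimension, roughly as in the sketch below (illustrative only, with made-up shapes; the permute/reshape details are an assumption about the helper, not its actual code):

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class Reshape3dTo2dSketch {
    public static void main(String[] args) {
        // Rank-3 RNN activations: [minibatch, layerSize, sequenceLength] = [2, 4, 5]
        INDArray act3d = Nd4j.rand(new int[]{2, 4, 5});

        // Roughly what the 3d -> 2d reshape does: move the time axis next to the
        // minibatch axis, then flatten the two into a single "examples" dimension
        INDArray act2d = act3d.permute(0, 2, 1)    // [minibatch, sequenceLength, layerSize]
                .dup('c')                          // contiguous c-order copy before reshape
                .reshape(2 * 5, 4);                // [minibatch * sequenceLength, layerSize]

        System.out.println(java.util.Arrays.toString(act2d.shape())); // [10, 4]
    }
}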
@@ -61,12 +61,6 @@ CUSTOM_OP_IMPL(batch_to_space_nd, 3, 1, false, 0, 0) {
     const auto product = blockShape->reduceNumber(nd4j::reduce::Prod).e<Nd4jLong>(0);
     REQUIRE_TRUE(input->sizeAt(0) % product == 0, 0, "BatchToSpaceND: first dimension of input array must be divisible by product of blockShape array elements (= %lld), but got first dimension equal to %i", product, input->sizeAt(0));
-
-    // FIXME - should we use this time-consuming validation ?
-    for (uint i = 0; i < numOfSpatialDims; ++i) {
-        const Nd4jLong blockSize = blockShape->e<Nd4jLong>(i);
-        REQUIRE_TRUE(blockSize >= 2, 0, "BatchToSpaceND: all elements of blockShape array must be >= 2, but got value of %i for element number %i !", blockSize, i);
-    }
 
     if(crop->sizeAt(0) != numOfSpatialDims || crop->sizeAt(1) != 2) {
         const std::string expectedCropShape = "[" + std::to_string(numOfSpatialDims) + ", 2]"; // [numOfSpatialDims, 2]
         REQUIRE_TRUE(false, 0, "BatchToSpaceND: operation expects padding shape to be %s, but got %s instead", expectedCropShape.c_str(), ShapeUtils::shapeAsString(crop).c_str());
@@ -43,12 +43,6 @@ CUSTOM_OP_IMPL(space_to_batch_nd, 3, 1, false, 0, 0) {
 
     REQUIRE_TRUE(input->rankOf() == output->rankOf(), 0, "SpaceToBatchND: rank of input and output array must be the same, but got %i and %i correspondingly !", input->rankOf(), output->rankOf());
-
-    // FIXME - should we use this time-consuming validation ?
-    for (uint i = 0; i < numOfSpatialDims; ++i) {
-        const Nd4jLong blockSize = blockShape->e<Nd4jLong>(i);
-        REQUIRE_TRUE(blockSize >= 2, 0, "SpaceToBatchND: all elements of blockShape array must be >= 2, but got value of %i for element number %i !", blockSize, i);
-    }
 
     if(padding->sizeAt(0) != numOfSpatialDims || padding->sizeAt(1) != 2) {
         const std::string expectedpaddingShape = "[" + std::to_string(numOfSpatialDims) + ", 2]"; // [numOfSpatialDims, 2]
         REQUIRE_TRUE(false, 0, "SpaceToBatchND: operation expects padding shape to be %s, but got %s instead", expectedpaddingShape.c_str(), ShapeUtils::shapeAsString(padding).c_str());
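The removed loops are why TF graphs using a block size of 1 previously failed to import: both ops rejected any blockShape element below 2, even though the operation is still well defined (along a dimension with block size 1 only the padding or cropping has any effect). A small shape-arithmetic sketch for batch_to_space_nd, using made-up NHWC shapes:

import java.util.Arrays;

public class BatchToSpaceShapeSketch {
    public static void main(String[] args) {
        // Hypothetical NHWC input and a block size of 1 along both spatial dims
        long[] inputShape = {4, 3, 5, 2};       // [batch, H, W, C]
        long[] blockShape = {1, 1};
        long[][] crops    = {{0, 0}, {0, 0}};

        long blockProd = blockShape[0] * blockShape[1];

        // batch_to_space_nd shape rule: batch is divided by prod(blockShape),
        // each spatial dim is multiplied by its block size and then cropped
        long[] outShape = {
                inputShape[0] / blockProd,
                inputShape[1] * blockShape[0] - crops[0][0] - crops[0][1],
                inputShape[2] * blockShape[1] - crops[1][0] - crops[1][1],
                inputShape[3]
        };

        // With blockShape = [1, 1] and zero crops the shape is unchanged
        System.out.println(Arrays.toString(outShape)); // [4, 3, 5, 2]
    }
}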