From fad8da878f0a8349050420d5fa280bb5cc4399d4 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Fri, 26 Jul 2019 00:05:24 +1000 Subject: [PATCH] Various DL4J/ND4J fixes (#81) * #7954 Force refresh of UI when switching tabs on overview page Signed-off-by: AlexDBlack * #8017 Concurrent modification exception (synchronize) fix Signed-off-by: AlexDBlack * #8033 Don't initialize updater in middle of writing memory crash dump Signed-off-by: AlexDBlack * #8208 Fix shape checks for ND4J int[] creator methods Signed-off-by: AlexDBlack * #6385 #7992 Keras import naming fixes + cleanup Signed-off-by: AlexDBlack * #8016 Upsampling3D - add NDHWC format support Signed-off-by: AlexDBlack --- .../gradientcheck/CNN3DGradientCheckTest.java | 107 +++++++++--------- .../keras/utils/KerasOptimizerUtils.java | 70 ++++++------ .../nn/conf/layers/Upsampling3D.java | 28 ++++- .../nn/graph/ComputationGraph.java | 2 +- .../convolution/upsampling/Upsampling3D.java | 79 +++++++++---- .../nn/multilayer/MultiLayerNetwork.java | 2 +- .../optimize/api/ConvexOptimizer.java | 4 + .../optimize/solvers/BaseOptimizer.java | 14 ++- .../util/CrashReportingUtil.java | 2 +- .../ui/module/train/TrainModule.java | 8 +- .../js/train/overview.js | 2 + .../java/org/nd4j/linalg/factory/Nd4j.java | 2 +- .../org/nd4j/linalg/shape/EmptyTests.java | 1 + 13 files changed, 204 insertions(+), 117 deletions(-) diff --git a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/CNN3DGradientCheckTest.java b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/CNN3DGradientCheckTest.java index 227b72527..13cc11e80 100644 --- a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/CNN3DGradientCheckTest.java +++ b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/CNN3DGradientCheckTest.java @@ -386,63 +386,64 @@ public class CNN3DGradientCheckTest extends BaseDL4JTest { for (Activation afn : activations) { for (int miniBatchSize : minibatchSizes) { for (ConvolutionMode mode : modes) { + for(Convolution3D.DataFormat df : Convolution3D.DataFormat.values()) { - int outDepth = depth * upsamplingSize[0]; - int outHeight = height * upsamplingSize[1]; - int outWidth = width * upsamplingSize[2]; + int outDepth = depth * upsamplingSize[0]; + int outHeight = height * upsamplingSize[1]; + int outWidth = width * upsamplingSize[2]; - INDArray input = Nd4j.rand(new int[]{miniBatchSize, convNIn, depth, height, width}); - INDArray labels = Nd4j.zeros(miniBatchSize, finalNOut); - for (int i = 0; i < miniBatchSize; i++) { - labels.putScalar(new int[]{i, i % finalNOut}, 1.0); - } - - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .dataType(DataType.DOUBLE) - .updater(new NoOp()).weightInit(WeightInit.LECUN_NORMAL) - .dist(new NormalDistribution(0, 1)) - .seed(12345) - .list() - .layer(0, new Convolution3D.Builder().activation(afn).kernelSize(1, 1, 1) - .nIn(convNIn).nOut(convNOut).hasBias(false) - .convolutionMode(mode).dataFormat(Convolution3D.DataFormat.NCDHW) - .build()) - .layer(1, new Upsampling3D.Builder(upsamplingSize[0]).build()) - .layer(2, new DenseLayer.Builder().nOut(denseNOut).build()) - .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) - .activation(Activation.SOFTMAX).nOut(finalNOut).build()) - .inputPreProcessor(2, - new Cnn3DToFeedForwardPreProcessor(outDepth, outHeight, outWidth, - convNOut, true)) - .setInputType(InputType.convolutional3D(depth, height, width, convNIn)).build(); - - String json = 
conf.toJson(); - MultiLayerConfiguration c2 = MultiLayerConfiguration.fromJson(json); - assertEquals(conf, c2); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - String msg = "Minibatch size = " + miniBatchSize + ", activationFn=" + afn - + ", kernel = " + Arrays.toString(upsamplingSize) + ", mode = " + mode.toString() - + ", input depth " + depth + ", input height " + height - + ", input width " + width; - - if (PRINT_RESULTS) { - log.info(msg); - for (int j = 0; j < net.getnLayers(); j++) { - log.info("Layer " + j + " # params: " + net.getLayer(j).numParams()); + INDArray input = df == Convolution3D.DataFormat.NCDHW ? Nd4j.rand(miniBatchSize, convNIn, depth, height, width) : Nd4j.rand(miniBatchSize, depth, height, width, convNIn); + INDArray labels = Nd4j.zeros(miniBatchSize, finalNOut); + for (int i = 0; i < miniBatchSize; i++) { + labels.putScalar(new int[]{i, i % finalNOut}, 1.0); } + + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .dataType(DataType.DOUBLE) + .updater(new NoOp()).weightInit(WeightInit.LECUN_NORMAL) + .dist(new NormalDistribution(0, 1)) + .seed(12345) + .list() + .layer(0, new Convolution3D.Builder().activation(afn).kernelSize(1, 1, 1) + .nIn(convNIn).nOut(convNOut).hasBias(false) + .convolutionMode(mode).dataFormat(df) + .build()) + .layer(1, new Upsampling3D.Builder(upsamplingSize[0]).dataFormat(df).build()) + .layer(2, new DenseLayer.Builder().nOut(denseNOut).build()) + .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX).nOut(finalNOut).build()) + .inputPreProcessor(2, + new Cnn3DToFeedForwardPreProcessor(outDepth, outHeight, outWidth, + convNOut, true)) + .setInputType(InputType.convolutional3D(df, depth, height, width, convNIn)).build(); + + String json = conf.toJson(); + MultiLayerConfiguration c2 = MultiLayerConfiguration.fromJson(json); + assertEquals(conf, c2); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + String msg = "Minibatch size = " + miniBatchSize + ", activationFn=" + afn + + ", kernel = " + Arrays.toString(upsamplingSize) + ", mode = " + mode.toString() + + ", input depth " + depth + ", input height " + height + + ", input width " + width; + + if (PRINT_RESULTS) { + log.info(msg); + for (int j = 0; j < net.getnLayers(); j++) { + log.info("Layer " + j + " # params: " + net.getLayer(j).numParams()); + } + } + + boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, + DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, + RETURN_ON_FIRST_FAILURE, input, labels); + + assertTrue(msg, gradOK); + + TestUtils.testModelSerialization(net); } - - boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, - DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, - RETURN_ON_FIRST_FAILURE, input, labels); - - assertTrue(msg, gradOK); - - TestUtils.testModelSerialization(net); - } } } diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasOptimizerUtils.java b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasOptimizerUtils.java index 5c332a3b0..6d230d1fe 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasOptimizerUtils.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasOptimizerUtils.java @@ -32,6 +32,17 @@ import java.util.Map; */ @Slf4j public class 
KerasOptimizerUtils { + + protected static final String LR = "lr"; + protected static final String LR2 = "learning_rate"; + protected static final String EPSILON = "epsilon"; + protected static final String MOMENTUM = "momentum"; + protected static final String BETA_1 = "beta_1"; + protected static final String BETA_2 = "beta_2"; + protected static final String DECAY = "decay"; + protected static final String RHO = "rho"; + protected static final String SCHEDULE_DECAY = "schedule_decay"; + /** * Map Keras optimizer to DL4J IUpdater. * @@ -55,11 +66,11 @@ public class KerasOptimizerUtils { switch (optimizerName) { case "Adam": { - double lr = (double) optimizerParameters.get("lr"); - double beta1 = (double) optimizerParameters.get("beta_1"); - double beta2 = (double) optimizerParameters.get("beta_2"); - double epsilon = (double) optimizerParameters.get("epsilon"); - double decay = (double) optimizerParameters.get("decay"); + double lr = (double) (optimizerParameters.containsKey(LR) ? optimizerParameters.get(LR) : optimizerParameters.get(LR2)); + double beta1 = (double) optimizerParameters.get(BETA_1); + double beta2 = (double) optimizerParameters.get(BETA_2); + double epsilon = (double) optimizerParameters.get(EPSILON); + double decay = (double) optimizerParameters.get(DECAY); dl4jOptimizer = new Adam.Builder() .beta1(beta1).beta2(beta2) @@ -69,9 +80,9 @@ public class KerasOptimizerUtils { break; } case "Adadelta": { - double rho = (double) optimizerParameters.get("rho"); - double epsilon = (double) optimizerParameters.get("epsilon"); - // double decay = (double) optimizerParameters.get("decay"); No decay in DL4J Adadelta + double rho = (double) optimizerParameters.get(RHO); + double epsilon = (double) optimizerParameters.get(EPSILON); + // double decay = (double) optimizerParameters.get(DECAY); No decay in DL4J Adadelta dl4jOptimizer = new AdaDelta.Builder() .epsilon(epsilon).rho(rho) @@ -79,9 +90,9 @@ public class KerasOptimizerUtils { break; } case "Adgrad": { - double lr = (double) optimizerParameters.get("lr"); - double epsilon = (double) optimizerParameters.get("epsilon"); - double decay = (double) optimizerParameters.get("decay"); + double lr = (double) (optimizerParameters.containsKey(LR) ? optimizerParameters.get(LR) : optimizerParameters.get(LR2)); + double epsilon = (double) optimizerParameters.get(EPSILON); + double decay = (double) optimizerParameters.get(DECAY); dl4jOptimizer = new AdaGrad.Builder() .epsilon(epsilon).learningRate(lr) @@ -90,20 +101,20 @@ public class KerasOptimizerUtils { break; } case "Adamax": { - double lr = (double) optimizerParameters.get("lr"); - double beta1 = (double) optimizerParameters.get("beta_1"); - double beta2 = (double) optimizerParameters.get("beta_2"); - double epsilon = (double) optimizerParameters.get("epsilon"); + double lr = (double) (optimizerParameters.containsKey(LR) ? 
optimizerParameters.get(LR) : optimizerParameters.get(LR2)); + double beta1 = (double) optimizerParameters.get(BETA_1); + double beta2 = (double) optimizerParameters.get(BETA_2); + double epsilon = (double) optimizerParameters.get(EPSILON); dl4jOptimizer = new AdaMax(lr, beta1, beta2, epsilon); break; } case "Nadam": { - double lr = (double) optimizerParameters.get("lr"); - double beta1 = (double) optimizerParameters.get("beta_1"); - double beta2 = (double) optimizerParameters.get("beta_2"); - double epsilon = (double) optimizerParameters.get("epsilon"); - double scheduleDecay = (double) optimizerParameters.get("schedule_decay"); + double lr = (double) (optimizerParameters.containsKey(LR) ? optimizerParameters.get(LR) : optimizerParameters.get(LR2)); + double beta1 = (double) optimizerParameters.get(BETA_1); + double beta2 = (double) optimizerParameters.get(BETA_2); + double epsilon = (double) optimizerParameters.get(EPSILON); + double scheduleDecay = (double) optimizerParameters.get(SCHEDULE_DECAY); dl4jOptimizer = new Nadam.Builder() .beta1(beta1).beta2(beta2) @@ -114,15 +125,10 @@ public class KerasOptimizerUtils { break; } case "SGD": { - double lr = (double) optimizerParameters.get("lr"); - double momentum = 0.0; - try { - momentum = (double) optimizerParameters.get("epsilon"); - } catch (Exception e) { - log.warn("couldn't read momentum parameter"); - } + double lr = (double) (optimizerParameters.containsKey(LR) ? optimizerParameters.get(LR) : optimizerParameters.get(LR2)); + double momentum = (double) (optimizerParameters.containsKey(EPSILON) ? optimizerParameters.get(EPSILON) : optimizerParameters.get(MOMENTUM)); - double decay = (double) optimizerParameters.get("decay"); + double decay = (double) optimizerParameters.get(DECAY); dl4jOptimizer = new Nesterovs.Builder() .momentum(momentum).learningRate(lr) @@ -131,10 +137,10 @@ public class KerasOptimizerUtils { break; } case "RMSprop": { - double lr = (double) optimizerParameters.get("lr"); - double rho = (double) optimizerParameters.get("rho"); - double epsilon = (double) optimizerParameters.get("epsilon"); - double decay = (double) optimizerParameters.get("decay"); + double lr = (double) (optimizerParameters.containsKey(LR) ? 
optimizerParameters.get(LR) : optimizerParameters.get(LR2)); + double rho = (double) optimizerParameters.get(RHO); + double epsilon = (double) optimizerParameters.get(EPSILON); + double decay = (double) optimizerParameters.get(DECAY); dl4jOptimizer = new RmsProp.Builder() .epsilon(epsilon).rmsDecay(rho).learningRate(lr) diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling3D.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling3D.java index c38ab0a53..d142d52a9 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling3D.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling3D.java @@ -45,10 +45,14 @@ import java.util.Map; public class Upsampling3D extends BaseUpsamplingLayer { protected int[] size; + protected Convolution3D.DataFormat dataFormat = Convolution3D.DataFormat.NCDHW; //Default to NCDHW for 1.0.0-beta4 and earlier, when no config existed (NCDHW only) - protected Upsampling3D(UpsamplingBuilder builder) { + + + protected Upsampling3D(Builder builder) { super(builder); this.size = builder.size; + this.dataFormat = builder.dataFormat; } @Override @@ -124,10 +128,32 @@ public class Upsampling3D extends BaseUpsamplingLayer { @NoArgsConstructor public static class Builder extends UpsamplingBuilder { + protected Convolution3D.DataFormat dataFormat = Convolution3D.DataFormat.NCDHW; + + /** + * @param size Upsampling layer size (most common value: 2) + */ public Builder(int size) { super(new int[] {size, size, size}); } + /** + * @param dataFormat Data format - see {@link Convolution3D.DataFormat} for more details + * @param size Upsampling layer size (most common value: 2) + */ + public Builder(@NonNull Convolution3D.DataFormat dataFormat, int size){ + super(new int[]{size, size, size}); + this.dataFormat = dataFormat; + } + + /** + * Sets the DataFormat. 
See {@link Convolution3D.DataFormat} for more details + */ + public Builder dataFormat(@NonNull Convolution3D.DataFormat dataFormat){ + this.dataFormat = dataFormat; + return this; + } + /** * Upsampling size as int, so same upsampling size is used for depth, width and height * diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java index 49a2a34e3..cf3fec70c 100755 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java @@ -2896,7 +2896,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { solver.getOptimizer().setUpdaterComputationGraph(new ComputationGraphUpdater(this)); } if(solver != null) { - return solver.getOptimizer().getComputationGraphUpdater(); + return solver.getOptimizer().getComputationGraphUpdater(initializeIfAbsent); } return null; } diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling3D.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling3D.java index 0c35d9fcd..9ad17b018 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling3D.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling3D.java @@ -67,18 +67,36 @@ public class Upsampling3D extends AbstractLayer backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { assertInputSet(true); + boolean ncdhw = layerConf().getDataFormat() == org.deeplearning4j.nn.conf.layers.Convolution3D.DataFormat.NCDHW; // FIXME: int cast // Assumes NCDHW order int miniBatch = (int) input.size(0); - int inChannels = (int) input.size(1); - int inD = (int) input.size(2); - int inH = (int) input.size(3); - int inW = (int) input.size(4); + int inChannels, inD, inH, inW; + int[] intArgs; + if(ncdhw){ + inChannels = (int) input.size(1); + inD = (int) input.size(2); + inH = (int) input.size(3); + inW = (int) input.size(4); + intArgs = new int[] {1}; // 1 is channels first + } else { + inD = (int) input.size(1); + inH = (int) input.size(2); + inW = (int) input.size(3); + inChannels = (int) input.size(4); + intArgs = new int[] {0}; // 0 is channels last + } - int[] intArgs = new int[] {1}; // 1 is channels first - INDArray reshapedEpsilon = workspaceMgr.createUninitialized( - ArrayType.ACTIVATION_GRAD, epsilon.dataType(), new long[]{miniBatch, inChannels, inD, inH, inW}, 'c'); + + INDArray epsOut; + if(ncdhw){ + epsOut = workspaceMgr.createUninitialized( + ArrayType.ACTIVATION_GRAD, epsilon.dataType(), new long[]{miniBatch, inChannels, inD, inH, inW}, 'c'); + } else { + epsOut = workspaceMgr.createUninitialized( + ArrayType.ACTIVATION_GRAD, epsilon.dataType(), new long[]{miniBatch, inD, inH, inW, inChannels}, 'c'); + } Gradient gradient = new DefaultGradient(); @@ -86,13 +104,13 @@ public class Upsampling3D extends AbstractLayer(gradient, reshapedEpsilon); + epsOut = backpropDropOutIfPresent(epsOut); + return new Pair<>(gradient, epsOut); } protected int[] getSize() { @@ -115,32 +133,51 @@ public class Upsampling3D extends AbstractLayer\n" + "\n" + "\n" + @@ -464,7 +464,7 @@ public class TrainModule implements UIModule { * @param sessionId session ID * 
@return info for session as JSON */ - private Result sessionInfoForSession(String sessionId) { + private synchronized Result sessionInfoForSession(String sessionId) { Map dataEachSession = new HashMap<>(); StatsStorage ss = knownSessionIDs.get(sessionId); @@ -475,7 +475,7 @@ public class TrainModule implements UIModule { return Results.ok(asJson(dataEachSession)).as("application/json"); } - private Result setSession(String newSessionID) { + private synchronized Result setSession(String newSessionID) { if (knownSessionIDs.containsKey(newSessionID)) { currentSessionID = newSessionID; currentWorkerIdx = 0; @@ -567,7 +567,7 @@ public class TrainModule implements UIModule { return getOverviewDataForSession(currentSessionID); } - private Result getOverviewDataForSession(String sessionId) { + private synchronized Result getOverviewDataForSession(String sessionId) { Long lastUpdateTime = getLastUpdateTime(sessionId); I18N i18N = getI18N(sessionId); diff --git a/deeplearning4j/deeplearning4j-ui-parent/deeplearning4j-play/src/main/resources/deeplearning4jUiAssets/js/train/overview.js b/deeplearning4j/deeplearning4j-ui-parent/deeplearning4j-play/src/main/resources/deeplearning4jUiAssets/js/train/overview.js index 77a0ad792..107488028 100644 --- a/deeplearning4j/deeplearning4j-ui-parent/deeplearning4j-play/src/main/resources/deeplearning4jUiAssets/js/train/overview.js +++ b/deeplearning4j/deeplearning4j-ui-parent/deeplearning4j-play/src/main/resources/deeplearning4jUiAssets/js/train/overview.js @@ -20,6 +20,8 @@ function selectStdevChart(fieldName) { $("#stdevGradients").removeAttr("class"); $("#stdevUpdates").attr("class", "active"); } + + renderOverviewPage(false); } /* ---------- Render page ---------- */ diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java index a4460480d..260a0198e 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java @@ -5207,7 +5207,7 @@ public class Nd4j { */ public static void checkShapeValues(int... shape) { for (int e: shape) { - if (e < 1) + if (e < 0) throw new ND4JIllegalStateException("Invalid shape: Requested INDArray shape " + Arrays.toString(shape) + " contains dimension size values < 0 (all dimensions must be 0 or more)"); } diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/shape/EmptyTests.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/shape/EmptyTests.java index d492d8612..261e1e300 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/shape/EmptyTests.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/shape/EmptyTests.java @@ -256,6 +256,7 @@ public class EmptyTests extends BaseNd4jTest { assertArrayEquals(new long[]{0}, Nd4j.zeros(0).shape()); assertArrayEquals(new long[]{0,0}, Nd4j.zeros(0,0).shape()); assertArrayEquals(new long[]{0,0,0}, Nd4j.zeros(0,0,0).shape()); + assertArrayEquals(new long[]{0,0,0}, Nd4j.zeros(new int[]{0,0,0}, 'f').shape()); assertArrayEquals(new long[]{0}, Nd4j.zeros(0L).shape()); assertArrayEquals(new long[]{0}, Nd4j.zeros(dt, 0L).shape());
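Usage note (not part of the patch itself): a minimal, illustrative sketch of the NDHWC option this change adds to Upsampling3D. The builder constructors, the dataFormat(...) setter, and the InputType.convolutional3D(dataFormat, ...) overload are taken from the diff above; the input dimensions, upsampling size, and the surrounding single-layer configuration are assumptions for illustration only.

    import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
    import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
    import org.deeplearning4j.nn.conf.inputs.InputType;
    import org.deeplearning4j.nn.conf.layers.Convolution3D;
    import org.deeplearning4j.nn.conf.layers.Upsampling3D;

    public class Upsampling3DNdhwcExample {
        public static void main(String[] args) {
            // Channels-last (NDHWC) 3D upsampling, newly supported by this patch;
            // before this change Upsampling3D assumed NCDHW (channels-first) only.
            MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .list()
                    .layer(new Upsampling3D.Builder(Convolution3D.DataFormat.NDHWC, 2).build())
                    // Equivalent: new Upsampling3D.Builder(2).dataFormat(Convolution3D.DataFormat.NDHWC).build()
                    .setInputType(InputType.convolutional3D(Convolution3D.DataFormat.NDHWC,
                            8, 8, 8, 3)) // depth, height, width, channels: illustrative values
                    .build();
            System.out.println(conf.toJson());
        }
    }

The same data format value should be used for the layer and for setInputType, as in the gradient-check test above, so that the layer's backprop path picks the matching channels-first/channels-last branch.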