diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java index 3f748a361..d3de78c3f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java @@ -43,7 +43,7 @@ import org.nd4j.linalg.learning.config.IUpdater; @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder") +@SuperBuilder(builderMethodName = "innerBuilder") public class ActivationLayer extends NoParamLayer { @@ -133,8 +133,12 @@ public class ActivationLayer extends NoParamLayer { public static abstract class ActivationLayerBuilder< C extends ActivationLayer, B extends ActivationLayerBuilder> extends NoParamLayer.NoParamLayerBuilder { - public C build() { - C l = this.initBuild(); + + } + + private static final class ActivationLayerBuilderImpl extends ActivationLayerBuilder { + public ActivationLayer build() { + ActivationLayer l = this.initBuild(); l.initializeConstraints(); return l; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseUpsamplingLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseUpsamplingLayer.java index d40ac019c..8d031b6c6 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseUpsamplingLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseUpsamplingLayer.java @@ -33,7 +33,7 @@ import org.deeplearning4j.nn.conf.inputs.InputType; @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder() +@SuperBuilder public abstract class BaseUpsamplingLayer extends NoParamLayer { /** diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java index 095d5b3bd..7544b5717 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java @@ -25,10 +25,10 @@ import java.util.List; import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; -import org.deeplearning4j.nn.api.layers.LayerConstraint; import net.brutex.ai.dnn.api.LayerType; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; +import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -48,287 +48,326 @@ import org.nd4j.linalg.learning.regularization.Regularization; @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder") +@SuperBuilder(builderMethodName = "innerBuilder") public class BatchNormalization extends FeedForwardLayer { - - /** - * At test time: we can use a global estimate of the mean and variance, calculated using a moving average of the - * batch means/variances. This moving average is implemented as:
globalMeanEstimate = decay * - * globalMeanEstimate + (1-decay) * batchMean
globalVarianceEstimate = decay * globalVarianceEstimate + - * (1-decay) * batchVariance
- * - * @param decay Decay value to use for global stats calculation - */ - @lombok.Builder.Default - protected double decay = 0.9; - //Note: need to set defaults here in addition to builder, in case user uses no-op constructor... - /** - * Epsilon value for batch normalization; small floating point value added to variance (algorithm 1 in https://arxiv.org/pdf/1502.03167v3.pdf) to reduce/avoid - * underflow issues.
Default: 1e-5 - * - * @param eps Epsilon values to use - */ - @lombok.Builder.Default protected double eps = 1e-5; - /** - * If doing minibatch training or not. Default: true. Under most circumstances, this should be set to true. If - * doing full batch training (i.e., all examples in a single DataSet object - very small data sets) then this - * should be set to false. Affects how global mean/variance estimates are calculated. - * - * @param minibatch Minibatch parameter - */ - @lombok.Builder.Default protected boolean isMinibatch = true; + /** + * At test time: we can use a global estimate of the mean and variance, calculated using a moving + * average of the batch means/variances. This moving average is implemented as:
+ * globalMeanEstimate = decay * globalMeanEstimate + (1-decay) * batchMean
+ * globalVarianceEstimate = decay * globalVarianceEstimate + (1-decay) * batchVariance
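+   * For example, with the default decay = 0.9, each update keeps 90% of the previous global estimate and mixes in 10% of the current batch statistic.<br>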
+ * + * @param decay Decay value to use for global stats calculation + */ + @lombok.Builder.Default protected double decay = 0.9; - /** - * Used only when 'true' is passed to {@link BatchNormalizationBuilder#lockGammaBeta(boolean)}. Value is not used otherwise.
Default: - * 1.0 - * - * @param gamma Gamma parameter for all activations, used only with locked gamma/beta configuration mode - */ - @lombok.Builder.Default protected double gamma = 1.0; - /** - * Used only when 'true' is passed to {@link BatchNormalizationBuilder#lockGammaBeta(boolean)}. Value is not used otherwise.
Default: - * 0.0 - * - * @param beta Beta parameter for all activations, used only with locked gamma/beta configuration mode - */ - @lombok.Builder.Default protected double beta = 0.0; - /** - * Set constraints to be applied to the beta parameter of this batch normalisation layer. Default: no - * constraints.
Constraints can be used to enforce certain conditions (non-negativity of parameters, - * max-norm regularization, etc). These constraints are applied at each iteration, after the parameters have - * been updated. - * - */ - protected List betaConstraints; + // Note: need to set defaults here in addition to builder, in case user uses no-op constructor... + /** + * Epsilon value for batch normalization; small floating point value added to variance (algorithm + * 1 in https://arxiv.org/pdf/1502.03167v3.pdf) to + * reduce/avoid underflow issues.
+ * Default: 1e-5 + * + * @param eps Epsilon values to use + */ + @lombok.Builder.Default protected double eps = 1e-5; + /** + * If doing minibatch training or not. Default: true. Under most circumstances, this should be set + * to true. If doing full batch training (i.e., all examples in a single DataSet object - very + * small data sets) then this should be set to false. Affects how global mean/variance estimates + * are calculated. + * + * @param minibatch Minibatch parameter + */ + @lombok.Builder.Default protected boolean isMinibatch = true; - /** - * Set constraints to be applied to the gamma parameter of this batch normalisation layer. Default: no - * constraints.
Constraints can be used to enforce certain conditions (non-negativity of parameters, - * max-norm regularization, etc). These constraints are applied at each iteration, after the parameters have - * been updated. - * - */ - protected List gammaConstraints; + /** + * Used only when 'true' is passed to {@link BatchNormalizationBuilder#lockGammaBeta(boolean)}. + * Value is not used otherwise.
+ * Default: 1.0 + * + * @param gamma Gamma parameter for all activations, used only with locked gamma/beta + * configuration mode + */ + @lombok.Builder.Default protected double gamma = 1.0; + /** + * Used only when 'true' is passed to {@link BatchNormalizationBuilder#lockGammaBeta(boolean)}. + * Value is not used otherwise.
+ * Default: 0.0 + * + * @param beta Beta parameter for all activations, used only with locked gamma/beta configuration + * mode + */ + @lombok.Builder.Default protected double beta = 0.0; + /** + * Set constraints to be applied to the beta parameter of this batch normalisation layer. Default: + * no constraints.
+ * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm + * regularization, etc). These constraints are applied at each iteration, after the parameters + * have been updated. + */ + protected List betaConstraints; + /** + * Set constraints to be applied to the gamma parameter of this batch normalisation layer. + * Default: no constraints.
+ * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm + * regularization, etc). These constraints are applied at each iteration, after the parameters + * have been updated. + */ + protected List gammaConstraints; - /** - * When using CuDNN or MKLDNN and an error is encountered, should fallback to the non-helper implementation be allowed? - * If set to false, an exception in the helper will be propagated back to the user. If true, the built-in - * (non-MKL/CuDNN) implementation for BatchNormalizationLayer will be used - * - * @param allowFallback Whether fallback to non-CuDNN implementation should be used - */ - @lombok.Builder.Default protected boolean cudnnAllowFallback = true; - /** - * How should the moving average of variance be stored? Two different parameterizations are supported. - * useLogStd(false): equivalent to 1.0.0-beta3 and earlier. The variance "parameter" is stored directly as - * variable
useLogStd(true): (Default) variance is stored as log10(stdev)
The motivation here is for - * numerical stability (FP16 etc) and also distributed training: storing the variance directly can cause - * numerical issues. For example, a standard deviation of 1e-3 (something that could be encountered in practice) - * gives a variance of 1e-6, which can be problematic for 16-bit floating point -* - * How should the moving average of variance be stored? Two different parameterizations are supported. - * useLogStd(false): equivalent to 1.0.0-beta3 and earlier. The variance "parameter" is stored directly as - * variable
useLogStd(true): (Default) variance is stored as log10(stdev)
The motivation here is for - * numerical stability (FP16 etc) and also distributed training: storing the variance directly can cause - * numerical issues. For example, a standard deviation of 1e-3 (something that could be encountered in practice) - * gives a variance of 1e-6, which can be problematic for 16-bit floating point - */ - @lombok.Builder.Default protected boolean useLogStd = false; //Default for deserialized models (1.0.0-beta3) and earlier: store variance as variance. Post 1.0.0-beta3: use log stdev instead - /** - * Set the input and output array data format. Defaults to NCHW format - i.e., channels first. - * See {@link CNN2DFormat} for more details - * @param format Format to use - */ - @lombok.Builder.Default protected CNN2DFormat dataFormat = CNN2DFormat.NCHW; //Default for deserialized models, 1.0.0-beta6 and earlier + /** + * When using CuDNN or MKLDNN and an error is encountered, should fallback to the non-helper + * implementation be allowed? If set to false, an exception in the helper will be propagated back + * to the user. If true, the built-in (non-MKL/CuDNN) implementation for BatchNormalizationLayer + * will be used + * + * @param allowFallback Whether fallback to non-CuDNN implementation should be used + */ + @lombok.Builder.Default protected boolean cudnnAllowFallback = true; + /** + * How should the moving average of variance be stored? Two different parameterizations are + * supported. useLogStd(false): equivalent to 1.0.0-beta3 and earlier. The variance "parameter" is + * stored directly as variable
+ * useLogStd(true): (Default) variance is stored as log10(stdev)
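+   * For example, a standard deviation of 1e-3 is stored as log10(1e-3) = -3, rather than as the raw variance of 1e-6.<br>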
+ * The motivation here is for numerical stability (FP16 etc) and also distributed training: + * storing the variance directly can cause numerical issues. For example, a standard deviation of + * 1e-3 (something that could be encountered in practice) gives a variance of 1e-6, which can be + * problematic for 16-bit floating point + * + *

How should the moving average of variance be stored? Two different parameterizations are + * supported. useLogStd(false): equivalent to 1.0.0-beta3 and earlier. The variance "parameter" is + * stored directly as variable
+ * useLogStd(true): (Default) variance is stored as log10(stdev)
+ * The motivation here is for numerical stability (FP16 etc) and also distributed training: + * storing the variance directly can cause numerical issues. For example, a standard deviation of + * 1e-3 (something that could be encountered in practice) gives a variance of 1e-6, which can be + * problematic for 16-bit floating point + */ + @lombok.Builder.Default + protected boolean useLogStd = + false; // Default for deserialized models (1.0.0-beta3) and earlier: store variance as + // variance. Post 1.0.0-beta3: use log stdev instead + /** + * Set the input and output array data format. Defaults to NCHW format - i.e., channels first. See + * {@link CNN2DFormat} for more details + * + * @param format Format to use + */ + @lombok.Builder.Default + protected CNN2DFormat dataFormat = + CNN2DFormat.NCHW; // Default for deserialized models, 1.0.0-beta6 and earlier - private boolean lockGammaBeta; + private boolean lockGammaBeta; - public static BatchNormalizationBuilder builder() { - return innerBuilder(); + public static BatchNormalizationBuilder builder() { + return innerBuilder(); + } + + public static BatchNormalizationBuilder builder(double gamma, double beta) { + return innerBuilder().gamma(gamma).beta(beta); + } + + public static BatchNormalizationBuilder builder(boolean lockGammaBeta) { + return innerBuilder().lockGammaBeta(lockGammaBeta); + } + + @Override + public BatchNormalization clone() { + BatchNormalization clone = (BatchNormalization) super.clone(); + return clone; + } + + @Override + public Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + this.setNetConfiguration(conf); + LayerValidation.assertNOutSet("BatchNormalization", getName(), layerIndex, getNOut()); + runInheritance(); + + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + org.deeplearning4j.nn.layers.normalization.BatchNormalization ret = + new org.deeplearning4j.nn.layers.normalization.BatchNormalization(lconf, networkDataType); + ret.addTrainingListeners(trainingListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setLayerConfiguration(lconf); + return ret; + } + + @Override + public ParamInitializer initializer() { + return BatchNormalizationParamInitializer.getInstance(); + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null) { + throw new IllegalStateException( + "Invalid input type: Batch norm layer expected input of type CNN, got null for layer \"" + + getName() + + "\""); } - public static BatchNormalizationBuilder builder(double gamma, double beta) { - return innerBuilder() - .gamma(gamma) - .beta(beta); + // Can handle CNN, flat CNN, CNN3D or FF input formats only + switch (inputType.getType()) { + case FF: + case CNN: + case CNNFlat: + case CNN3D: + return inputType; // OK + default: + throw new IllegalStateException( + "Invalid input type: Batch norm layer expected input of type CNN, CNN Flat or FF, got " + + inputType + + " for layer index " + + layerIndex + + ", layer name = " + + getName()); + } + } + + @Override + public void setNIn(InputType inputType, boolean override) { + if (nIn <= 0 || override) { + switch (inputType.getType()) { + case FF: + nIn = ((InputType.InputTypeFeedForward) inputType).getSize(); + break; + case CNN: 
+ nIn = ((InputType.InputTypeConvolutional) inputType).getChannels(); + dataFormat = ((InputType.InputTypeConvolutional) inputType).getFormat(); + break; + case CNN3D: + nIn = ((InputType.InputTypeConvolutional3D) inputType).getChannels(); + break; + case CNNFlat: + nIn = ((InputType.InputTypeConvolutionalFlat) inputType).getDepth(); + default: + throw new IllegalStateException( + "Invalid input type: Batch norm layer expected input of type CNN, CNN Flat or FF, got " + + inputType + + " for layer " + + getName() + + "\""); + } + nOut = nIn; + } + } + + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + if (inputType.getType() == InputType.Type.CNNFlat) { + InputType.InputTypeConvolutionalFlat i = (InputType.InputTypeConvolutionalFlat) inputType; + return new FeedForwardToCnnPreProcessor(i.getHeight(), i.getWidth(), i.getDepth()); + } else if (inputType.getType() == InputType.Type.RNN) { + return new RnnToFeedForwardPreProcessor(); } - public static BatchNormalizationBuilder builder(boolean lockGammaBeta) { - return innerBuilder() - .lockGammaBeta(lockGammaBeta); + return null; + } + + @Override + public List getRegularizationByParam(String paramName) { + // Don't regularize batch norm params: similar to biases in the sense that there are not many of + // them... + return null; + } + + @Override + public IUpdater getUpdaterByParam(String paramName) { + switch (paramName) { + case BatchNormalizationParamInitializer.BETA: + case BatchNormalizationParamInitializer.GAMMA: + return getUpdater(); + case BatchNormalizationParamInitializer.GLOBAL_MEAN: + case BatchNormalizationParamInitializer.GLOBAL_VAR: + case BatchNormalizationParamInitializer.GLOBAL_LOG_STD: + return new NoOp(); + default: + throw new IllegalArgumentException("Unknown parameter: \"" + paramName + "\""); + } + } + + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + InputType outputType = getOutputType(-1, inputType); + + // TODO CuDNN helper etc + + val numParams = initializer().numParams(this); + int updaterStateSize = 0; + + for (String s : BatchNormalizationParamInitializer.getInstance().paramKeys(this)) { + updaterStateSize += getUpdaterByParam(s).stateSize(nOut); } - @Override - public BatchNormalization clone() { - BatchNormalization clone = (BatchNormalization) super.clone(); - return clone; + // During forward pass: working memory size approx. equal to 2x input size (copy ops, etc) + val inferenceWorkingSize = 2 * inputType.arrayElementsPerExample(); + + // During training: we calculate mean and variance... result is equal to nOut, and INDEPENDENT + // of minibatch size + val trainWorkFixed = 2 * nOut; + // During backprop: multiple working arrays... output size, 2 * output size (indep. 
of example + // size), + val trainWorkingSizePerExample = + inferenceWorkingSize // Inference during backprop + + (outputType.arrayElementsPerExample() + 2 * nOut); // Backprop gradient calculation + + return new LayerMemoryReport.Builder(name, BatchNormalization.class, inputType, outputType) + .standardMemory(numParams, updaterStateSize) + .workingMemory( + 0, + 0, + trainWorkFixed, + trainWorkingSizePerExample) // No additional memory (beyond activations) for inference + .cacheMemory( + MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) // No caching + .build(); + } + + @Override + public boolean isPretrainParam(String paramName) { + return false; // No pretrain params in BN + } + + private static final class BatchNormalizationBuilderImpl + extends BatchNormalizationBuilder { + public BatchNormalization build() { + BatchNormalization l = new BatchNormalization(this); + l.setType(LayerType.BN); + l.initializeConstraints(); + return l; + } + } + + public abstract static class BatchNormalizationBuilder< + C extends BatchNormalization, B extends BatchNormalizationBuilder> + extends FeedForwardLayerBuilder { + + public B helperAllowFallback(boolean b) { + this.cudnnAllowFallback$value = b; + this.cudnnAllowFallback$set = true; + return self(); } - @Override - public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, - int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - this.setNetConfiguration(conf); - LayerValidation.assertNOutSet("BatchNormalization", getName(), layerIndex, getNOut()); - runInheritance(); - - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - org.deeplearning4j.nn.layers.normalization.BatchNormalization ret = - new org.deeplearning4j.nn.layers.normalization.BatchNormalization(lconf, networkDataType); - ret.addTrainingListeners(trainingListeners); - ret.setIndex(layerIndex); - ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(this, layerParamsView, initializeParams); - ret.setParamTable(paramTable); - ret.setLayerConfiguration(lconf); - return ret; + public B constrainBeta(LayerConstraint... constraints) { + this.betaConstraints = List.of(constraints); + return self(); } - @Override - public ParamInitializer initializer() { - return BatchNormalizationParamInitializer.getInstance(); + public B constrainGamma(LayerConstraint... 
constraints) { + this.gammaConstraints = List.of(constraints); + return self(); } - - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - if (inputType == null) { - throw new IllegalStateException( - "Invalid input type: Batch norm layer expected input of type CNN, got null for layer \"" - + getName() + "\""); - } - - //Can handle CNN, flat CNN, CNN3D or FF input formats only - switch (inputType.getType()) { - case FF: - case CNN: - case CNNFlat: - case CNN3D: - return inputType; //OK - default: - throw new IllegalStateException( - "Invalid input type: Batch norm layer expected input of type CNN, CNN Flat or FF, got " - + inputType + " for layer index " + layerIndex + ", layer name = " - + getName()); - } - } - - @Override - public void setNIn(InputType inputType, boolean override) { - if (nIn <= 0 || override) { - switch (inputType.getType()) { - case FF: - nIn = ((InputType.InputTypeFeedForward) inputType).getSize(); - break; - case CNN: - nIn = ((InputType.InputTypeConvolutional) inputType).getChannels(); - dataFormat = ((InputType.InputTypeConvolutional) inputType).getFormat(); - break; - case CNN3D: - nIn = ((InputType.InputTypeConvolutional3D) inputType).getChannels(); - break; - case CNNFlat: - nIn = ((InputType.InputTypeConvolutionalFlat) inputType).getDepth(); - default: - throw new IllegalStateException( - "Invalid input type: Batch norm layer expected input of type CNN, CNN Flat or FF, got " - + inputType + " for layer " + getName() + "\""); - } - nOut = nIn; - } - } - - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - if (inputType.getType() == InputType.Type.CNNFlat) { - InputType.InputTypeConvolutionalFlat i = (InputType.InputTypeConvolutionalFlat) inputType; - return new FeedForwardToCnnPreProcessor(i.getHeight(), i.getWidth(), i.getDepth()); - } else if (inputType.getType() == InputType.Type.RNN) { - return new RnnToFeedForwardPreProcessor(); - } - - return null; - } - - @Override - public List getRegularizationByParam(String paramName){ - //Don't regularize batch norm params: similar to biases in the sense that there are not many of them... - return null; - } - - @Override - public IUpdater getUpdaterByParam(String paramName) { - switch (paramName) { - case BatchNormalizationParamInitializer.BETA: - case BatchNormalizationParamInitializer.GAMMA: - return getUpdater(); - case BatchNormalizationParamInitializer.GLOBAL_MEAN: - case BatchNormalizationParamInitializer.GLOBAL_VAR: - case BatchNormalizationParamInitializer.GLOBAL_LOG_STD: - return new NoOp(); - default: - throw new IllegalArgumentException("Unknown parameter: \"" + paramName + "\""); - } - } - - @Override - public LayerMemoryReport getMemoryReport(InputType inputType) { - InputType outputType = getOutputType(-1, inputType); - - //TODO CuDNN helper etc - - val numParams = initializer().numParams(this); - int updaterStateSize = 0; - - for (String s : BatchNormalizationParamInitializer.getInstance().paramKeys(this)) { - updaterStateSize += getUpdaterByParam(s).stateSize(nOut); - } - - //During forward pass: working memory size approx. equal to 2x input size (copy ops, etc) - val inferenceWorkingSize = 2 * inputType.arrayElementsPerExample(); - - //During training: we calculate mean and variance... result is equal to nOut, and INDEPENDENT of minibatch size - val trainWorkFixed = 2 * nOut; - //During backprop: multiple working arrays... output size, 2 * output size (indep. 
of example size), - val trainWorkingSizePerExample = inferenceWorkingSize //Inference during backprop - + (outputType.arrayElementsPerExample() + 2 * nOut); //Backprop gradient calculation - - return new LayerMemoryReport.Builder(name, BatchNormalization.class, inputType, outputType) - .standardMemory(numParams, updaterStateSize) - .workingMemory(0, 0, trainWorkFixed, trainWorkingSizePerExample) //No additional memory (beyond activations) for inference - .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching - .build(); - } - - @Override - public boolean isPretrainParam(String paramName) { - return false; //No pretrain params in BN - } - - public static abstract class BatchNormalizationBuilder> extends FeedForwardLayerBuilder { - public C build() { - C l = this.initBuild(); - l.setType(LayerType.BN); - l.initializeConstraints(); - return l; - } - public B helperAllowFallback(boolean b) { - this.cudnnAllowFallback$value = b; - this.cudnnAllowFallback$set = true; - return self(); - } - - public B constrainBeta(LayerConstraint ... constraints) { - this.betaConstraints = List.of(constraints); - return self(); - } - public B constrainGamma(LayerConstraint ... constraints) { - this.gammaConstraints = List.of(constraints); - return self(); - } - } - - - + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleLayer.java index 0d13f2b88..32b0d8efe 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleLayer.java @@ -38,9 +38,8 @@ import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; - @EqualsAndHashCode(callSuper = true) -@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder") +@SuperBuilder(builderMethodName = "innerBuilder") public class CapsuleLayer extends SameDiffLayer { private static final String WEIGHT_PARAM = "weight"; @@ -78,6 +77,18 @@ public class CapsuleLayer extends SameDiffLayer { */ @Builder.Default @Getter @Setter private int routings = 3; + public static CapsuleLayerBuilder builder() { + return innerBuilder() + ; + } + + public static CapsuleLayerBuilder builder(int capsules, int capsulesDim, int routings) { + return innerBuilder() + .capsules(capsules) + .capsuleDimensions(capsulesDim) + .routings(routings); + } + @Override public void setNIn(InputType inputType, boolean override) { if(inputType == null || inputType.getType() != Type.RNN) { @@ -185,16 +196,6 @@ public class CapsuleLayer extends SameDiffLayer { return InputType.recurrent(capsules, capsuleDimensions); } - public static CapsuleLayerBuilder builder() { - return innerBuilder() - ; - } - public static CapsuleLayerBuilder builder(int capsules, int capsulesDim, int routings) { - return innerBuilder() - .capsules(capsules) - .capsuleDimensions(capsulesDim) - .routings(routings); - } public static abstract class CapsuleLayerBuilder< C extends CapsuleLayer, B extends CapsuleLayerBuilder> extends SameDiffLayerBuilder { @@ -215,35 +216,37 @@ public class CapsuleLayer extends SameDiffLayer { } - public C build() { - C l = this.initBuild(); - if (capsules <= 0 || capsuleDimensions <= 0 || routings$value <= 0) { - throw new IllegalArgumentException( - "Invalid configuration for Capsule ILayer (layer name = \"" - + l.getName() - 
+ "\"):" - + " capsules, capsuleDimensions, and routings must be > 0. Got: " - + capsules - + ", " - + capsuleDimensions - + ", " - + routings$value); - } - if (inputCapsules$value < 0 || inputCapsuleDimensions$value < 0) { - throw new IllegalArgumentException( - "Invalid configuration for Capsule ILayer (layer name = \"" - + l.getName() - + "\"):" - + " inputCapsules and inputCapsuleDimensions must be >= 0 if set. Got: " - + inputCapsules$value - + ", " - + inputCapsuleDimensions$value); - } - - return l; - } } + private static final class CapsuleLayerBuilderImpl extends CapsuleLayerBuilder { + public CapsuleLayer build() { + CapsuleLayer l = new CapsuleLayer(this); + if (l.getCapsules() <= 0 || l.getCapsuleDimensions() <= 0 || l.getRoutings() <= 0) { + throw new IllegalArgumentException( + "Invalid configuration for Capsule ILayer (layer name = \"" + + l.getName() + + "\"):" + + " capsules, capsuleDimensions, and routings must be > 0. Got: " + + l.getCapsules() + + ", " + + l.getCapsuleDimensions() + + ", " + + l.getRoutings()); + } + + if (l.getInputCapsules() < 0 || l.getInputCapsuleDimensions() < 0) { + throw new IllegalArgumentException( + "Invalid configuration for Capsule ILayer (layer name = \"" + + l.getName() + + "\"):" + + " inputCapsules and inputCapsuleDimensions must be >= 0 if set. Got: " + + l.getInputCapsules() + + ", " + + l.getInputCapsuleDimensions() ); + } + return l; + } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CenterLossOutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CenterLossOutputLayer.java index ec95558cf..c3119195b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CenterLossOutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CenterLossOutputLayer.java @@ -20,6 +20,8 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.Collection; +import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.Layer; @@ -30,36 +32,21 @@ import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.params.CenterLossParamInitializer; import org.deeplearning4j.optimize.api.TrainingListener; -import org.nd4j.linalg.activations.impl.ActivationSoftmax; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.learning.config.IUpdater; import org.nd4j.linalg.learning.config.NoOp; -import org.nd4j.linalg.lossfunctions.ILossFunction; -import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction; - -import java.util.Collection; -import java.util.Map; @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(buildMethodName = "initBuild") +@SuperBuilder public class CenterLossOutputLayer extends BaseOutputLayer { @Builder.Default protected double alpha= 0.805; @Builder.Default protected double lambda = 2e-4; @Builder.Default protected boolean gradientCheck = false; -public static abstract class CenterLossOutputLayerBuilder> extends - BaseOutputLayerBuilder { - public C build() { - C l = initBuild(); - l.initializeConstraints(); - return l; - } -} - @Override public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { @@ -91,7 +78,6 @@ public static abstract class 
CenterLossOutputLayerBuilder> extends + BaseOutputLayerBuilder { + public C build() { + C l = initBuild(); + l.initializeConstraints(); + return l; + } +} + +private static final class CenterLossOutputLayerBuilderImpl extends CenterLossOutputLayerBuilder { + public CenterLossOutputLayer build() { + CenterLossOutputLayer l = new CenterLossOutputLayer(this); + l.initializeConstraints(); + return l; + } +} + } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1D.java index ec96d6aef..1074b3092 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1D.java @@ -29,6 +29,6 @@ import lombok.experimental.SuperBuilder; @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(buildMethodName = "initBuild") +@SuperBuilder public class Convolution1D extends Convolution1DLayer { } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DLayer.java index a82f6c7c5..619b7a477 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DLayer.java @@ -47,9 +47,8 @@ import org.nd4j.linalg.api.ndarray.INDArray; @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder") +@SuperBuilder(builderMethodName = "innerBuilder") public class Convolution1DLayer extends ConvolutionLayer { - @Builder.Default private RNNFormat rnnDataFormat = RNNFormat.NCW; /** * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). * See {@link CNN2DFormat} for more details.
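The same builder refactor recurs across these files: @SuperBuilder loses its buildMethodName = "initBuild" override, the custom build() logic (validation, initializeConstraints(), layer-type setup) moves into private static final *BuilderImpl classes, and the public builder() entry points and fluent setters are kept. A minimal call-site sketch, with illustrative values only (required settings such as nIn/nOut are omitted here):

    // Hypothetical usage, not taken from this patch.
    BatchNormalization bn =
        BatchNormalization.builder()   // public entry point delegating to innerBuilder()
            .decay(0.95)               // @Builder.Default setters are generated as before
            .useLogStd(true)
            .build();                  // build() now runs in BatchNormalizationBuilderImpl, which
                                       // also calls setType(LayerType.BN) and initializeConstraints()

Call sites keep the same shape; only where the finalization logic runs has changed.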
@@ -60,6 +59,7 @@ public class Convolution1DLayer extends ConvolutionLayer { @Builder.Default protected CNN2DFormat dataFormat = CNN2DFormat.NCHW; // default value for legacy serialization reasons + @Builder.Default private RNNFormat rnnDataFormat = RNNFormat.NCW; /** * Size of the convolution * @@ -183,17 +183,20 @@ public class Convolution1DLayer extends ConvolutionLayer { return true; } - public static abstract class Convolution1DLayerBuilder< - C extends ConvolutionLayer, B extends Convolution1DLayerBuilder> - extends ConvolutionLayerBuilder { - public C build() { - C l = initBuild(); - ConvolutionUtils.validateConvolutionModePadding(l.getConvolutionMode(), padding$value); + private static final class Convolution1DLayerBuilderImpl extends ConvolutionLayerBuilder { + public ConvolutionLayer build() { + ConvolutionLayer l = initBuild(); + ConvolutionUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding()); ConvolutionUtils.validateCnnKernelStridePadding( - kernelSize$value, stride$value, padding$value); + l.getKernelSize(), l.getStride(), l.getPadding()); l.initializeConstraints(); return l; } + } + public static abstract class Convolution1DLayerBuilder< + C extends ConvolutionLayer, B extends Convolution1DLayerBuilder> + extends ConvolutionLayerBuilder { + public B kernelSize(int @NonNull ... kernelSize) { this.kernelSize$value[0] = ValidationUtils.validate1NonNegative(kernelSize, "kernelSize")[0]; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution2D.java index 49d2750a1..dfe5f5c83 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution2D.java @@ -30,6 +30,6 @@ import lombok.experimental.SuperBuilder; @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(buildMethodName = "initBuild") +@SuperBuilder public class Convolution2D extends ConvolutionLayer { } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution3D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution3D.java index 7f02d91f1..debe05588 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution3D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution3D.java @@ -40,7 +40,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(builderMethodName = "innerBuilder", buildMethodName = "initBuild") +@SuperBuilder(builderMethodName = "innerBuilder") public class Convolution3D extends ConvolutionLayer { /** @@ -235,17 +235,20 @@ public class Convolution3D extends ConvolutionLayer { NDHWC } + private static final class Convolution3DBuilderImpl extends Convolution3DBuilder { + public Convolution3D build() { + Convolution3D l = new Convolution3D(this); + ConvolutionUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding()); + Convolution3DUtils.validateCnn3DKernelStridePadding(l.getKernelSize(), l.getStride(), l.getPadding()); + return l; + } + } // public Builder(int[] kernelSize, int[] stride, int[] padding, int[] dilation) { // sup/er(kernelSize, stride, padding, dilation, 3); public static abstract class Convolution3DBuilder< C extends Convolution3D, B extends 
Convolution3DBuilder> extends ConvolutionLayer.ConvolutionLayerBuilder { - public C build() { - ConvolutionUtils.validateConvolutionModePadding(convolutionMode$value, padding); - Convolution3DUtils.validateCnn3DKernelStridePadding(kernelSize, stride, padding); - C l = initBuild(); - return l; - } + @Override // TODO we can use the parent builder and do not need to redefine the variables. // Validation can be done in override function! diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java index a5915e9ec..d1afb28a3 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java @@ -48,7 +48,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; */ @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(builderMethodName = "innerBuilder", buildMethodName = "initBuild") +@SuperBuilder(builderMethodName = "innerBuilder") public class ConvolutionLayer extends FeedForwardLayer { /** * Size of the convolution rows/columns @@ -397,48 +397,7 @@ public class ConvolutionLayer extends FeedForwardLayer { return self(); } - public C build() { - ConvolutionUtils.validateConvolutionModePadding(convolutionMode$value, padding$value); - ConvolutionUtils.validateCnnKernelStridePadding( - kernelSize$value, stride$value, padding$value); - if (kernelSize$value.length != convolutionDim$value) { - throw new IllegalArgumentException( - "Kernel argument should be a " - + convolutionDim$value - + "d array, got " - + Arrays.toString(kernelSize$value)); - } - - if (stride$value.length != convolutionDim$value) { - throw new IllegalArgumentException( - "Strides argument should be a " - + convolutionDim$value - + "d array, got " - + Arrays.toString(stride$value)); - } - - if (padding$value.length != convolutionDim$value) { - throw new IllegalArgumentException( - "Padding argument should be a " - + convolutionDim$value - + "d array, got " - + Arrays.toString(padding$value)); - } - - if (dilation$value.length != convolutionDim$value) { - throw new IllegalArgumentException( - "Dilation argument should be a " - + convolutionDim$value - + "d array, got " - + Arrays.toString(dilation$value)); - } - - C l = initBuild(); - l.setType(LayerType.CONV); - l.initializeConstraints(); - return l; - } /** * When using CuDNN or MKLDNN and an error is encountered, should fallback to the non-helper @@ -454,4 +413,47 @@ public class ConvolutionLayer extends FeedForwardLayer { return self(); } } + private static final class ConvolutionLayerBuilderImpl extends ConvolutionLayerBuilder { + public ConvolutionLayer build() { + ConvolutionLayer l = new ConvolutionLayer(this); + ConvolutionUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding()); + ConvolutionUtils.validateCnnKernelStridePadding( + l.getKernelSize(), l.getStride(), l.getPadding()); + + if (l.getKernelSize().length != l.getConvolutionDim()) { + throw new IllegalArgumentException( + "Kernel argument should be a " + + l.getConvolutionDim() + + "d array, got " + + Arrays.toString(l.getKernelSize())); + } + + if (l.getStride().length != l.getConvolutionDim()) { + throw new IllegalArgumentException( + "Strides argument should be a " + + l.getConvolutionDim() + + "d array, got " + + Arrays.toString(l.getStride())); + } + + if (l.getPadding().length != 
l.getConvolutionDim()) { + throw new IllegalArgumentException( + "Padding argument should be a " + + l.getConvolutionDim() + + "d array, got " + + Arrays.toString(l.getPadding())); + } + + if (l.getDilation().length != l.getConvolutionDim()) { + throw new IllegalArgumentException( + "Dilation argument should be a " + + l.getConvolutionDim() + + "d array, got " + + Arrays.toString(l.getDilation())); + } + l.setType(LayerType.CONV); + l.initializeConstraints(); + return l; + } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution2D.java index cd9990875..2847168cb 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution2D.java @@ -46,7 +46,7 @@ import java.util.Map; @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuild") +@SuperBuilder(builderMethodName = "innerBuilder") public class Deconvolution2D extends ConvolutionLayer { @@ -57,12 +57,15 @@ private CNN2DFormat format = CNN2DFormat.NCHW; return false; } - public static abstract class Deconvolution2DBuilder> extends ConvolutionLayerBuilder { - public C build() { - C l = initBuild(); + private static final class Deconvolution2DBuilderImpl extends Deconvolution2DBuilder { + public Deconvolution2D build() { + Deconvolution2D l = new Deconvolution2D(this); l.initializeConstraints(); return l; } + } + public static abstract class Deconvolution2DBuilder> extends ConvolutionLayerBuilder { + @Override diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution3D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution3D.java index e99ef284d..f2b383ad9 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution3D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution3D.java @@ -44,7 +44,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder") +@SuperBuilder(builderMethodName = "innerBuilder") public class Deconvolution3D extends ConvolutionLayer { /** * Set the convolution mode for the Convolution layer. 
See {@link ConvolutionMode} for more @@ -56,6 +56,15 @@ public class Deconvolution3D extends ConvolutionLayer { private Convolution3D.DataFormat dataFormat = Convolution3D.DataFormat.NCDHW; // in libnd4j: 1 - NCDHW, 0 - NDHWC + public static Deconvolution3DBuilder builder() { + return innerBuilder() + .kernelSize(new int[] {2, 2, 2}) + .stride(new int[] {1, 1, 1}) + .padding(new int[] {0, 0, 0}) + .dilation(new int[] {1, 1, 1}) + .convolutionDim(3); + } + protected boolean allowCausal() { // Causal convolution - allowed for 1D only return false; @@ -69,13 +78,13 @@ public class Deconvolution3D extends ConvolutionLayer { public Deconvolution3D clone() { Deconvolution3D clone = (Deconvolution3D) super.clone(); if (clone.getKernelSize() != null) { - clone.setKernelSize( clone.getKernelSize().clone()); + clone.setKernelSize(clone.getKernelSize().clone()); } if (clone.getStride() != null) { - clone.setStride( clone.getStride().clone()); + clone.setStride(clone.getStride().clone()); } if (clone.getPadding() != null) { - clone.setPadding( clone.getPadding().clone()); + clone.setPadding(clone.getPadding().clone()); } return clone; } @@ -134,6 +143,11 @@ public class Deconvolution3D extends ConvolutionLayer { } } + // private int[] kernelSize; + // private int[] stride; + // private int[] padding; + // private int[] dilation; + @Override public InputType getOutputType(int layerIndex, InputType inputType) { if (inputType == null || inputType.getType() != InputType.Type.CNN3D) { @@ -158,29 +172,16 @@ public class Deconvolution3D extends ConvolutionLayer { Deconvolution3DLayer.class); } - //private int[] kernelSize; - //private int[] stride; - //private int[] padding; - //private int[] dilation; - - public static abstract class Deconvolution3DBuilder< + public abstract static class Deconvolution3DBuilder< C extends Deconvolution3D, B extends Deconvolution3DBuilder> - extends ConvolutionLayerBuilder { - public C build() { - C l = initBuild(); + extends ConvolutionLayerBuilder {} + + private static final class Deconvolution3DBuilderImpl + extends Deconvolution3DBuilder { + public Deconvolution3D build() { + Deconvolution3D l = new Deconvolution3D(this); l.initializeConstraints(); return l; } - } - - public static Deconvolution3DBuilder builder() { - return innerBuilder() - .kernelSize(new int[] {2, 2, 2}) - .stride(new int[] {1, 1, 1}) - .padding(new int[] {0, 0, 0}) - .dilation(new int[] {1, 1, 1}) - .convolutionDim(3); - } - } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DepthwiseConvolution2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DepthwiseConvolution2D.java index 394beeb73..cb2cfd73b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DepthwiseConvolution2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DepthwiseConvolution2D.java @@ -40,7 +40,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(buildMethodName = "initBuild") +@SuperBuilder public class DepthwiseConvolution2D extends ConvolutionLayer { /** * Set channels multiplier for depth-wise convolution @@ -145,21 +145,25 @@ public class DepthwiseConvolution2D extends ConvolutionLayer { this.dataFormat = ((InputType.InputTypeConvolutional) inputType).getFormat(); } - public abstract static class DepthwiseConvolution2DBuilder< - C extends DepthwiseConvolution2D, B extends DepthwiseConvolution2DBuilder> - extends 
ConvolutionLayerBuilder { - public C build() { + private static final class DepthwiseConvolution2DBuilderImpl extends DepthwiseConvolution2DBuilder { + public DepthwiseConvolution2D build() { + DepthwiseConvolution2D l = new DepthwiseConvolution2D(this); Preconditions.checkState( - depthMultiplier$value > 0, - "Depth multiplier must be > 0, got %s", - depthMultiplier$value); - C l = this.initBuild(); + l.getDepthMultiplier() > 0, + "Depth multiplier must be > 0, got %s", + l.getDepthMultiplier()); + ConvolutionUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding()); ConvolutionUtils.validateCnnKernelStridePadding( - l.getKernelSize(), l.getStride(), l.getPadding()); + l.getKernelSize(), l.getStride(), l.getPadding()); l.initializeConstraints(); return l; } + } + public abstract static class DepthwiseConvolution2DBuilder< + C extends DepthwiseConvolution2D, B extends DepthwiseConvolution2DBuilder> + extends ConvolutionLayerBuilder { + @Override public B kernelSize(int... kernelSize) { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingLayer.java index e18af8eb3..2f535bce3 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingLayer.java @@ -20,6 +20,8 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.Collection; +import java.util.Map; import lombok.*; import lombok.experimental.Accessors; import lombok.experimental.SuperBuilder; @@ -30,127 +32,136 @@ import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.params.EmbeddingLayerParamInitializer; -import org.deeplearning4j.nn.weights.IWeightInit; import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.nn.weights.embeddings.ArrayEmbeddingInitializer; import org.deeplearning4j.nn.weights.embeddings.EmbeddingInitializer; import org.deeplearning4j.nn.weights.embeddings.WeightInitEmbedding; import org.deeplearning4j.optimize.api.TrainingListener; import org.nd4j.linalg.activations.Activation; -import org.nd4j.linalg.activations.impl.ActivationIdentity; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.Collection; -import java.util.Map; - @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder") +@SuperBuilder(builderMethodName = "innerBuilder") public class EmbeddingLayer extends FeedForwardLayer { - /** - * If true: include bias parameters in the layer. False (default): no bias. - * @param hasBias If true: include bias parameters in this layer - */ - @Accessors @Builder.Default - private boolean hasBias = false; + /** + * If true: include bias parameters in the layer. False (default): no bias. + * + * @param hasBias If true: include bias parameters in this layer + */ + @Accessors @Builder.Default private boolean hasBias = false; /** - *Default to Identity activation - i.e., don't inherit. - * For example, if user sets ReLU as global default, they very likely don't intend to use it for Embedding layer also - * + * Default to Identity activation - i.e., don't inherit. 
For example, if user sets ReLU as global + * default, they very likely don't intend to use it for Embedding layer also */ public static EmbeddingLayerBuilder builder() { - return innerBuilder() - .activation(Activation.IDENTITY); + return innerBuilder().activation(Activation.IDENTITY); + } + + @Override + public Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + org.deeplearning4j.nn.layers.feedforward.embedding.EmbeddingLayer ret = + new org.deeplearning4j.nn.layers.feedforward.embedding.EmbeddingLayer( + lconf, networkDataType); + runInheritance(); + + ret.addTrainingListeners(trainingListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setLayerConfiguration(lconf); + return ret; + } + + @Override + public ParamInitializer initializer() { + return EmbeddingLayerParamInitializer.getInstance(); + } + + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + // Basically a dense layer, but no dropout is possible here, and no epsilons + InputType outputType = getOutputType(-1, inputType); + + val actElementsPerEx = outputType.arrayElementsPerExample(); + val numParams = initializer().numParams(this); + val updaterStateSize = (int) getIUpdater().stateSize(numParams); + + // Embedding layer does not use caching. + // Inference: no working memory - just activations (pullRows) + // Training: preout op, the only in-place ops on epsilon (from layer above) + assign ops + + return new LayerMemoryReport.Builder(name, EmbeddingLayer.class, inputType, outputType) + .standardMemory(numParams, updaterStateSize) + .workingMemory(0, 0, 0, actElementsPerEx) + .cacheMemory( + MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) // No caching + .build(); + } + + private static final class EmbeddingLayerBuilderImpl + extends EmbeddingLayerBuilder { + public EmbeddingLayer build() { + EmbeddingLayer l = new EmbeddingLayer(this); + l.initializeConstraints(); + return l; } + } - public static abstract class EmbeddingLayerBuilder> - extends FeedForwardLayerBuilder{ - public C build() { - C l = initBuild(); - l.initializeConstraints(); - return l; - } + public abstract static class EmbeddingLayerBuilder< + C extends EmbeddingLayer, B extends EmbeddingLayerBuilder> + extends FeedForwardLayerBuilder { - /** - * Weight initialization scheme to use, for initial weight values - * - * @param weightInit - * @see WeightInit - */ - @Override - public B weightInit(WeightInit weightInit) { - if(weightInit.getWeightInitFunction() instanceof WeightInitEmbedding){ - long[] shape = ((WeightInitEmbedding) weightInit.getWeightInitFunction()).shape(); - nIn(shape[0]); - nOut(shape[1]); - } - super.weightInit(weightInit); - return self(); - } - /** - * Initialize the embedding layer using values from the specified array. Note that the array should have shape - * [vocabSize, vectorSize]. 
After copying values from the array to initialize the network parameters, the input - * array will be discarded (so that, if necessary, it can be garbage collected) - * - * @param vectors Vectors to initialize the embedding layer with - */ - public B weightInit(INDArray vectors){ - weightInit(new ArrayEmbeddingInitializer(vectors)); - return self(); - } - - /** - * Initialize the embedding layer using the specified EmbeddingInitializer - such as a Word2Vec instance - * - * @param embeddingInitializer Source of the embedding layer weights - */ - public B weightInit(EmbeddingInitializer embeddingInitializer) { - var weightIn = new WeightInitEmbedding(embeddingInitializer); - super.weightInit(weightIn); - return self(); - } - } + /** + * Weight initialization scheme to use, for initial weight values + * + * @param weightInit + * @see WeightInit + */ @Override - public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, - int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - org.deeplearning4j.nn.layers.feedforward.embedding.EmbeddingLayer ret = - new org.deeplearning4j.nn.layers.feedforward.embedding.EmbeddingLayer(lconf, networkDataType); - runInheritance(); - - ret.addTrainingListeners(trainingListeners); - ret.setIndex(layerIndex); - ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(this, layerParamsView, initializeParams); - ret.setParamTable(paramTable); - ret.setLayerConfiguration(lconf); - return ret; + public B weightInit(WeightInit weightInit) { + if (weightInit.getWeightInitFunction() instanceof WeightInitEmbedding) { + long[] shape = ((WeightInitEmbedding) weightInit.getWeightInitFunction()).shape(); + nIn(shape[0]); + nOut(shape[1]); + } + super.weightInit(weightInit); + return self(); + } + /** + * Initialize the embedding layer using values from the specified array. Note that the array + * should have shape [vocabSize, vectorSize]. After copying values from the array to initialize + * the network parameters, the input array will be discarded (so that, if necessary, it can be + * garbage collected) + * + * @param vectors Vectors to initialize the embedding layer with + */ + public B weightInit(INDArray vectors) { + weightInit(new ArrayEmbeddingInitializer(vectors)); + return self(); } - @Override - public ParamInitializer initializer() { - return EmbeddingLayerParamInitializer.getInstance(); - } - - @Override - public LayerMemoryReport getMemoryReport(InputType inputType) { - //Basically a dense layer, but no dropout is possible here, and no epsilons - InputType outputType = getOutputType(-1, inputType); - - val actElementsPerEx = outputType.arrayElementsPerExample(); - val numParams = initializer().numParams(this); - val updaterStateSize = (int) getIUpdater().stateSize(numParams); - - //Embedding layer does not use caching. 
- //Inference: no working memory - just activations (pullRows) - //Training: preout op, the only in-place ops on epsilon (from layer above) + assign ops - - return new LayerMemoryReport.Builder(name, EmbeddingLayer.class, inputType, outputType) - .standardMemory(numParams, updaterStateSize).workingMemory(0, 0, 0, actElementsPerEx) - .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching - .build(); + /** + * Initialize the embedding layer using the specified EmbeddingInitializer - such as a Word2Vec + * instance + * + * @param embeddingInitializer Source of the embedding layer weights + */ + public B weightInit(EmbeddingInitializer embeddingInitializer) { + var weightIn = new WeightInitEmbedding(embeddingInitializer); + super.weightInit(weightIn); + return self(); } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingSequenceLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingSequenceLayer.java index 09b908445..e21593fb6 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingSequenceLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingSequenceLayer.java @@ -46,7 +46,7 @@ import java.util.Map; @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder") +@SuperBuilder(builderMethodName = "innerBuilder") public class EmbeddingSequenceLayer extends FeedForwardLayer { /** * Set input sequence length for this embedding layer. @@ -70,13 +70,16 @@ public class EmbeddingSequenceLayer extends FeedForwardLayer { @Builder.Default private boolean inferInputLength = false; // use input length as provided by input data @Builder.Default private RNNFormat outputDataFormat = RNNFormat.NCW; //Default value for older deserialized models + private static final class EmbeddingSequenceLayerBuilderImpl extends EmbeddingSequenceLayerBuilder { + public EmbeddingSequenceLayer build() { + EmbeddingSequenceLayer l = new EmbeddingSequenceLayer(this); + l.initializeConstraints(); + return l; + } + } public static abstract class EmbeddingSequenceLayerBuilder> extends FeedForwardLayerBuilder { - public C build() { - C l = initBuild(); - l.initializeConstraints(); - return l; - } + public B weightInit(IWeightInit weightInit){ if(weightInit instanceof WeightInitEmbedding){ diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.java index 1110e15a6..091410bb7 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.*; import lombok.*; import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.Layer; @@ -36,23 +37,21 @@ import org.nd4j.linalg.activations.impl.ActivationSigmoid; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.*; - @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) @Deprecated -@SuperBuilder(buildMethodName = "initBuild") +@SuperBuilder public class GravesBidirectionalLSTM extends BaseRecurrentLayer { - public static abstract class 
GravesBidirectionalLSTMBuilder> extends BaseRecurrentLayerBuilder { - public C build() { - C l = this.initBuild(); - l.initializeConstraints(); - return l; - } - } + /** + * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementation be allowed? + * If set to false, an exception in CuDNN will be propagated back to the user. If true, the built-in + * (non-CuDNN) implementation for GravesBidirectionalLSTM will be used + * + */ + @Builder.Default + protected boolean helperAllowFallback = true; /** * Set forget gate bias initializations. Values in range 1-5 can potentially help with learning or longer-term * dependencies. @@ -66,15 +65,6 @@ public class GravesBidirectionalLSTM extends BaseRecurrentLayer { */ @Builder.Default private IActivation gateActivationFunction = new ActivationSigmoid(); - /** - * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementation be allowed? - * If set to false, an exception in CuDNN will be propagated back to the user. If false, the built-in - * (non-CuDNN) implementation for GravesBidirectionalLSTM will be used - * - */ - @Builder.Default - protected boolean helperAllowFallback = true; - @Override protected void initializeConstraints() { @@ -121,5 +111,18 @@ public class GravesBidirectionalLSTM extends BaseRecurrentLayer { return LSTMHelpers.getMemoryReport(this, inputType); } + private static final class GravesBidirectionalLSTMBuilderImpl extends GravesBidirectionalLSTMBuilder { + public GravesBidirectionalLSTM build() { + GravesBidirectionalLSTM l = new GravesBidirectionalLSTM(this); + l.initializeConstraints(); + return l; + } + } + + public static abstract class GravesBidirectionalLSTMBuilder> extends BaseRecurrentLayerBuilder { + + } + } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java index c5521f8b4..a92151445 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java @@ -43,7 +43,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; @Deprecated @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(buildMethodName = "initBuild") +@SuperBuilder public class GravesLSTM extends AbstractLSTM { private double forgetGateBiasInit; @@ -103,9 +103,12 @@ public class GravesLSTM extends AbstractLSTM { public abstract static class GravesLSTMBuilder< C extends GravesLSTM, B extends GravesLSTMBuilder> - extends AbstractLSTMBuilder { - public C build() { - C l = initBuild(); + extends AbstractLSTMBuilder {} + + private static final class GravesLSTMBuilderImpl + extends GravesLSTMBuilder { + public GravesLSTM build() { + GravesLSTM l = new GravesLSTM(this); l.initializeConstraints(); return l; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LSTM.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LSTM.java index 19b736993..80b64fbf8 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LSTM.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LSTM.java @@ -20,6 +20,10 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; 
import org.deeplearning4j.nn.api.Layer; @@ -31,71 +35,75 @@ import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.layers.recurrent.LSTMHelpers; import org.deeplearning4j.nn.params.LSTMParamInitializer; import org.deeplearning4j.optimize.api.TrainingListener; -import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.Map; - @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(buildMethodName = "initBuild") +@SuperBuilder public class LSTM extends AbstractLSTM { - private double forgetGateBiasInit; + private double forgetGateBiasInit; - public static abstract class LSTMBuilder> extends AbstractLSTMBuilder { - @Override public C build() { - C l = this.initBuild(); - l.initializeConstraints(); - return l; - } + @Override + protected void initializeConstraints() { + super.initializeConstraints(); + if (recurrentConstraints != null) { + if (constraints == null) { + constraints = new ArrayList<>(); + } + for (LayerConstraint c : recurrentConstraints) { + LayerConstraint c2 = c.clone(); + c2.setParams(Collections.singleton(LSTMParamInitializer.RECURRENT_WEIGHT_KEY)); + constraints.add(c2); + } } + } + @Override + public Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + LayerValidation.assertNInNOutSet("LSTM", getName(), layerIndex, getNIn(), getNOut()); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); + + org.deeplearning4j.nn.layers.recurrent.LSTM ret = + new org.deeplearning4j.nn.layers.recurrent.LSTM(lconf, networkDataType); + ret.addTrainingListeners(trainingListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setLayerConfiguration(lconf); + return ret; + } + + @Override + public ParamInitializer initializer() { + return LSTMParamInitializer.getInstance(); + } + + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + // TODO - CuDNN etc + return LSTMHelpers.getMemoryReport(this, inputType); + } + + public abstract static class LSTMBuilder> + extends AbstractLSTMBuilder {} + + private static final class LSTMBuilderImpl extends LSTMBuilder { @Override - protected void initializeConstraints() { - super.initializeConstraints(); - if (recurrentConstraints != null) { - if (constraints == null) { - constraints = new ArrayList<>(); - } - for (LayerConstraint c : recurrentConstraints) { - LayerConstraint c2 = c.clone(); - c2.setParams(Collections.singleton(LSTMParamInitializer.RECURRENT_WEIGHT_KEY)); - constraints.add(c2); - } - } - } - - @Override - public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, - int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - LayerValidation.assertNInNOutSet("LSTM", getName(), layerIndex, getNIn(), getNOut()); - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - runInheritance(); - - org.deeplearning4j.nn.layers.recurrent.LSTM ret = new org.deeplearning4j.nn.layers.recurrent.LSTM(lconf, networkDataType); - ret.addTrainingListeners(trainingListeners); - 
ret.setIndex(layerIndex); - ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(this, layerParamsView, initializeParams); - ret.setParamTable(paramTable); - ret.setLayerConfiguration(lconf); - return ret; - } - - @Override - public ParamInitializer initializer() { - return LSTMParamInitializer.getInstance(); - } - - @Override - public LayerMemoryReport getMemoryReport(InputType inputType) { - //TODO - CuDNN etc - return LSTMHelpers.getMemoryReport(this, inputType); + public LSTM build() { + LSTM l = new LSTM(this); + l.initializeConstraints(); + return l; } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerConfiguration.java index 06bcad3ef..a34d5dc51 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerConfiguration.java @@ -354,15 +354,5 @@ public abstract class LayerConfiguration biasConstraints = Arrays.asList(constraints); return self(); } - - /** - * we are doing this to avoid BUG https://github.com/projectlombok/lombok/issues/3419 as some - * child classes may specify their own buildMethodName in @SuperBuilder, but we use only - * "initBuild" here consequently - * @return - */ - public C initBuild() { - return build(); - } } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LearnedSelfAttentionLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LearnedSelfAttentionLayer.java index a68ae9872..aa9351902 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LearnedSelfAttentionLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LearnedSelfAttentionLayer.java @@ -41,7 +41,7 @@ import org.nd4j.linalg.factory.Nd4j; @Data @EqualsAndHashCode(callSuper = true) -@SuperBuilder(buildMethodName = "initBuild") +@SuperBuilder public class LearnedSelfAttentionLayer extends SameDiffLayer { private static final String WEIGHT_KEY_QUERY_PROJECTION = "Wq"; private static final String WEIGHT_KEY_KEY_PROJECTION = "Wk"; @@ -173,19 +173,24 @@ public class LearnedSelfAttentionLayer extends SameDiffLayer { public static abstract class LearnedSelfAttentionLayerBuilder< C extends LearnedSelfAttentionLayer, B extends LearnedSelfAttentionLayerBuilder> extends SameDiffLayerBuilder { - public C build() { - Preconditions.checkArgument( - this.projectInput || this.nHeads == 1, "projectInput must be true when nHeads != 1"); - Preconditions.checkArgument( - this.projectInput || nIn == nOut, "nIn must be equal to nOut when projectInput is false"); - Preconditions.checkArgument( - !this.projectInput || nOut != 0, "nOut must be specified when projectInput is true"); - Preconditions.checkArgument( - this.nOut % nHeads == 0 || headSize > 0, - "nOut isn't divided by nHeads cleanly. 
Specify the headSize manually."); - Preconditions.checkArgument(this.nQueries > 0, "You must set numQueries."); - return initBuild(); + } + + private static final class LearnedSelfAttentionLayerBuilderImpl extends LearnedSelfAttentionLayerBuilder { + public LearnedSelfAttentionLayer build() { + LearnedSelfAttentionLayer l = new LearnedSelfAttentionLayer(this); + Preconditions.checkArgument( + l.isProjectInput() || l.getNHeads() == 1, "projectInput must be true when nHeads != 1"); + Preconditions.checkArgument( + l.isProjectInput() || l.getNIn() == l.getNOut(), "nIn must be equal to nOut when projectInput is false"); + Preconditions.checkArgument( + !l.isProjectInput() || l.getNOut() != 0, "nOut must be specified when projectInput is true"); + Preconditions.checkArgument( + l.getNOut() % l.getNHeads() == 0 || l.getHeadSize() > 0, + "nOut isn't divided by nHeads cleanly. Specify the headSize manually."); + Preconditions.checkArgument(l.getNQueries() > 0, "You must set numQueries."); + + return l; } } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected1D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected1D.java index bb4883b35..140807623 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected1D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected1D.java @@ -48,19 +48,9 @@ import org.nd4j.linalg.factory.Nd4j; @Data @EqualsAndHashCode(callSuper = true) @JsonIgnoreProperties({"paramShapes"}) -@SuperBuilder(buildMethodName = "initBuild") +@SuperBuilder public class LocallyConnected1D extends SameDiffLayer { - public static abstract class LocallyConnected1DBuilder> extends - SameDiffLayerBuilder { - public C build() { - Convolution1DUtils.validateConvolutionModePadding(convolutionMode$value, padding$value); - Convolution1DUtils.validateCnn1DKernelStridePadding(kernelSize$value, stride$value, padding$value); - C l = initBuild(); - return l; - } - } - private static final List WEIGHT_KEYS = Collections.singletonList(ConvolutionParamInitializer.WEIGHT_KEY); private static final List BIAS_KEYS = @@ -89,10 +79,8 @@ public class LocallyConnected1D extends SameDiffLayer { private int paddingR; // Right/bottom padding /** Convolution mode for the layer. 
See {@link ConvolutionMode} for details */ @Builder.Default private ConvolutionMode convolutionMode = ConvolutionMode.Same; - /** Dilation for the layer */ @Builder.Default private int dilation = 1; - /** If true (default is false) the layer will have a bias */ @Builder.Default private boolean hasBias = true; @@ -272,4 +260,20 @@ public class LocallyConnected1D extends SameDiffLayer { convolutionMode = global_conf.getConvolutionMode(); } } + + private static final class LocallyConnected1DBuilderImpl + extends LocallyConnected1DBuilder { + public LocallyConnected1D build() { + LocallyConnected1D l = new LocallyConnected1D(this); + Convolution1DUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding()); + Convolution1DUtils.validateCnn1DKernelStridePadding( + l.getKernelSize(), l.getStride(), l.getPadding()); + + return l; + } + } + + public abstract static class LocallyConnected1DBuilder< + C extends LocallyConnected1D, B extends LocallyConnected1DBuilder> + extends SameDiffLayerBuilder {} } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected2D.java index 95cb18054..04922b857 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected2D.java @@ -41,7 +41,6 @@ import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.enums.PadMode; import org.nd4j.linalg.activations.Activation; -import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; @@ -49,7 +48,7 @@ import org.nd4j.linalg.factory.Nd4j; @Data @EqualsAndHashCode(callSuper = true) @JsonIgnoreProperties({"paramShapes"}) -@SuperBuilder(buildMethodName = "initBuild") +@SuperBuilder public class LocallyConnected2D extends SameDiffLayer { private static final List WEIGHT_KEYS = @@ -318,40 +317,44 @@ public class LocallyConnected2D extends SameDiffLayer { } } - public static abstract class LocallyConnected2DBuilder< - C extends LocallyConnected2D, B extends LocallyConnected2DBuilder> - extends SameDiffLayerBuilder { - public C build() { - featureDim(kernel$value[0] * kernel$value[1] * (int) nIn); - C l = initBuild(); + private static final class LocallyConnected2DBuilderImpl + extends LocallyConnected2DBuilder { + public LocallyConnected2D build() { + LocallyConnected2D l = new LocallyConnected2D(this); + l.setFeatureDim(l.getKernel()[0] * l.getKernel()[1] * (int) l.getNIn()); return l; } + } - public B kernelSize(int ... kernel) { - this.kernel$value = ValidationUtils.validate2NonNegative(kernel, false, "kernel"); + public abstract static class LocallyConnected2DBuilder< + C extends LocallyConnected2D, B extends LocallyConnected2DBuilder> + extends SameDiffLayerBuilder { + + public B kernelSize(int... kernel) { + this.kernel$value = ValidationUtils.validate2NonNegative(kernel, false, "kernel"); this.kernel$set = true; return self(); } - public B inputSize(int ... size) { - this.inputSize = size; + public B inputSize(int... size) { + this.inputSize = size; return self(); } - public B stride(int ... stride) { - this.stride$value = ValidationUtils.validate2NonNegative(stride, false, "stride"); + public B stride(int... 
stride) { + this.stride$value = ValidationUtils.validate2NonNegative(stride, false, "stride"); this.stride$set = true; return self(); } - public B padding(int ... padding) { - this.padding$value = ValidationUtils.validate2NonNegative(padding, false, "padding"); + public B padding(int... padding) { + this.padding$value = ValidationUtils.validate2NonNegative(padding, false, "padding"); this.padding$set = true; return self(); } - public B dilation(int ... dilation) { - this.dilation$value = ValidationUtils.validate2NonNegative(dilation, false, "dilation"); + public B dilation(int... dilation) { + this.dilation$value = ValidationUtils.validate2NonNegative(dilation, false, "dilation"); this.dilation$set = true; return self(); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/OutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/OutputLayer.java index 48e4b9f1a..3d1da835a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/OutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/OutputLayer.java @@ -22,7 +22,6 @@ package org.deeplearning4j.nn.conf.layers; import java.util.Collection; import java.util.Map; - import lombok.*; import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.Layer; @@ -33,13 +32,12 @@ import org.deeplearning4j.optimize.api.TrainingListener; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.lossfunctions.ILossFunction; import org.nd4j.linalg.lossfunctions.LossFunctions; @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder") +@SuperBuilder(builderMethodName = "innerBuilder") public class OutputLayer extends BaseOutputLayer { { // Set default activation function to softmax (to match default loss function MCXENT) @@ -82,15 +80,16 @@ public class OutputLayer extends BaseOutputLayer { return DefaultParamInitializer.getInstance(); } - public static abstract class OutputLayerBuilder< + public abstract static class OutputLayerBuilder< C extends OutputLayer, B extends OutputLayerBuilder> - extends BaseOutputLayerBuilder { - public C build() { - C l = this.initBuild(); + extends BaseOutputLayerBuilder {} + + private static final class OutputLayerBuilderImpl + extends OutputLayerBuilder { + public OutputLayer build() { + OutputLayer l = new OutputLayer(this); l.initializeConstraints(); return l; } - - } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PReLULayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PReLULayer.java index d48d88708..cacd65ca5 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PReLULayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PReLULayer.java @@ -40,7 +40,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder") +@SuperBuilder(builderMethodName = "innerBuilder") public class PReLULayer extends BaseLayerConfiguration { /** * Explicitly set input shape of incoming activations so that parameters can be initialized @@ -129,14 +129,17 @@ public class PReLULayer extends BaseLayerConfiguration { .build(); } - public static 
abstract class PReLULayerBuilder< - C extends PReLULayer, B extends PReLULayerBuilder> - extends BaseLayerConfigurationBuilder { - public C build() { - C l = initBuild(); + private static final class PReLULayerBuilderImpl extends PReLULayerBuilder { + public PReLULayer build() { + PReLULayer l = new PReLULayer(this); l.initializeConstraints(); return l; } + } + public static abstract class PReLULayerBuilder< + C extends PReLULayer, B extends PReLULayerBuilder> + extends BaseLayerConfigurationBuilder { + /** * Explicitly set input shape of incoming activations so that parameters can be initialized diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Pooling1D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Pooling1D.java index 3ad05de09..e9a8f1b08 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Pooling1D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Pooling1D.java @@ -35,6 +35,6 @@ import lombok.experimental.SuperBuilder; @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(buildMethodName = "initBuild") +@SuperBuilder public class Pooling1D extends Subsampling1DLayer { } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Pooling2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Pooling2D.java index ddc3f837d..1bf16dc5d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Pooling2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Pooling2D.java @@ -35,6 +35,6 @@ import lombok.experimental.SuperBuilder; @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(buildMethodName = "initBuild") +@SuperBuilder public class Pooling2D extends SubsamplingLayer { } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PrimaryCapsules.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PrimaryCapsules.java index ce8101738..081ecbe4d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PrimaryCapsules.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PrimaryCapsules.java @@ -41,7 +41,7 @@ import org.nd4j.linalg.factory.Nd4j; @Data @EqualsAndHashCode(callSuper = true) -@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder") +@SuperBuilder(builderMethodName = "innerBuilder") public class PrimaryCapsules extends SameDiffLayer { private static final String WEIGHT_PARAM = "weight"; @@ -335,7 +335,7 @@ public class PrimaryCapsules extends SameDiffLayer { } } - public static abstract class PrimaryCapsulesBuilder< + public abstract static class PrimaryCapsulesBuilder< C extends PrimaryCapsules, B extends PrimaryCapsulesBuilder> extends SameDiffLayerBuilder { @@ -396,27 +396,30 @@ public class PrimaryCapsules extends SameDiffLayer { this.useLeakyReLU$set = true; return self(); } + } - public C build() { - C l = initBuild(); - if (capsuleDimensions <= 0 || channels$value <= 0) { + private static final class PrimaryCapsulesBuilderImpl + extends PrimaryCapsulesBuilder { + public PrimaryCapsules build() { + PrimaryCapsules l = new PrimaryCapsules(this); + if (l.getCapsuleDimensions() <= 0 || l.getChannels() <= 0) { throw new IllegalArgumentException( "Invalid configuration for Primary Capsules (layer name = \"" + l.getName() + "\"):" + " capsuleDimensions and channels 
must be > 0. Got: " - + capsuleDimensions + + l.getCapsuleDimensions() + ", " - + channels$value); + + l.getChannels()); } - if (capsules < 0) { + if (l.getCapsules() < 0) { throw new IllegalArgumentException( "Invalid configuration for Capsule ILayer (layer name = \"" + l.getName() + "\"):" + " capsules must be >= 0 if set. Got: " - + capsules); + + l.getCapsules()); } return l; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RecurrentAttentionLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RecurrentAttentionLayer.java index 73f71ff83..a57ab343c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RecurrentAttentionLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RecurrentAttentionLayer.java @@ -43,21 +43,25 @@ import java.util.Map; @Data @EqualsAndHashCode(callSuper = true) -@SuperBuilder(buildMethodName = "initBuild") +@SuperBuilder public class RecurrentAttentionLayer extends SameDiffLayer { + private static final class RecurrentAttentionLayerBuilderImpl extends RecurrentAttentionLayerBuilder { + public RecurrentAttentionLayer build() { + RecurrentAttentionLayer l = new RecurrentAttentionLayer(this); + Preconditions.checkArgument(l.isProjectInput() || l.getNHeads() == 1, "projectInput must be true when nHeads != 1"); + Preconditions.checkArgument(l.isProjectInput() || l.getNIn() == l.getNOut(), "nIn must be equal to nOut when projectInput is false"); + Preconditions.checkArgument(!l.isProjectInput() || l.getNOut() != 0, "nOut must be specified when projectInput is true"); + Preconditions.checkArgument(l.getNOut() % l.getNHeads() == 0 || l.getHeadSize() > 0, "nOut isn't divided by nHeads cleanly. Specify the headSize manually."); + + + return l; + } + } public static abstract class RecurrentAttentionLayerBuilder> extends SameDiffLayerBuilder { - public C build() { - Preconditions.checkArgument(this.projectInput$value || this.nHeads == 1, "projectInput must be true when nHeads != 1"); - Preconditions.checkArgument(this.projectInput$value || nIn == nOut, "nIn must be equal to nOut when projectInput is false"); - Preconditions.checkArgument(!this.projectInput$value || nOut != 0, "nOut must be specified when projectInput is true"); - Preconditions.checkArgument(this.nOut % nHeads == 0 || headSize > 0, "nOut isn't divided by nHeads cleanly. 
Specify the headSize manually."); - C l = initBuild(); - return l; - } } /** diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnOutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnOutputLayer.java index 9d774293c..3e1198cb0 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnOutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnOutputLayer.java @@ -24,7 +24,6 @@ import java.util.Collection; import java.util.Map; import lombok.Data; import lombok.EqualsAndHashCode; -import lombok.NoArgsConstructor; import lombok.ToString; import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.Layer; @@ -42,88 +41,104 @@ import org.nd4j.linalg.lossfunctions.LossFunctions; @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder") +@SuperBuilder(builderMethodName = "innerBuilder") public class RnnOutputLayer extends BaseOutputLayer { - /** - * @param rnnDataFormat Data format expected by the layer. NCW = [miniBatchSize, size, timeSeriesLength], - * NWC = [miniBatchSize, timeSeriesLength, size]. Defaults to NCW. - */ - private RNNFormat dataFormat; + /** + * @param rnnDataFormat Data format expected by the layer. NCW = [miniBatchSize, size, + * timeSeriesLength], NWC = [miniBatchSize, timeSeriesLength, size]. Defaults to NCW. + */ + private RNNFormat dataFormat; - public static RnnOutputLayerBuilder builder() { - return innerBuilder(); + public static RnnOutputLayerBuilder builder() { + return innerBuilder(); + } + + /** + * @param lossFn Loss function for the output layer + */ + public static RnnOutputLayerBuilder builder(LossFunctions.LossFunction lossFn) { + return innerBuilder().lossFunction(lossFn); + } + + @Override + public Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + LayerValidation.assertNInNOutSet("RnnOutputLayer", getName(), layerIndex, getNIn(), getNOut()); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + + org.deeplearning4j.nn.layers.recurrent.RnnOutputLayer ret = + new org.deeplearning4j.nn.layers.recurrent.RnnOutputLayer(lconf, networkDataType); + ret.addTrainingListeners(trainingListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setLayerConfiguration(lconf); + return ret; + } + + @Override + public ParamInitializer initializer() { + return DefaultParamInitializer.getInstance(); + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null || inputType.getType() != InputType.Type.RNN) { + throw new IllegalStateException( + "Invalid input type for RnnOutputLayer (layer index = " + + layerIndex + + ", layer name=\"" + + getName() + + "\"): Expected RNN input, got " + + inputType); + } + InputType.InputTypeRecurrent itr = (InputType.InputTypeRecurrent) inputType; + + return InputType.recurrent(nOut, itr.getTimeSeriesLength(), itr.getFormat()); + } + + @Override + public void setNIn(InputType inputType, boolean override) { + if (inputType == null || inputType.getType() != InputType.Type.RNN) { + throw new IllegalStateException( + "Invalid input type for 
RnnOutputLayer (layer name=\"" + + getName() + + "\"): Expected RNN input, got " + + inputType); } - /** - * @param lossFn Loss function for the output layer - */ - public static RnnOutputLayerBuilder builder(LossFunctions.LossFunction lossFn) { - return innerBuilder() - .lossFunction(lossFn); + InputType.InputTypeRecurrent r = (InputType.InputTypeRecurrent) inputType; + if (dataFormat == null || override) { + this.dataFormat = r.getFormat(); } - @Override - public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, - int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - LayerValidation.assertNInNOutSet("RnnOutputLayer", getName(), layerIndex, getNIn(), getNOut()); - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - - org.deeplearning4j.nn.layers.recurrent.RnnOutputLayer ret = - new org.deeplearning4j.nn.layers.recurrent.RnnOutputLayer(lconf, networkDataType); - ret.addTrainingListeners(trainingListeners); - ret.setIndex(layerIndex); - ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(this, layerParamsView, initializeParams); - ret.setParamTable(paramTable); - ret.setLayerConfiguration(lconf); - return ret; + if (nIn <= 0 || override) { + this.nIn = r.getSize(); } + } - @Override - public ParamInitializer initializer() { - return DefaultParamInitializer.getInstance(); + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, dataFormat, getName()); + } + + public abstract static class RnnOutputLayerBuilder< + C extends RnnOutputLayer, B extends RnnOutputLayerBuilder> + extends BaseOutputLayerBuilder {} + + private static final class RnnOutputLayerBuilderImpl + extends RnnOutputLayerBuilder { + public RnnOutputLayer build() { + RnnOutputLayer l = new RnnOutputLayer(this); + l.initializeConstraints(); + return l; } - - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - if (inputType == null || inputType.getType() != InputType.Type.RNN) { - throw new IllegalStateException("Invalid input type for RnnOutputLayer (layer index = " + layerIndex - + ", layer name=\"" + getName() + "\"): Expected RNN input, got " + inputType); - } - InputType.InputTypeRecurrent itr = (InputType.InputTypeRecurrent) inputType; - - return InputType.recurrent(nOut, itr.getTimeSeriesLength(), itr.getFormat()); - } - - @Override - public void setNIn(InputType inputType, boolean override) { - if (inputType == null || inputType.getType() != InputType.Type.RNN) { - throw new IllegalStateException("Invalid input type for RnnOutputLayer (layer name=\"" + getName() - + "\"): Expected RNN input, got " + inputType); - } - - InputType.InputTypeRecurrent r = (InputType.InputTypeRecurrent) inputType; - if(dataFormat == null || override) { - this.dataFormat = r.getFormat(); - } - - if (nIn <= 0 || override) { - this.nIn = r.getSize(); - } - } - - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, dataFormat, getName()); - } - - public static abstract class RnnOutputLayerBuilder> extends BaseOutputLayerBuilder { - public C build() { - C l = this.initBuild(); - l.initializeConstraints(); - return l; - } - } - + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SelfAttentionLayer.java 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SelfAttentionLayer.java index 40153688e..96a62b19e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SelfAttentionLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SelfAttentionLayer.java @@ -38,7 +38,7 @@ import org.nd4j.linalg.factory.Nd4j; @Data @EqualsAndHashCode(callSuper = true) -@SuperBuilder(buildMethodName = "initBuild") +@SuperBuilder public class SelfAttentionLayer extends SameDiffLayer { private static final String WEIGHT_KEY_QUERY_PROJECTION = "Wq"; private static final String WEIGHT_KEY_KEY_PROJECTION = "Wk"; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SeparableConvolution2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SeparableConvolution2D.java index 5d7d25066..d76e46e09 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SeparableConvolution2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SeparableConvolution2D.java @@ -44,7 +44,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder") +@SuperBuilder(builderMethodName = "innerBuilder") public class SeparableConvolution2D extends ConvolutionLayer { /** * Set constraints to be applied to the point-wise convolution weight parameters of this layer. diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java index a64db7447..e2e6ab230 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java @@ -50,7 +50,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(buildMethodName = "initBuild") +@SuperBuilder public class Subsampling1DLayer extends SubsamplingLayer { @Override @@ -153,11 +153,9 @@ public class Subsampling1DLayer extends SubsamplingLayer { return true; } - public static abstract class Subsampling1DLayerBuilder> extends - SubsamplingLayerBuilder { - - public C build() { - C l = this.initBuild(); + private static final class Subsampling1DLayerBuilderImpl extends Subsampling1DLayerBuilder { + public Subsampling1DLayer build() { + Subsampling1DLayer l =new Subsampling1DLayer(this); if (l.getPoolingType() == org.deeplearning4j.nn.conf.layers.PoolingType.PNORM && l.getPnorm() <= 0) { throw new IllegalStateException( "Incorrect Subsampling config: p-norm must be set when using PoolingType.PNORM"); @@ -167,6 +165,11 @@ public class Subsampling1DLayer extends SubsamplingLayer { return l; } + } + public static abstract class Subsampling1DLayerBuilder> extends + SubsamplingLayerBuilder { + + /** * * @param kernelSize diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling3DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling3DLayer.java index 324503c3d..150419817 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling3DLayer.java +++ 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling3DLayer.java @@ -45,7 +45,7 @@ import org.nd4j.linalg.learning.regularization.Regularization; @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(builderMethodName = "innerBuilder", buildMethodName = "initBuild") +@SuperBuilder(builderMethodName = "innerBuilder") public class Subsampling3DLayer extends NoParamLayer { @Builder.Default protected ConvolutionMode convolutionMode = ConvolutionMode.Truncate; @@ -304,17 +304,22 @@ public class Subsampling3DLayer extends NoParamLayer { return self(); } - public C build() { - if (kernelSize.length != 3) { + + } + + private static final class Subsampling3DLayerBuilderImpl extends Subsampling3DLayerBuilder { + public Subsampling3DLayer build() { + Subsampling3DLayer l = new Subsampling3DLayer(this); + if (l.getKernelSize().length != 3) { throw new IllegalArgumentException("Kernel size must be length 3"); } - if (stride.length != 3) { + if (l.getStride().length != 3) { throw new IllegalArgumentException("Invalid stride, must be length 3"); } - C l = this.initBuild(); - ConvolutionUtils.validateConvolutionModePadding(l.getConvolutionMode(), padding); - Convolution3DUtils.validateCnn3DKernelStridePadding(kernelSize, stride, padding); + + ConvolutionUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding()); + Convolution3DUtils.validateCnn3DKernelStridePadding(l.getKernelSize(), l.getStride(), l.getPadding()); return l; } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java index d110e90a3..93987e7ec 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java @@ -45,7 +45,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder") +@SuperBuilder(builderMethodName = "innerBuilder") public class SubsamplingLayer extends NoParamLayer { public static final CNN2DFormat DEFAULT_FORMAT = CNN2DFormat.NCHW; @@ -425,25 +425,7 @@ public class SubsamplingLayer extends NoParamLayer { return self(); } - public C build() { - if (kernelSize$value.length != 2) { - throw new IllegalArgumentException("Kernel size of should be rows x columns (a 2d array)"); - } - if (stride$value.length != 2) { - throw new IllegalArgumentException("Invalid stride, must be length 2"); - } - if (poolingType$value == org.deeplearning4j.nn.conf.layers.PoolingType.PNORM && pnorm <= 0) { - throw new IllegalStateException( - "Incorrect Subsampling config: p-norm must be set when using PoolingType.PNORM"); - } - ConvolutionUtils.validateConvolutionModePadding(convolutionMode$value, padding$value); - ConvolutionUtils.validateCnnKernelStridePadding( - kernelSize$value, stride$value, padding$value); - - C l = initBuild(); - return l; - } public B setConvolutionMode(ConvolutionMode convolutionMode){ Preconditions.checkState(allowCausal$value || convolutionMode$value != ConvolutionMode.Causal, "Causal convolution mode can only be used with 1D" + @@ -459,4 +441,25 @@ public class SubsamplingLayer extends NoParamLayer { return self(); } } + private static final class SubsamplingLayerBuilderImpl extends SubsamplingLayerBuilder { + public 
SubsamplingLayer build() { + SubsamplingLayer l = new SubsamplingLayer(this); + if (l.getKernelSize().length != 2) { + throw new IllegalArgumentException("Kernel size should be rows x columns (a 2d array)"); + } + + if (l.getStride().length != 2) { + throw new IllegalArgumentException("Invalid stride, must be length 2"); + } + if (l.getPoolingType() == org.deeplearning4j.nn.conf.layers.PoolingType.PNORM && l.getPnorm() <= 0) { + throw new IllegalStateException( + "Incorrect Subsampling config: p-norm must be set when using PoolingType.PNORM"); + } + ConvolutionUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding()); + ConvolutionUtils.validateCnnKernelStridePadding( + l.getKernelSize(), l.getStride(), l.getPadding()); + + return l; + } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPaddingLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPaddingLayer.java index 064a55123..17a3ad0b2 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPaddingLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPaddingLayer.java @@ -41,7 +41,7 @@ import java.util.Map; @Data @EqualsAndHashCode(callSuper = true) -@SuperBuilder(builderMethodName = "innerBuilder", buildMethodName = "initBuild") +@SuperBuilder(builderMethodName = "innerBuilder") public class ZeroPaddingLayer extends NoParamLayer { /** * @param padding Padding value for top, bottom, left, and right. Must be length 4 array diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java index 031869ab6..56dad41bb 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java @@ -48,7 +48,7 @@ import org.nd4j.serde.jackson.shaded.NDArrayTextSerializer; @EqualsAndHashCode(callSuper = false) -@SuperBuilder(buildMethodName = "initBuild") +@SuperBuilder public class Yolo2OutputLayer extends LayerConfiguration { /** diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/wrapper/BaseWrapperLayerConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/wrapper/BaseWrapperLayerConfiguration.java index bfa864c29..72a80d2d8 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/wrapper/BaseWrapperLayerConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/wrapper/BaseWrapperLayerConfiguration.java @@ -43,8 +43,7 @@ import org.nd4j.linalg.learning.regularization.Regularization; public abstract class BaseWrapperLayerConfiguration extends LayerConfiguration { /** The configuration of another layer to wrap */ - @Getter @Setter - protected LayerConfiguration underlying; + @Getter @Setter protected LayerConfiguration underlying; /** * Set the net configuration for this configuration as well as for the underlying layer (if not