Using @SuperBuilder for LayerConfigurations
Signed-off-by: brian <brian@brutex.de>master
parent
396dbec24e
commit
7628bbdd53
|
@ -43,7 +43,7 @@ import org.nd4j.linalg.learning.config.IUpdater;
|
|||
|
||||
@ToString(callSuper = true)
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder")
|
||||
@SuperBuilder(builderMethodName = "innerBuilder")
|
||||
public class ActivationLayer extends NoParamLayer {
|
||||
|
||||
|
||||
|
@ -133,8 +133,12 @@ public class ActivationLayer extends NoParamLayer {
|
|||
public static abstract class ActivationLayerBuilder<
|
||||
C extends ActivationLayer, B extends ActivationLayerBuilder<C, B>>
|
||||
extends NoParamLayer.NoParamLayerBuilder<C, B> {
|
||||
public C build() {
|
||||
C l = this.initBuild();
|
||||
|
||||
}
|
||||
|
||||
private static final class ActivationLayerBuilderImpl extends ActivationLayerBuilder<ActivationLayer, ActivationLayerBuilderImpl> {
|
||||
public ActivationLayer build() {
|
||||
ActivationLayer l = this.initBuild();
|
||||
l.initializeConstraints();
|
||||
return l;
|
||||
}
|
||||
|
|
|
@ -33,7 +33,7 @@ import org.deeplearning4j.nn.conf.inputs.InputType;
|
|||
|
||||
@ToString(callSuper = true)
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@SuperBuilder()
|
||||
@SuperBuilder
|
||||
public abstract class BaseUpsamplingLayer extends NoParamLayer {
|
||||
|
||||
/**
|
||||
|
|
|
@ -25,10 +25,10 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
import lombok.*;
|
||||
import lombok.experimental.SuperBuilder;
|
||||
import org.deeplearning4j.nn.api.layers.LayerConstraint;
|
||||
import net.brutex.ai.dnn.api.LayerType;
|
||||
import org.deeplearning4j.nn.api.Layer;
|
||||
import org.deeplearning4j.nn.api.ParamInitializer;
|
||||
import org.deeplearning4j.nn.api.layers.LayerConstraint;
|
||||
import org.deeplearning4j.nn.conf.CNN2DFormat;
|
||||
import org.deeplearning4j.nn.conf.InputPreProcessor;
|
||||
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
|
||||
|
@ -48,287 +48,326 @@ import org.nd4j.linalg.learning.regularization.Regularization;
|
|||
@Data
|
||||
@ToString(callSuper = true)
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder")
|
||||
@SuperBuilder(builderMethodName = "innerBuilder")
|
||||
public class BatchNormalization extends FeedForwardLayer {
|
||||
|
||||
/**
|
||||
* At test time: we can use a global estimate of the mean and variance, calculated using a moving average of the
|
||||
* batch means/variances. This moving average is implemented as:<br> globalMeanEstimate = decay *
|
||||
* globalMeanEstimate + (1-decay) * batchMean<br> globalVarianceEstimate = decay * globalVarianceEstimate +
|
||||
* (1-decay) * batchVariance<br>
|
||||
*
|
||||
* @param decay Decay value to use for global stats calculation
|
||||
*/
|
||||
@lombok.Builder.Default
|
||||
protected double decay = 0.9;
|
||||
/**
|
||||
* At test time: we can use a global estimate of the mean and variance, calculated using a moving
|
||||
* average of the batch means/variances. This moving average is implemented as:<br>
|
||||
* globalMeanEstimate = decay * globalMeanEstimate + (1-decay) * batchMean<br>
|
||||
* globalVarianceEstimate = decay * globalVarianceEstimate + (1-decay) * batchVariance<br>
|
||||
*
|
||||
* @param decay Decay value to use for global stats calculation
|
||||
*/
|
||||
@lombok.Builder.Default protected double decay = 0.9;
|
||||
|
||||
//Note: need to set defaults here in addition to builder, in case user uses no-op constructor...
|
||||
/**
|
||||
* Epsilon value for batch normalization; small floating point value added to variance (algorithm 1 in <a
|
||||
* href="https://arxiv.org/pdf/1502.03167v3.pdf">https://arxiv.org/pdf/1502.03167v3.pdf</a>) to reduce/avoid
|
||||
* underflow issues.<br> Default: 1e-5
|
||||
*
|
||||
* @param eps Epsilon values to use
|
||||
*/
|
||||
@lombok.Builder.Default protected double eps = 1e-5;
|
||||
/**
|
||||
* If doing minibatch training or not. Default: true. Under most circumstances, this should be set to true. If
|
||||
* doing full batch training (i.e., all examples in a single DataSet object - very small data sets) then this
|
||||
* should be set to false. Affects how global mean/variance estimates are calculated.
|
||||
*
|
||||
* @param minibatch Minibatch parameter
|
||||
*/
|
||||
@lombok.Builder.Default protected boolean isMinibatch = true;
|
||||
// Note: need to set defaults here in addition to builder, in case user uses no-op constructor...
|
||||
/**
|
||||
* Epsilon value for batch normalization; small floating point value added to variance (algorithm
|
||||
* 1 in <a
|
||||
* href="https://arxiv.org/pdf/1502.03167v3.pdf">https://arxiv.org/pdf/1502.03167v3.pdf</a>) to
|
||||
* reduce/avoid underflow issues.<br>
|
||||
* Default: 1e-5
|
||||
*
|
||||
* @param eps Epsilon values to use
|
||||
*/
|
||||
@lombok.Builder.Default protected double eps = 1e-5;
|
||||
/**
|
||||
* If doing minibatch training or not. Default: true. Under most circumstances, this should be set
|
||||
* to true. If doing full batch training (i.e., all examples in a single DataSet object - very
|
||||
* small data sets) then this should be set to false. Affects how global mean/variance estimates
|
||||
* are calculated.
|
||||
*
|
||||
* @param minibatch Minibatch parameter
|
||||
*/
|
||||
@lombok.Builder.Default protected boolean isMinibatch = true;
|
||||
|
||||
/**
|
||||
* Used only when 'true' is passed to {@link BatchNormalizationBuilder#lockGammaBeta(boolean)}. Value is not used otherwise.<br> Default:
|
||||
* 1.0
|
||||
*
|
||||
* @param gamma Gamma parameter for all activations, used only with locked gamma/beta configuration mode
|
||||
*/
|
||||
@lombok.Builder.Default protected double gamma = 1.0;
|
||||
/**
|
||||
* Used only when 'true' is passed to {@link BatchNormalizationBuilder#lockGammaBeta(boolean)}. Value is not used otherwise.<br> Default:
|
||||
* 0.0
|
||||
*
|
||||
* @param beta Beta parameter for all activations, used only with locked gamma/beta configuration mode
|
||||
*/
|
||||
@lombok.Builder.Default protected double beta = 0.0;
|
||||
/**
|
||||
* Set constraints to be applied to the beta parameter of this batch normalisation layer. Default: no
|
||||
* constraints.<br> Constraints can be used to enforce certain conditions (non-negativity of parameters,
|
||||
* max-norm regularization, etc). These constraints are applied at each iteration, after the parameters have
|
||||
* been updated.
|
||||
*
|
||||
*/
|
||||
protected List<LayerConstraint> betaConstraints;
|
||||
/**
|
||||
* Used only when 'true' is passed to {@link BatchNormalizationBuilder#lockGammaBeta(boolean)}.
|
||||
* Value is not used otherwise.<br>
|
||||
* Default: 1.0
|
||||
*
|
||||
* @param gamma Gamma parameter for all activations, used only with locked gamma/beta
|
||||
* configuration mode
|
||||
*/
|
||||
@lombok.Builder.Default protected double gamma = 1.0;
|
||||
/**
|
||||
* Used only when 'true' is passed to {@link BatchNormalizationBuilder#lockGammaBeta(boolean)}.
|
||||
* Value is not used otherwise.<br>
|
||||
* Default: 0.0
|
||||
*
|
||||
* @param beta Beta parameter for all activations, used only with locked gamma/beta configuration
|
||||
* mode
|
||||
*/
|
||||
@lombok.Builder.Default protected double beta = 0.0;
|
||||
/**
|
||||
* Set constraints to be applied to the beta parameter of this batch normalisation layer. Default:
|
||||
* no constraints.<br>
|
||||
* Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm
|
||||
* regularization, etc). These constraints are applied at each iteration, after the parameters
|
||||
* have been updated.
|
||||
*/
|
||||
protected List<LayerConstraint> betaConstraints;
|
||||
|
||||
/**
|
||||
* Set constraints to be applied to the gamma parameter of this batch normalisation layer. Default: no
|
||||
* constraints.<br> Constraints can be used to enforce certain conditions (non-negativity of parameters,
|
||||
* max-norm regularization, etc). These constraints are applied at each iteration, after the parameters have
|
||||
* been updated.
|
||||
*
|
||||
*/
|
||||
protected List<LayerConstraint> gammaConstraints;
|
||||
/**
|
||||
* Set constraints to be applied to the gamma parameter of this batch normalisation layer.
|
||||
* Default: no constraints.<br>
|
||||
* Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm
|
||||
* regularization, etc). These constraints are applied at each iteration, after the parameters
|
||||
* have been updated.
|
||||
*/
|
||||
protected List<LayerConstraint> gammaConstraints;
|
||||
|
||||
/**
|
||||
* When using CuDNN or MKLDNN and an error is encountered, should fallback to the non-helper
|
||||
* implementation be allowed? If set to false, an exception in the helper will be propagated back
|
||||
* to the user. If true, the built-in (non-MKL/CuDNN) implementation for BatchNormalizationLayer
|
||||
* will be used
|
||||
*
|
||||
* @param allowFallback Whether fallback to non-CuDNN implementation should be used
|
||||
*/
|
||||
@lombok.Builder.Default protected boolean cudnnAllowFallback = true;
|
||||
/**
|
||||
* How should the moving average of variance be stored? Two different parameterizations are
|
||||
* supported. useLogStd(false): equivalent to 1.0.0-beta3 and earlier. The variance "parameter" is
|
||||
* stored directly as variable<br>
|
||||
* useLogStd(true): (Default) variance is stored as log10(stdev)<br>
|
||||
* The motivation here is for numerical stability (FP16 etc) and also distributed training:
|
||||
* storing the variance directly can cause numerical issues. For example, a standard deviation of
|
||||
* 1e-3 (something that could be encountered in practice) gives a variance of 1e-6, which can be
|
||||
* problematic for 16-bit floating point
|
||||
*
|
||||
* <p>How should the moving average of variance be stored? Two different parameterizations are
|
||||
* supported. useLogStd(false): equivalent to 1.0.0-beta3 and earlier. The variance "parameter" is
|
||||
* stored directly as variable<br>
|
||||
* useLogStd(true): (Default) variance is stored as log10(stdev)<br>
|
||||
* The motivation here is for numerical stability (FP16 etc) and also distributed training:
|
||||
* storing the variance directly can cause numerical issues. For example, a standard deviation of
|
||||
* 1e-3 (something that could be encountered in practice) gives a variance of 1e-6, which can be
|
||||
* problematic for 16-bit floating point
|
||||
*/
|
||||
@lombok.Builder.Default
|
||||
protected boolean useLogStd =
|
||||
false; // Default for deserialized models (1.0.0-beta3) and earlier: store variance as
|
||||
// variance. Post 1.0.0-beta3: use log stdev instead
|
||||
/**
|
||||
* Set the input and output array data format. Defaults to NCHW format - i.e., channels first. See
|
||||
* {@link CNN2DFormat} for more details
|
||||
*
|
||||
* @param format Format to use
|
||||
*/
|
||||
@lombok.Builder.Default
|
||||
protected CNN2DFormat dataFormat =
|
||||
CNN2DFormat.NCHW; // Default for deserialized models, 1.0.0-beta6 and earlier
|
||||
|
||||
/**
|
||||
* When using CuDNN or MKLDNN and an error is encountered, should fallback to the non-helper implementation be allowed?
|
||||
* If set to false, an exception in the helper will be propagated back to the user. If true, the built-in
|
||||
* (non-MKL/CuDNN) implementation for BatchNormalizationLayer will be used
|
||||
*
|
||||
* @param allowFallback Whether fallback to non-CuDNN implementation should be used
|
||||
*/
|
||||
@lombok.Builder.Default protected boolean cudnnAllowFallback = true;
|
||||
/**
|
||||
* How should the moving average of variance be stored? Two different parameterizations are supported.
|
||||
* useLogStd(false): equivalent to 1.0.0-beta3 and earlier. The variance "parameter" is stored directly as
|
||||
* variable<br> useLogStd(true): (Default) variance is stored as log10(stdev)<br> The motivation here is for
|
||||
* numerical stability (FP16 etc) and also distributed training: storing the variance directly can cause
|
||||
* numerical issues. For example, a standard deviation of 1e-3 (something that could be encountered in practice)
|
||||
* gives a variance of 1e-6, which can be problematic for 16-bit floating point
|
||||
*
|
||||
* How should the moving average of variance be stored? Two different parameterizations are supported.
|
||||
* useLogStd(false): equivalent to 1.0.0-beta3 and earlier. The variance "parameter" is stored directly as
|
||||
* variable<br> useLogStd(true): (Default) variance is stored as log10(stdev)<br> The motivation here is for
|
||||
* numerical stability (FP16 etc) and also distributed training: storing the variance directly can cause
|
||||
* numerical issues. For example, a standard deviation of 1e-3 (something that could be encountered in practice)
|
||||
* gives a variance of 1e-6, which can be problematic for 16-bit floating point
|
||||
*/
|
||||
@lombok.Builder.Default protected boolean useLogStd = false; //Default for deserialized models (1.0.0-beta3) and earlier: store variance as variance. Post 1.0.0-beta3: use log stdev instead
|
||||
/**
|
||||
* Set the input and output array data format. Defaults to NCHW format - i.e., channels first.
|
||||
* See {@link CNN2DFormat} for more details
|
||||
* @param format Format to use
|
||||
*/
|
||||
@lombok.Builder.Default protected CNN2DFormat dataFormat = CNN2DFormat.NCHW; //Default for deserialized models, 1.0.0-beta6 and earlier
|
||||
private boolean lockGammaBeta;
|
||||
|
||||
private boolean lockGammaBeta;
|
||||
public static BatchNormalizationBuilder<?, ?> builder() {
|
||||
return innerBuilder();
|
||||
}
|
||||
|
||||
public static BatchNormalizationBuilder<?, ?> builder() {
|
||||
return innerBuilder();
|
||||
public static BatchNormalizationBuilder<?, ?> builder(double gamma, double beta) {
|
||||
return innerBuilder().gamma(gamma).beta(beta);
|
||||
}
|
||||
|
||||
public static BatchNormalizationBuilder<?, ?> builder(boolean lockGammaBeta) {
|
||||
return innerBuilder().lockGammaBeta(lockGammaBeta);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BatchNormalization clone() {
|
||||
BatchNormalization clone = (BatchNormalization) super.clone();
|
||||
return clone;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Layer instantiate(
|
||||
NeuralNetConfiguration conf,
|
||||
Collection<TrainingListener> trainingListeners,
|
||||
int layerIndex,
|
||||
INDArray layerParamsView,
|
||||
boolean initializeParams,
|
||||
DataType networkDataType) {
|
||||
this.setNetConfiguration(conf);
|
||||
LayerValidation.assertNOutSet("BatchNormalization", getName(), layerIndex, getNOut());
|
||||
runInheritance();
|
||||
|
||||
LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
|
||||
org.deeplearning4j.nn.layers.normalization.BatchNormalization ret =
|
||||
new org.deeplearning4j.nn.layers.normalization.BatchNormalization(lconf, networkDataType);
|
||||
ret.addTrainingListeners(trainingListeners);
|
||||
ret.setIndex(layerIndex);
|
||||
ret.setParamsViewArray(layerParamsView);
|
||||
Map<String, INDArray> paramTable = initializer().init(this, layerParamsView, initializeParams);
|
||||
ret.setParamTable(paramTable);
|
||||
ret.setLayerConfiguration(lconf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ParamInitializer initializer() {
|
||||
return BatchNormalizationParamInitializer.getInstance();
|
||||
}
|
||||
|
||||
@Override
|
||||
public InputType getOutputType(int layerIndex, InputType inputType) {
|
||||
if (inputType == null) {
|
||||
throw new IllegalStateException(
|
||||
"Invalid input type: Batch norm layer expected input of type CNN, got null for layer \""
|
||||
+ getName()
|
||||
+ "\"");
|
||||
}
|
||||
|
||||
public static BatchNormalizationBuilder<?, ?> builder(double gamma, double beta) {
|
||||
return innerBuilder()
|
||||
.gamma(gamma)
|
||||
.beta(beta);
|
||||
// Can handle CNN, flat CNN, CNN3D or FF input formats only
|
||||
switch (inputType.getType()) {
|
||||
case FF:
|
||||
case CNN:
|
||||
case CNNFlat:
|
||||
case CNN3D:
|
||||
return inputType; // OK
|
||||
default:
|
||||
throw new IllegalStateException(
|
||||
"Invalid input type: Batch norm layer expected input of type CNN, CNN Flat or FF, got "
|
||||
+ inputType
|
||||
+ " for layer index "
|
||||
+ layerIndex
|
||||
+ ", layer name = "
|
||||
+ getName());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNIn(InputType inputType, boolean override) {
|
||||
if (nIn <= 0 || override) {
|
||||
switch (inputType.getType()) {
|
||||
case FF:
|
||||
nIn = ((InputType.InputTypeFeedForward) inputType).getSize();
|
||||
break;
|
||||
case CNN:
|
||||
nIn = ((InputType.InputTypeConvolutional) inputType).getChannels();
|
||||
dataFormat = ((InputType.InputTypeConvolutional) inputType).getFormat();
|
||||
break;
|
||||
case CNN3D:
|
||||
nIn = ((InputType.InputTypeConvolutional3D) inputType).getChannels();
|
||||
break;
|
||||
case CNNFlat:
|
||||
nIn = ((InputType.InputTypeConvolutionalFlat) inputType).getDepth();
|
||||
default:
|
||||
throw new IllegalStateException(
|
||||
"Invalid input type: Batch norm layer expected input of type CNN, CNN Flat or FF, got "
|
||||
+ inputType
|
||||
+ " for layer "
|
||||
+ getName()
|
||||
+ "\"");
|
||||
}
|
||||
nOut = nIn;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public InputPreProcessor getPreProcessorForInputType(InputType inputType) {
|
||||
if (inputType.getType() == InputType.Type.CNNFlat) {
|
||||
InputType.InputTypeConvolutionalFlat i = (InputType.InputTypeConvolutionalFlat) inputType;
|
||||
return new FeedForwardToCnnPreProcessor(i.getHeight(), i.getWidth(), i.getDepth());
|
||||
} else if (inputType.getType() == InputType.Type.RNN) {
|
||||
return new RnnToFeedForwardPreProcessor();
|
||||
}
|
||||
|
||||
public static BatchNormalizationBuilder<?, ?> builder(boolean lockGammaBeta) {
|
||||
return innerBuilder()
|
||||
.lockGammaBeta(lockGammaBeta);
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Regularization> getRegularizationByParam(String paramName) {
|
||||
// Don't regularize batch norm params: similar to biases in the sense that there are not many of
|
||||
// them...
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public IUpdater getUpdaterByParam(String paramName) {
|
||||
switch (paramName) {
|
||||
case BatchNormalizationParamInitializer.BETA:
|
||||
case BatchNormalizationParamInitializer.GAMMA:
|
||||
return getUpdater();
|
||||
case BatchNormalizationParamInitializer.GLOBAL_MEAN:
|
||||
case BatchNormalizationParamInitializer.GLOBAL_VAR:
|
||||
case BatchNormalizationParamInitializer.GLOBAL_LOG_STD:
|
||||
return new NoOp();
|
||||
default:
|
||||
throw new IllegalArgumentException("Unknown parameter: \"" + paramName + "\"");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public LayerMemoryReport getMemoryReport(InputType inputType) {
|
||||
InputType outputType = getOutputType(-1, inputType);
|
||||
|
||||
// TODO CuDNN helper etc
|
||||
|
||||
val numParams = initializer().numParams(this);
|
||||
int updaterStateSize = 0;
|
||||
|
||||
for (String s : BatchNormalizationParamInitializer.getInstance().paramKeys(this)) {
|
||||
updaterStateSize += getUpdaterByParam(s).stateSize(nOut);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BatchNormalization clone() {
|
||||
BatchNormalization clone = (BatchNormalization) super.clone();
|
||||
return clone;
|
||||
// During forward pass: working memory size approx. equal to 2x input size (copy ops, etc)
|
||||
val inferenceWorkingSize = 2 * inputType.arrayElementsPerExample();
|
||||
|
||||
// During training: we calculate mean and variance... result is equal to nOut, and INDEPENDENT
|
||||
// of minibatch size
|
||||
val trainWorkFixed = 2 * nOut;
|
||||
// During backprop: multiple working arrays... output size, 2 * output size (indep. of example
|
||||
// size),
|
||||
val trainWorkingSizePerExample =
|
||||
inferenceWorkingSize // Inference during backprop
|
||||
+ (outputType.arrayElementsPerExample() + 2 * nOut); // Backprop gradient calculation
|
||||
|
||||
return new LayerMemoryReport.Builder(name, BatchNormalization.class, inputType, outputType)
|
||||
.standardMemory(numParams, updaterStateSize)
|
||||
.workingMemory(
|
||||
0,
|
||||
0,
|
||||
trainWorkFixed,
|
||||
trainWorkingSizePerExample) // No additional memory (beyond activations) for inference
|
||||
.cacheMemory(
|
||||
MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) // No caching
|
||||
.build();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isPretrainParam(String paramName) {
|
||||
return false; // No pretrain params in BN
|
||||
}
|
||||
|
||||
private static final class BatchNormalizationBuilderImpl
|
||||
extends BatchNormalizationBuilder<BatchNormalization, BatchNormalizationBuilderImpl> {
|
||||
public BatchNormalization build() {
|
||||
BatchNormalization l = new BatchNormalization(this);
|
||||
l.setType(LayerType.BN);
|
||||
l.initializeConstraints();
|
||||
return l;
|
||||
}
|
||||
}
|
||||
|
||||
public abstract static class BatchNormalizationBuilder<
|
||||
C extends BatchNormalization, B extends BatchNormalizationBuilder<C, B>>
|
||||
extends FeedForwardLayerBuilder<C, B> {
|
||||
|
||||
public B helperAllowFallback(boolean b) {
|
||||
this.cudnnAllowFallback$value = b;
|
||||
this.cudnnAllowFallback$set = true;
|
||||
return self();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Layer instantiate(NeuralNetConfiguration conf, Collection<TrainingListener> trainingListeners,
|
||||
int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) {
|
||||
this.setNetConfiguration(conf);
|
||||
LayerValidation.assertNOutSet("BatchNormalization", getName(), layerIndex, getNOut());
|
||||
runInheritance();
|
||||
|
||||
LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
|
||||
org.deeplearning4j.nn.layers.normalization.BatchNormalization ret =
|
||||
new org.deeplearning4j.nn.layers.normalization.BatchNormalization(lconf, networkDataType);
|
||||
ret.addTrainingListeners(trainingListeners);
|
||||
ret.setIndex(layerIndex);
|
||||
ret.setParamsViewArray(layerParamsView);
|
||||
Map<String, INDArray> paramTable = initializer().init(this, layerParamsView, initializeParams);
|
||||
ret.setParamTable(paramTable);
|
||||
ret.setLayerConfiguration(lconf);
|
||||
return ret;
|
||||
public B constrainBeta(LayerConstraint... constraints) {
|
||||
this.betaConstraints = List.of(constraints);
|
||||
return self();
|
||||
}
|
||||
|
||||
@Override
|
||||
public ParamInitializer initializer() {
|
||||
return BatchNormalizationParamInitializer.getInstance();
|
||||
public B constrainGamma(LayerConstraint... constraints) {
|
||||
this.gammaConstraints = List.of(constraints);
|
||||
return self();
|
||||
}
|
||||
|
||||
@Override
|
||||
public InputType getOutputType(int layerIndex, InputType inputType) {
|
||||
if (inputType == null) {
|
||||
throw new IllegalStateException(
|
||||
"Invalid input type: Batch norm layer expected input of type CNN, got null for layer \""
|
||||
+ getName() + "\"");
|
||||
}
|
||||
|
||||
//Can handle CNN, flat CNN, CNN3D or FF input formats only
|
||||
switch (inputType.getType()) {
|
||||
case FF:
|
||||
case CNN:
|
||||
case CNNFlat:
|
||||
case CNN3D:
|
||||
return inputType; //OK
|
||||
default:
|
||||
throw new IllegalStateException(
|
||||
"Invalid input type: Batch norm layer expected input of type CNN, CNN Flat or FF, got "
|
||||
+ inputType + " for layer index " + layerIndex + ", layer name = "
|
||||
+ getName());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNIn(InputType inputType, boolean override) {
|
||||
if (nIn <= 0 || override) {
|
||||
switch (inputType.getType()) {
|
||||
case FF:
|
||||
nIn = ((InputType.InputTypeFeedForward) inputType).getSize();
|
||||
break;
|
||||
case CNN:
|
||||
nIn = ((InputType.InputTypeConvolutional) inputType).getChannels();
|
||||
dataFormat = ((InputType.InputTypeConvolutional) inputType).getFormat();
|
||||
break;
|
||||
case CNN3D:
|
||||
nIn = ((InputType.InputTypeConvolutional3D) inputType).getChannels();
|
||||
break;
|
||||
case CNNFlat:
|
||||
nIn = ((InputType.InputTypeConvolutionalFlat) inputType).getDepth();
|
||||
default:
|
||||
throw new IllegalStateException(
|
||||
"Invalid input type: Batch norm layer expected input of type CNN, CNN Flat or FF, got "
|
||||
+ inputType + " for layer " + getName() + "\"");
|
||||
}
|
||||
nOut = nIn;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public InputPreProcessor getPreProcessorForInputType(InputType inputType) {
|
||||
if (inputType.getType() == InputType.Type.CNNFlat) {
|
||||
InputType.InputTypeConvolutionalFlat i = (InputType.InputTypeConvolutionalFlat) inputType;
|
||||
return new FeedForwardToCnnPreProcessor(i.getHeight(), i.getWidth(), i.getDepth());
|
||||
} else if (inputType.getType() == InputType.Type.RNN) {
|
||||
return new RnnToFeedForwardPreProcessor();
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Regularization> getRegularizationByParam(String paramName){
|
||||
//Don't regularize batch norm params: similar to biases in the sense that there are not many of them...
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public IUpdater getUpdaterByParam(String paramName) {
|
||||
switch (paramName) {
|
||||
case BatchNormalizationParamInitializer.BETA:
|
||||
case BatchNormalizationParamInitializer.GAMMA:
|
||||
return getUpdater();
|
||||
case BatchNormalizationParamInitializer.GLOBAL_MEAN:
|
||||
case BatchNormalizationParamInitializer.GLOBAL_VAR:
|
||||
case BatchNormalizationParamInitializer.GLOBAL_LOG_STD:
|
||||
return new NoOp();
|
||||
default:
|
||||
throw new IllegalArgumentException("Unknown parameter: \"" + paramName + "\"");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public LayerMemoryReport getMemoryReport(InputType inputType) {
|
||||
InputType outputType = getOutputType(-1, inputType);
|
||||
|
||||
//TODO CuDNN helper etc
|
||||
|
||||
val numParams = initializer().numParams(this);
|
||||
int updaterStateSize = 0;
|
||||
|
||||
for (String s : BatchNormalizationParamInitializer.getInstance().paramKeys(this)) {
|
||||
updaterStateSize += getUpdaterByParam(s).stateSize(nOut);
|
||||
}
|
||||
|
||||
//During forward pass: working memory size approx. equal to 2x input size (copy ops, etc)
|
||||
val inferenceWorkingSize = 2 * inputType.arrayElementsPerExample();
|
||||
|
||||
//During training: we calculate mean and variance... result is equal to nOut, and INDEPENDENT of minibatch size
|
||||
val trainWorkFixed = 2 * nOut;
|
||||
//During backprop: multiple working arrays... output size, 2 * output size (indep. of example size),
|
||||
val trainWorkingSizePerExample = inferenceWorkingSize //Inference during backprop
|
||||
+ (outputType.arrayElementsPerExample() + 2 * nOut); //Backprop gradient calculation
|
||||
|
||||
return new LayerMemoryReport.Builder(name, BatchNormalization.class, inputType, outputType)
|
||||
.standardMemory(numParams, updaterStateSize)
|
||||
.workingMemory(0, 0, trainWorkFixed, trainWorkingSizePerExample) //No additional memory (beyond activations) for inference
|
||||
.cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching
|
||||
.build();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isPretrainParam(String paramName) {
|
||||
return false; //No pretrain params in BN
|
||||
}
|
||||
|
||||
public static abstract class BatchNormalizationBuilder<C extends BatchNormalization, B extends BatchNormalizationBuilder<C, B>> extends FeedForwardLayerBuilder<C, B> {
|
||||
public C build() {
|
||||
C l = this.initBuild();
|
||||
l.setType(LayerType.BN);
|
||||
l.initializeConstraints();
|
||||
return l;
|
||||
}
|
||||
public B helperAllowFallback(boolean b) {
|
||||
this.cudnnAllowFallback$value = b;
|
||||
this.cudnnAllowFallback$set = true;
|
||||
return self();
|
||||
}
|
||||
|
||||
public B constrainBeta(LayerConstraint ... constraints) {
|
||||
this.betaConstraints = List.of(constraints);
|
||||
return self();
|
||||
}
|
||||
public B constrainGamma(LayerConstraint ... constraints) {
|
||||
this.gammaConstraints = List.of(constraints);
|
||||
return self();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -38,9 +38,8 @@ import org.nd4j.linalg.api.memory.MemoryWorkspace;
|
|||
import org.nd4j.linalg.api.ndarray.INDArray;
|
||||
import org.nd4j.linalg.factory.Nd4j;
|
||||
|
||||
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder")
|
||||
@SuperBuilder(builderMethodName = "innerBuilder")
|
||||
public class CapsuleLayer extends SameDiffLayer {
|
||||
|
||||
private static final String WEIGHT_PARAM = "weight";
|
||||
|
@ -78,6 +77,18 @@ public class CapsuleLayer extends SameDiffLayer {
|
|||
*/
|
||||
@Builder.Default @Getter @Setter private int routings = 3;
|
||||
|
||||
public static CapsuleLayerBuilder<?,?> builder() {
|
||||
return innerBuilder()
|
||||
;
|
||||
}
|
||||
|
||||
public static CapsuleLayerBuilder<?,?> builder(int capsules, int capsulesDim, int routings) {
|
||||
return innerBuilder()
|
||||
.capsules(capsules)
|
||||
.capsuleDimensions(capsulesDim)
|
||||
.routings(routings);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNIn(InputType inputType, boolean override) {
|
||||
if(inputType == null || inputType.getType() != Type.RNN) {
|
||||
|
@ -185,16 +196,6 @@ public class CapsuleLayer extends SameDiffLayer {
|
|||
return InputType.recurrent(capsules, capsuleDimensions);
|
||||
}
|
||||
|
||||
public static CapsuleLayerBuilder<?,?> builder() {
|
||||
return innerBuilder()
|
||||
;
|
||||
}
|
||||
public static CapsuleLayerBuilder<?,?> builder(int capsules, int capsulesDim, int routings) {
|
||||
return innerBuilder()
|
||||
.capsules(capsules)
|
||||
.capsuleDimensions(capsulesDim)
|
||||
.routings(routings);
|
||||
}
|
||||
public static abstract class CapsuleLayerBuilder<
|
||||
C extends CapsuleLayer, B extends CapsuleLayerBuilder<C, B>>
|
||||
extends SameDiffLayerBuilder<C, B> {
|
||||
|
@ -215,35 +216,37 @@ public class CapsuleLayer extends SameDiffLayer {
|
|||
}
|
||||
|
||||
|
||||
public C build() {
|
||||
C l = this.initBuild();
|
||||
if (capsules <= 0 || capsuleDimensions <= 0 || routings$value <= 0) {
|
||||
throw new IllegalArgumentException(
|
||||
"Invalid configuration for Capsule ILayer (layer name = \""
|
||||
+ l.getName()
|
||||
+ "\"):"
|
||||
+ " capsules, capsuleDimensions, and routings must be > 0. Got: "
|
||||
+ capsules
|
||||
+ ", "
|
||||
+ capsuleDimensions
|
||||
+ ", "
|
||||
+ routings$value);
|
||||
}
|
||||
|
||||
if (inputCapsules$value < 0 || inputCapsuleDimensions$value < 0) {
|
||||
throw new IllegalArgumentException(
|
||||
"Invalid configuration for Capsule ILayer (layer name = \""
|
||||
+ l.getName()
|
||||
+ "\"):"
|
||||
+ " inputCapsules and inputCapsuleDimensions must be >= 0 if set. Got: "
|
||||
+ inputCapsules$value
|
||||
+ ", "
|
||||
+ inputCapsuleDimensions$value);
|
||||
}
|
||||
|
||||
return l;
|
||||
}
|
||||
}
|
||||
|
||||
private static final class CapsuleLayerBuilderImpl extends CapsuleLayerBuilder<CapsuleLayer, CapsuleLayerBuilderImpl> {
|
||||
public CapsuleLayer build() {
|
||||
CapsuleLayer l = new CapsuleLayer(this);
|
||||
if (l.getCapsules() <= 0 || l.getCapsuleDimensions() <= 0 || l.getRoutings() <= 0) {
|
||||
throw new IllegalArgumentException(
|
||||
"Invalid configuration for Capsule ILayer (layer name = \""
|
||||
+ l.getName()
|
||||
+ "\"):"
|
||||
+ " capsules, capsuleDimensions, and routings must be > 0. Got: "
|
||||
+ l.getCapsules()
|
||||
+ ", "
|
||||
+ l.getCapsuleDimensions()
|
||||
+ ", "
|
||||
+ l.getRoutings());
|
||||
}
|
||||
|
||||
if (l.getInputCapsules() < 0 || l.getInputCapsuleDimensions() < 0) {
|
||||
throw new IllegalArgumentException(
|
||||
"Invalid configuration for Capsule ILayer (layer name = \""
|
||||
+ l.getName()
|
||||
+ "\"):"
|
||||
+ " inputCapsules and inputCapsuleDimensions must be >= 0 if set. Got: "
|
||||
+ l.getInputCapsules()
|
||||
+ ", "
|
||||
+ l.getInputCapsuleDimensions() );
|
||||
}
|
||||
return l;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -20,6 +20,8 @@
|
|||
|
||||
package org.deeplearning4j.nn.conf.layers;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Map;
|
||||
import lombok.*;
|
||||
import lombok.experimental.SuperBuilder;
|
||||
import org.deeplearning4j.nn.api.Layer;
|
||||
|
@ -30,36 +32,21 @@ import org.deeplearning4j.nn.conf.memory.LayerMemoryReport;
|
|||
import org.deeplearning4j.nn.conf.memory.MemoryReport;
|
||||
import org.deeplearning4j.nn.params.CenterLossParamInitializer;
|
||||
import org.deeplearning4j.optimize.api.TrainingListener;
|
||||
import org.nd4j.linalg.activations.impl.ActivationSoftmax;
|
||||
import org.nd4j.linalg.api.buffer.DataType;
|
||||
import org.nd4j.linalg.api.ndarray.INDArray;
|
||||
import org.nd4j.linalg.learning.config.IUpdater;
|
||||
import org.nd4j.linalg.learning.config.NoOp;
|
||||
import org.nd4j.linalg.lossfunctions.ILossFunction;
|
||||
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Map;
|
||||
|
||||
@Data
|
||||
@ToString(callSuper = true)
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@SuperBuilder(buildMethodName = "initBuild")
|
||||
@SuperBuilder
|
||||
public class CenterLossOutputLayer extends BaseOutputLayer {
|
||||
|
||||
@Builder.Default protected double alpha= 0.805;
|
||||
@Builder.Default protected double lambda = 2e-4;
|
||||
@Builder.Default protected boolean gradientCheck = false;
|
||||
|
||||
public static abstract class CenterLossOutputLayerBuilder<C extends CenterLossOutputLayer, B extends CenterLossOutputLayerBuilder<C,B>> extends
|
||||
BaseOutputLayerBuilder<C, B> {
|
||||
public C build() {
|
||||
C l = initBuild();
|
||||
l.initializeConstraints();
|
||||
return l;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Layer instantiate(NeuralNetConfiguration conf, Collection<TrainingListener> trainingListeners,
|
||||
int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) {
|
||||
|
@ -91,7 +78,6 @@ public static abstract class CenterLossOutputLayerBuilder<C extends CenterLossOu
|
|||
return getUpdater();
|
||||
}
|
||||
|
||||
|
||||
public boolean getGradientCheck() {
|
||||
return gradientCheck;
|
||||
}
|
||||
|
@ -135,6 +121,24 @@ public static abstract class CenterLossOutputLayerBuilder<C extends CenterLossOu
|
|||
.build();
|
||||
}
|
||||
|
||||
public static abstract class CenterLossOutputLayerBuilder<C extends CenterLossOutputLayer, B extends CenterLossOutputLayerBuilder<C,B>> extends
|
||||
BaseOutputLayerBuilder<C, B> {
|
||||
public C build() {
|
||||
C l = initBuild();
|
||||
l.initializeConstraints();
|
||||
return l;
|
||||
}
|
||||
}
|
||||
|
||||
private static final class CenterLossOutputLayerBuilderImpl extends CenterLossOutputLayerBuilder<CenterLossOutputLayer,
|
||||
CenterLossOutputLayerBuilderImpl> {
|
||||
public CenterLossOutputLayer build() {
|
||||
CenterLossOutputLayer l = new CenterLossOutputLayer(this);
|
||||
l.initializeConstraints();
|
||||
return l;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -29,6 +29,6 @@ import lombok.experimental.SuperBuilder;
|
|||
@Data
|
||||
@ToString(callSuper = true)
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@SuperBuilder(buildMethodName = "initBuild")
|
||||
@SuperBuilder
|
||||
public class Convolution1D extends Convolution1DLayer {
|
||||
}
|
||||
|
|
|
@ -47,9 +47,8 @@ import org.nd4j.linalg.api.ndarray.INDArray;
|
|||
@Data
|
||||
@ToString(callSuper = true)
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder")
|
||||
@SuperBuilder(builderMethodName = "innerBuilder")
|
||||
public class Convolution1DLayer extends ConvolutionLayer {
|
||||
@Builder.Default private RNNFormat rnnDataFormat = RNNFormat.NCW;
|
||||
/**
|
||||
* Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last).
|
||||
* See {@link CNN2DFormat} for more details.<br>
|
||||
|
@ -60,6 +59,7 @@ public class Convolution1DLayer extends ConvolutionLayer {
|
|||
@Builder.Default
|
||||
protected CNN2DFormat dataFormat =
|
||||
CNN2DFormat.NCHW; // default value for legacy serialization reasons
|
||||
@Builder.Default private RNNFormat rnnDataFormat = RNNFormat.NCW;
|
||||
/**
|
||||
* Size of the convolution
|
||||
*
|
||||
|
@ -183,17 +183,20 @@ public class Convolution1DLayer extends ConvolutionLayer {
|
|||
return true;
|
||||
}
|
||||
|
||||
public static abstract class Convolution1DLayerBuilder<
|
||||
C extends ConvolutionLayer, B extends Convolution1DLayerBuilder<C, B>>
|
||||
extends ConvolutionLayerBuilder<C, B> {
|
||||
public C build() {
|
||||
C l = initBuild();
|
||||
ConvolutionUtils.validateConvolutionModePadding(l.getConvolutionMode(), padding$value);
|
||||
private static final class Convolution1DLayerBuilderImpl extends ConvolutionLayerBuilder<ConvolutionLayer, Convolution1DLayerBuilderImpl> {
|
||||
public ConvolutionLayer build() {
|
||||
ConvolutionLayer l = initBuild();
|
||||
ConvolutionUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding());
|
||||
ConvolutionUtils.validateCnnKernelStridePadding(
|
||||
kernelSize$value, stride$value, padding$value);
|
||||
l.getKernelSize(), l.getStride(), l.getPadding());
|
||||
l.initializeConstraints();
|
||||
return l;
|
||||
}
|
||||
}
|
||||
public static abstract class Convolution1DLayerBuilder<
|
||||
C extends ConvolutionLayer, B extends Convolution1DLayerBuilder<C, B>>
|
||||
extends ConvolutionLayerBuilder<C, B> {
|
||||
|
||||
|
||||
public B kernelSize(int @NonNull ... kernelSize) {
|
||||
this.kernelSize$value[0] = ValidationUtils.validate1NonNegative(kernelSize, "kernelSize")[0];
|
||||
|
|
|
@ -30,6 +30,6 @@ import lombok.experimental.SuperBuilder;
|
|||
|
||||
@ToString(callSuper = true)
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@SuperBuilder(buildMethodName = "initBuild")
|
||||
@SuperBuilder
|
||||
public class Convolution2D extends ConvolutionLayer {
|
||||
}
|
||||
|
|
|
@ -40,7 +40,7 @@ import org.nd4j.linalg.api.ndarray.INDArray;
|
|||
@Data
|
||||
@ToString(callSuper = true)
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@SuperBuilder(builderMethodName = "innerBuilder", buildMethodName = "initBuild")
|
||||
@SuperBuilder(builderMethodName = "innerBuilder")
|
||||
public class Convolution3D extends ConvolutionLayer {
|
||||
|
||||
/**
|
||||
|
@ -235,17 +235,20 @@ public class Convolution3D extends ConvolutionLayer {
|
|||
NDHWC
|
||||
}
|
||||
|
||||
private static final class Convolution3DBuilderImpl extends Convolution3DBuilder<Convolution3D, Convolution3DBuilderImpl> {
|
||||
public Convolution3D build() {
|
||||
Convolution3D l = new Convolution3D(this);
|
||||
ConvolutionUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding());
|
||||
Convolution3DUtils.validateCnn3DKernelStridePadding(l.getKernelSize(), l.getStride(), l.getPadding());
|
||||
return l;
|
||||
}
|
||||
}
|
||||
// public Builder(int[] kernelSize, int[] stride, int[] padding, int[] dilation) {
|
||||
// sup/er(kernelSize, stride, padding, dilation, 3);
|
||||
public static abstract class Convolution3DBuilder<
|
||||
C extends Convolution3D, B extends Convolution3DBuilder<C, B>>
|
||||
extends ConvolutionLayer.ConvolutionLayerBuilder<C, B> {
|
||||
public C build() {
|
||||
ConvolutionUtils.validateConvolutionModePadding(convolutionMode$value, padding);
|
||||
Convolution3DUtils.validateCnn3DKernelStridePadding(kernelSize, stride, padding);
|
||||
C l = initBuild();
|
||||
return l;
|
||||
}
|
||||
|
||||
|
||||
@Override // TODO we can use the parent builder and do not need to redefine the variables.
|
||||
// Validation can be done in override function!
|
||||
|
|
|
@ -48,7 +48,7 @@ import org.nd4j.linalg.api.ndarray.INDArray;
|
|||
*/
|
||||
@ToString(callSuper = true)
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@SuperBuilder(builderMethodName = "innerBuilder", buildMethodName = "initBuild")
|
||||
@SuperBuilder(builderMethodName = "innerBuilder")
|
||||
public class ConvolutionLayer extends FeedForwardLayer {
|
||||
/**
|
||||
* Size of the convolution rows/columns
|
||||
|
@ -397,48 +397,7 @@ public class ConvolutionLayer extends FeedForwardLayer {
|
|||
return self();
|
||||
}
|
||||
|
||||
public C build() {
|
||||
ConvolutionUtils.validateConvolutionModePadding(convolutionMode$value, padding$value);
|
||||
ConvolutionUtils.validateCnnKernelStridePadding(
|
||||
kernelSize$value, stride$value, padding$value);
|
||||
|
||||
if (kernelSize$value.length != convolutionDim$value) {
|
||||
throw new IllegalArgumentException(
|
||||
"Kernel argument should be a "
|
||||
+ convolutionDim$value
|
||||
+ "d array, got "
|
||||
+ Arrays.toString(kernelSize$value));
|
||||
}
|
||||
|
||||
if (stride$value.length != convolutionDim$value) {
|
||||
throw new IllegalArgumentException(
|
||||
"Strides argument should be a "
|
||||
+ convolutionDim$value
|
||||
+ "d array, got "
|
||||
+ Arrays.toString(stride$value));
|
||||
}
|
||||
|
||||
if (padding$value.length != convolutionDim$value) {
|
||||
throw new IllegalArgumentException(
|
||||
"Padding argument should be a "
|
||||
+ convolutionDim$value
|
||||
+ "d array, got "
|
||||
+ Arrays.toString(padding$value));
|
||||
}
|
||||
|
||||
if (dilation$value.length != convolutionDim$value) {
|
||||
throw new IllegalArgumentException(
|
||||
"Dilation argument should be a "
|
||||
+ convolutionDim$value
|
||||
+ "d array, got "
|
||||
+ Arrays.toString(dilation$value));
|
||||
}
|
||||
|
||||
C l = initBuild();
|
||||
l.setType(LayerType.CONV);
|
||||
l.initializeConstraints();
|
||||
return l;
|
||||
}
|
||||
|
||||
/**
|
||||
* When using CuDNN or MKLDNN and an error is encountered, should fallback to the non-helper
|
||||
|
@ -454,4 +413,47 @@ public class ConvolutionLayer extends FeedForwardLayer {
|
|||
return self();
|
||||
}
|
||||
}
|
||||
private static final class ConvolutionLayerBuilderImpl extends ConvolutionLayerBuilder<ConvolutionLayer, ConvolutionLayerBuilderImpl> {
|
||||
public ConvolutionLayer build() {
|
||||
ConvolutionLayer l = new ConvolutionLayer(this);
|
||||
ConvolutionUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding());
|
||||
ConvolutionUtils.validateCnnKernelStridePadding(
|
||||
l.getKernelSize(), l.getStride(), l.getPadding());
|
||||
|
||||
if (l.getKernelSize().length != l.getConvolutionDim()) {
|
||||
throw new IllegalArgumentException(
|
||||
"Kernel argument should be a "
|
||||
+ l.getConvolutionDim()
|
||||
+ "d array, got "
|
||||
+ Arrays.toString(l.getKernelSize()));
|
||||
}
|
||||
|
||||
if (l.getStride().length != l.getConvolutionDim()) {
|
||||
throw new IllegalArgumentException(
|
||||
"Strides argument should be a "
|
||||
+ l.getConvolutionDim()
|
||||
+ "d array, got "
|
||||
+ Arrays.toString(l.getStride()));
|
||||
}
|
||||
|
||||
if (l.getPadding().length != l.getConvolutionDim()) {
|
||||
throw new IllegalArgumentException(
|
||||
"Padding argument should be a "
|
||||
+ l.getConvolutionDim()
|
||||
+ "d array, got "
|
||||
+ Arrays.toString(l.getPadding()));
|
||||
}
|
||||
|
||||
if (l.getDilation().length != l.getConvolutionDim()) {
|
||||
throw new IllegalArgumentException(
|
||||
"Dilation argument should be a "
|
||||
+ l.getConvolutionDim()
|
||||
+ "d array, got "
|
||||
+ Arrays.toString(l.getDilation()));
|
||||
}
|
||||
l.setType(LayerType.CONV);
|
||||
l.initializeConstraints();
|
||||
return l;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -46,7 +46,7 @@ import java.util.Map;
|
|||
@Data
|
||||
@ToString(callSuper = true)
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuild")
|
||||
@SuperBuilder(builderMethodName = "innerBuilder")
|
||||
public class Deconvolution2D extends ConvolutionLayer {
|
||||
|
||||
|
||||
|
@ -57,12 +57,15 @@ private CNN2DFormat format = CNN2DFormat.NCHW;
|
|||
return false;
|
||||
}
|
||||
|
||||
public static abstract class Deconvolution2DBuilder<C extends Deconvolution2D, B extends Deconvolution2DBuilder<C, B>> extends ConvolutionLayerBuilder<C, B> {
|
||||
public C build() {
|
||||
C l = initBuild();
|
||||
private static final class Deconvolution2DBuilderImpl extends Deconvolution2DBuilder<Deconvolution2D, Deconvolution2DBuilderImpl> {
|
||||
public Deconvolution2D build() {
|
||||
Deconvolution2D l = new Deconvolution2D(this);
|
||||
l.initializeConstraints();
|
||||
return l;
|
||||
}
|
||||
}
|
||||
public static abstract class Deconvolution2DBuilder<C extends Deconvolution2D, B extends Deconvolution2DBuilder<C, B>> extends ConvolutionLayerBuilder<C, B> {
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
|
|
|
@ -44,7 +44,7 @@ import org.nd4j.linalg.api.ndarray.INDArray;
|
|||
@Data
|
||||
@ToString(callSuper = true)
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder")
|
||||
@SuperBuilder(builderMethodName = "innerBuilder")
|
||||
public class Deconvolution3D extends ConvolutionLayer {
|
||||
/**
|
||||
* Set the convolution mode for the Convolution layer. See {@link ConvolutionMode} for more
|
||||
|
@ -56,6 +56,15 @@ public class Deconvolution3D extends ConvolutionLayer {
|
|||
private Convolution3D.DataFormat dataFormat =
|
||||
Convolution3D.DataFormat.NCDHW; // in libnd4j: 1 - NCDHW, 0 - NDHWC
|
||||
|
||||
public static Deconvolution3DBuilder<?, ?> builder() {
|
||||
return innerBuilder()
|
||||
.kernelSize(new int[] {2, 2, 2})
|
||||
.stride(new int[] {1, 1, 1})
|
||||
.padding(new int[] {0, 0, 0})
|
||||
.dilation(new int[] {1, 1, 1})
|
||||
.convolutionDim(3);
|
||||
}
|
||||
|
||||
protected boolean allowCausal() {
|
||||
// Causal convolution - allowed for 1D only
|
||||
return false;
|
||||
|
@ -69,13 +78,13 @@ public class Deconvolution3D extends ConvolutionLayer {
|
|||
public Deconvolution3D clone() {
|
||||
Deconvolution3D clone = (Deconvolution3D) super.clone();
|
||||
if (clone.getKernelSize() != null) {
|
||||
clone.setKernelSize( clone.getKernelSize().clone());
|
||||
clone.setKernelSize(clone.getKernelSize().clone());
|
||||
}
|
||||
if (clone.getStride() != null) {
|
||||
clone.setStride( clone.getStride().clone());
|
||||
clone.setStride(clone.getStride().clone());
|
||||
}
|
||||
if (clone.getPadding() != null) {
|
||||
clone.setPadding( clone.getPadding().clone());
|
||||
clone.setPadding(clone.getPadding().clone());
|
||||
}
|
||||
return clone;
|
||||
}
|
||||
|
@ -134,6 +143,11 @@ public class Deconvolution3D extends ConvolutionLayer {
|
|||
}
|
||||
}
|
||||
|
||||
// private int[] kernelSize;
|
||||
// private int[] stride;
|
||||
// private int[] padding;
|
||||
// private int[] dilation;
|
||||
|
||||
@Override
|
||||
public InputType getOutputType(int layerIndex, InputType inputType) {
|
||||
if (inputType == null || inputType.getType() != InputType.Type.CNN3D) {
|
||||
|
@ -158,29 +172,16 @@ public class Deconvolution3D extends ConvolutionLayer {
|
|||
Deconvolution3DLayer.class);
|
||||
}
|
||||
|
||||
//private int[] kernelSize;
|
||||
//private int[] stride;
|
||||
//private int[] padding;
|
||||
//private int[] dilation;
|
||||
|
||||
public static abstract class Deconvolution3DBuilder<
|
||||
public abstract static class Deconvolution3DBuilder<
|
||||
C extends Deconvolution3D, B extends Deconvolution3DBuilder<C, B>>
|
||||
extends ConvolutionLayerBuilder<C, B> {
|
||||
public C build() {
|
||||
C l = initBuild();
|
||||
extends ConvolutionLayerBuilder<C, B> {}
|
||||
|
||||
private static final class Deconvolution3DBuilderImpl
|
||||
extends Deconvolution3DBuilder<Deconvolution3D, Deconvolution3DBuilderImpl> {
|
||||
public Deconvolution3D build() {
|
||||
Deconvolution3D l = new Deconvolution3D(this);
|
||||
l.initializeConstraints();
|
||||
return l;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static Deconvolution3DBuilder<?,?> builder() {
|
||||
return innerBuilder()
|
||||
.kernelSize(new int[] {2, 2, 2})
|
||||
.stride(new int[] {1, 1, 1})
|
||||
.padding(new int[] {0, 0, 0})
|
||||
.dilation(new int[] {1, 1, 1})
|
||||
.convolutionDim(3);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -40,7 +40,7 @@ import org.nd4j.linalg.api.ndarray.INDArray;
|
|||
@Data
|
||||
@ToString(callSuper = true)
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@SuperBuilder(buildMethodName = "initBuild")
|
||||
@SuperBuilder
|
||||
public class DepthwiseConvolution2D extends ConvolutionLayer {
|
||||
/**
|
||||
* Set channels multiplier for depth-wise convolution
|
||||
|
@ -145,21 +145,25 @@ public class DepthwiseConvolution2D extends ConvolutionLayer {
|
|||
this.dataFormat = ((InputType.InputTypeConvolutional) inputType).getFormat();
|
||||
}
|
||||
|
||||
public abstract static class DepthwiseConvolution2DBuilder<
|
||||
C extends DepthwiseConvolution2D, B extends DepthwiseConvolution2DBuilder<C, B>>
|
||||
extends ConvolutionLayerBuilder<C, B> {
|
||||
public C build() {
|
||||
private static final class DepthwiseConvolution2DBuilderImpl extends DepthwiseConvolution2DBuilder<DepthwiseConvolution2D, DepthwiseConvolution2DBuilderImpl> {
|
||||
public DepthwiseConvolution2D build() {
|
||||
DepthwiseConvolution2D l = new DepthwiseConvolution2D(this);
|
||||
Preconditions.checkState(
|
||||
depthMultiplier$value > 0,
|
||||
"Depth multiplier must be > 0, got %s",
|
||||
depthMultiplier$value);
|
||||
C l = this.initBuild();
|
||||
l.getDepthMultiplier() > 0,
|
||||
"Depth multiplier must be > 0, got %s",
|
||||
l.getDepthMultiplier());
|
||||
|
||||
ConvolutionUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding());
|
||||
ConvolutionUtils.validateCnnKernelStridePadding(
|
||||
l.getKernelSize(), l.getStride(), l.getPadding());
|
||||
l.getKernelSize(), l.getStride(), l.getPadding());
|
||||
l.initializeConstraints();
|
||||
return l;
|
||||
}
|
||||
}
|
||||
public abstract static class DepthwiseConvolution2DBuilder<
|
||||
C extends DepthwiseConvolution2D, B extends DepthwiseConvolution2DBuilder<C, B>>
|
||||
extends ConvolutionLayerBuilder<C, B> {
|
||||
|
||||
|
||||
@Override
|
||||
public B kernelSize(int... kernelSize) {
|
||||
|
|
|
@ -20,6 +20,8 @@
|
|||
|
||||
package org.deeplearning4j.nn.conf.layers;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Map;
|
||||
import lombok.*;
|
||||
import lombok.experimental.Accessors;
|
||||
import lombok.experimental.SuperBuilder;
|
||||
|
@ -30,127 +32,136 @@ import org.deeplearning4j.nn.conf.inputs.InputType;
|
|||
import org.deeplearning4j.nn.conf.memory.LayerMemoryReport;
|
||||
import org.deeplearning4j.nn.conf.memory.MemoryReport;
|
||||
import org.deeplearning4j.nn.params.EmbeddingLayerParamInitializer;
|
||||
import org.deeplearning4j.nn.weights.IWeightInit;
|
||||
import org.deeplearning4j.nn.weights.WeightInit;
|
||||
import org.deeplearning4j.nn.weights.embeddings.ArrayEmbeddingInitializer;
|
||||
import org.deeplearning4j.nn.weights.embeddings.EmbeddingInitializer;
|
||||
import org.deeplearning4j.nn.weights.embeddings.WeightInitEmbedding;
|
||||
import org.deeplearning4j.optimize.api.TrainingListener;
|
||||
import org.nd4j.linalg.activations.Activation;
|
||||
import org.nd4j.linalg.activations.impl.ActivationIdentity;
|
||||
import org.nd4j.linalg.api.buffer.DataType;
|
||||
import org.nd4j.linalg.api.ndarray.INDArray;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Map;
|
||||
|
||||
@Data
|
||||
@ToString(callSuper = true)
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder")
|
||||
@SuperBuilder(builderMethodName = "innerBuilder")
|
||||
public class EmbeddingLayer extends FeedForwardLayer {
|
||||
/**
|
||||
* If true: include bias parameters in the layer. False (default): no bias.
|
||||
* @param hasBias If true: include bias parameters in this layer
|
||||
*/
|
||||
@Accessors @Builder.Default
|
||||
private boolean hasBias = false;
|
||||
/**
|
||||
* If true: include bias parameters in the layer. False (default): no bias.
|
||||
*
|
||||
* @param hasBias If true: include bias parameters in this layer
|
||||
*/
|
||||
@Accessors @Builder.Default private boolean hasBias = false;
|
||||
|
||||
/**
|
||||
*Default to Identity activation - i.e., don't inherit.
|
||||
* For example, if user sets ReLU as global default, they very likely don't intend to use it for Embedding layer also
|
||||
*
|
||||
* Default to Identity activation - i.e., don't inherit. For example, if user sets ReLU as global
|
||||
* default, they very likely don't intend to use it for Embedding layer also
|
||||
*/
|
||||
public static EmbeddingLayerBuilder<?, ?> builder() {
|
||||
return innerBuilder()
|
||||
.activation(Activation.IDENTITY);
|
||||
return innerBuilder().activation(Activation.IDENTITY);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Layer instantiate(
|
||||
NeuralNetConfiguration conf,
|
||||
Collection<TrainingListener> trainingListeners,
|
||||
int layerIndex,
|
||||
INDArray layerParamsView,
|
||||
boolean initializeParams,
|
||||
DataType networkDataType) {
|
||||
LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
|
||||
org.deeplearning4j.nn.layers.feedforward.embedding.EmbeddingLayer ret =
|
||||
new org.deeplearning4j.nn.layers.feedforward.embedding.EmbeddingLayer(
|
||||
lconf, networkDataType);
|
||||
runInheritance();
|
||||
|
||||
ret.addTrainingListeners(trainingListeners);
|
||||
ret.setIndex(layerIndex);
|
||||
ret.setParamsViewArray(layerParamsView);
|
||||
Map<String, INDArray> paramTable = initializer().init(this, layerParamsView, initializeParams);
|
||||
ret.setParamTable(paramTable);
|
||||
ret.setLayerConfiguration(lconf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ParamInitializer initializer() {
|
||||
return EmbeddingLayerParamInitializer.getInstance();
|
||||
}
|
||||
|
||||
@Override
|
||||
public LayerMemoryReport getMemoryReport(InputType inputType) {
|
||||
// Basically a dense layer, but no dropout is possible here, and no epsilons
|
||||
InputType outputType = getOutputType(-1, inputType);
|
||||
|
||||
val actElementsPerEx = outputType.arrayElementsPerExample();
|
||||
val numParams = initializer().numParams(this);
|
||||
val updaterStateSize = (int) getIUpdater().stateSize(numParams);
|
||||
|
||||
// Embedding layer does not use caching.
|
||||
// Inference: no working memory - just activations (pullRows)
|
||||
// Training: preout op, the only in-place ops on epsilon (from layer above) + assign ops
|
||||
|
||||
return new LayerMemoryReport.Builder(name, EmbeddingLayer.class, inputType, outputType)
|
||||
.standardMemory(numParams, updaterStateSize)
|
||||
.workingMemory(0, 0, 0, actElementsPerEx)
|
||||
.cacheMemory(
|
||||
MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) // No caching
|
||||
.build();
|
||||
}
|
||||
|
||||
private static final class EmbeddingLayerBuilderImpl
|
||||
extends EmbeddingLayerBuilder<EmbeddingLayer, EmbeddingLayerBuilderImpl> {
|
||||
public EmbeddingLayer build() {
|
||||
EmbeddingLayer l = new EmbeddingLayer(this);
|
||||
l.initializeConstraints();
|
||||
return l;
|
||||
}
|
||||
}
|
||||
|
||||
public static abstract class EmbeddingLayerBuilder<C extends EmbeddingLayer, B extends EmbeddingLayerBuilder<C,B>>
|
||||
extends FeedForwardLayerBuilder<C,B>{
|
||||
public C build() {
|
||||
C l = initBuild();
|
||||
l.initializeConstraints();
|
||||
return l;
|
||||
}
|
||||
public abstract static class EmbeddingLayerBuilder<
|
||||
C extends EmbeddingLayer, B extends EmbeddingLayerBuilder<C, B>>
|
||||
extends FeedForwardLayerBuilder<C, B> {
|
||||
|
||||
/**
|
||||
* Weight initialization scheme to use, for initial weight values
|
||||
*
|
||||
* @param weightInit
|
||||
* @see WeightInit
|
||||
*/
|
||||
@Override
|
||||
public B weightInit(WeightInit weightInit) {
|
||||
if(weightInit.getWeightInitFunction() instanceof WeightInitEmbedding){
|
||||
long[] shape = ((WeightInitEmbedding) weightInit.getWeightInitFunction()).shape();
|
||||
nIn(shape[0]);
|
||||
nOut(shape[1]);
|
||||
}
|
||||
super.weightInit(weightInit);
|
||||
return self();
|
||||
}
|
||||
/**
|
||||
* Initialize the embedding layer using values from the specified array. Note that the array should have shape
|
||||
* [vocabSize, vectorSize]. After copying values from the array to initialize the network parameters, the input
|
||||
* array will be discarded (so that, if necessary, it can be garbage collected)
|
||||
*
|
||||
* @param vectors Vectors to initialize the embedding layer with
|
||||
*/
|
||||
public B weightInit(INDArray vectors){
|
||||
weightInit(new ArrayEmbeddingInitializer(vectors));
|
||||
return self();
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize the embedding layer using the specified EmbeddingInitializer - such as a Word2Vec instance
|
||||
*
|
||||
* @param embeddingInitializer Source of the embedding layer weights
|
||||
*/
|
||||
public B weightInit(EmbeddingInitializer embeddingInitializer) {
|
||||
var weightIn = new WeightInitEmbedding(embeddingInitializer);
|
||||
super.weightInit(weightIn);
|
||||
return self();
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Weight initialization scheme to use, for initial weight values
|
||||
*
|
||||
* @param weightInit
|
||||
* @see WeightInit
|
||||
*/
|
||||
@Override
|
||||
public Layer instantiate(NeuralNetConfiguration conf, Collection<TrainingListener> trainingListeners,
|
||||
int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) {
|
||||
LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
|
||||
org.deeplearning4j.nn.layers.feedforward.embedding.EmbeddingLayer ret =
|
||||
new org.deeplearning4j.nn.layers.feedforward.embedding.EmbeddingLayer(lconf, networkDataType);
|
||||
runInheritance();
|
||||
|
||||
ret.addTrainingListeners(trainingListeners);
|
||||
ret.setIndex(layerIndex);
|
||||
ret.setParamsViewArray(layerParamsView);
|
||||
Map<String, INDArray> paramTable = initializer().init(this, layerParamsView, initializeParams);
|
||||
ret.setParamTable(paramTable);
|
||||
ret.setLayerConfiguration(lconf);
|
||||
return ret;
|
||||
public B weightInit(WeightInit weightInit) {
|
||||
if (weightInit.getWeightInitFunction() instanceof WeightInitEmbedding) {
|
||||
long[] shape = ((WeightInitEmbedding) weightInit.getWeightInitFunction()).shape();
|
||||
nIn(shape[0]);
|
||||
nOut(shape[1]);
|
||||
}
|
||||
super.weightInit(weightInit);
|
||||
return self();
|
||||
}
|
||||
/**
|
||||
* Initialize the embedding layer using values from the specified array. Note that the array
|
||||
* should have shape [vocabSize, vectorSize]. After copying values from the array to initialize
|
||||
* the network parameters, the input array will be discarded (so that, if necessary, it can be
|
||||
* garbage collected)
|
||||
*
|
||||
* @param vectors Vectors to initialize the embedding layer with
|
||||
*/
|
||||
public B weightInit(INDArray vectors) {
|
||||
weightInit(new ArrayEmbeddingInitializer(vectors));
|
||||
return self();
|
||||
}
|
||||
|
||||
@Override
|
||||
public ParamInitializer initializer() {
|
||||
return EmbeddingLayerParamInitializer.getInstance();
|
||||
}
|
||||
|
||||
@Override
|
||||
public LayerMemoryReport getMemoryReport(InputType inputType) {
|
||||
//Basically a dense layer, but no dropout is possible here, and no epsilons
|
||||
InputType outputType = getOutputType(-1, inputType);
|
||||
|
||||
val actElementsPerEx = outputType.arrayElementsPerExample();
|
||||
val numParams = initializer().numParams(this);
|
||||
val updaterStateSize = (int) getIUpdater().stateSize(numParams);
|
||||
|
||||
//Embedding layer does not use caching.
|
||||
//Inference: no working memory - just activations (pullRows)
|
||||
//Training: preout op, the only in-place ops on epsilon (from layer above) + assign ops
|
||||
|
||||
return new LayerMemoryReport.Builder(name, EmbeddingLayer.class, inputType, outputType)
|
||||
.standardMemory(numParams, updaterStateSize).workingMemory(0, 0, 0, actElementsPerEx)
|
||||
.cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching
|
||||
.build();
|
||||
/**
|
||||
* Initialize the embedding layer using the specified EmbeddingInitializer - such as a Word2Vec
|
||||
* instance
|
||||
*
|
||||
* @param embeddingInitializer Source of the embedding layer weights
|
||||
*/
|
||||
public B weightInit(EmbeddingInitializer embeddingInitializer) {
|
||||
var weightIn = new WeightInitEmbedding(embeddingInitializer);
|
||||
super.weightInit(weightIn);
|
||||
return self();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -46,7 +46,7 @@ import java.util.Map;
|
|||
@Data
|
||||
@ToString(callSuper = true)
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder")
|
||||
@SuperBuilder(builderMethodName = "innerBuilder")
|
||||
public class EmbeddingSequenceLayer extends FeedForwardLayer {
|
||||
/**
|
||||
* Set input sequence length for this embedding layer.
|
||||
|
@ -70,13 +70,16 @@ public class EmbeddingSequenceLayer extends FeedForwardLayer {
|
|||
@Builder.Default private boolean inferInputLength = false; // use input length as provided by input data
|
||||
@Builder.Default private RNNFormat outputDataFormat = RNNFormat.NCW; //Default value for older deserialized models
|
||||
|
||||
private static final class EmbeddingSequenceLayerBuilderImpl extends EmbeddingSequenceLayerBuilder<EmbeddingSequenceLayer, EmbeddingSequenceLayerBuilderImpl> {
|
||||
public EmbeddingSequenceLayer build() {
|
||||
EmbeddingSequenceLayer l = new EmbeddingSequenceLayer(this);
|
||||
l.initializeConstraints();
|
||||
return l;
|
||||
}
|
||||
}
|
||||
public static abstract class EmbeddingSequenceLayerBuilder<C extends EmbeddingSequenceLayer, B extends EmbeddingSequenceLayerBuilder<C, B>>
|
||||
extends FeedForwardLayerBuilder<C, B> {
|
||||
public C build() {
|
||||
C l = initBuild();
|
||||
l.initializeConstraints();
|
||||
return l;
|
||||
}
|
||||
|
||||
|
||||
public B weightInit(IWeightInit weightInit){
|
||||
if(weightInit instanceof WeightInitEmbedding){
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
|
||||
package org.deeplearning4j.nn.conf.layers;
|
||||
|
||||
import java.util.*;
|
||||
import lombok.*;
|
||||
import lombok.experimental.SuperBuilder;
|
||||
import org.deeplearning4j.nn.api.Layer;
|
||||
|
@ -36,23 +37,21 @@ import org.nd4j.linalg.activations.impl.ActivationSigmoid;
|
|||
import org.nd4j.linalg.api.buffer.DataType;
|
||||
import org.nd4j.linalg.api.ndarray.INDArray;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
@Data
|
||||
@ToString(callSuper = true)
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@Deprecated
|
||||
@SuperBuilder(buildMethodName = "initBuild")
|
||||
@SuperBuilder
|
||||
public class GravesBidirectionalLSTM extends BaseRecurrentLayer {
|
||||
|
||||
public static abstract class GravesBidirectionalLSTMBuilder<C extends GravesBidirectionalLSTM, B extends
|
||||
GravesBidirectionalLSTMBuilder<C, B>> extends BaseRecurrentLayerBuilder<C, B> {
|
||||
public C build() {
|
||||
C l = this.initBuild();
|
||||
l.initializeConstraints();
|
||||
return l;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementatation be allowed?
|
||||
* If set to false, an exception in CuDNN will be propagated back to the user. If false, the built-in
|
||||
* (non-CuDNN) implementation for GravesBidirectionalLSTM will be used
|
||||
*
|
||||
*/
|
||||
@Builder.Default
|
||||
protected boolean helperAllowFallback = true;
|
||||
/**
|
||||
* Set forget gate bias initalizations. Values in range 1-5 can potentially help with learning or longer-term
|
||||
* dependencies.
|
||||
|
@ -66,15 +65,6 @@ public class GravesBidirectionalLSTM extends BaseRecurrentLayer {
|
|||
*/
|
||||
@Builder.Default
|
||||
private IActivation gateActivationFunction = new ActivationSigmoid();
|
||||
/**
|
||||
* When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementatation be allowed?
|
||||
* If set to false, an exception in CuDNN will be propagated back to the user. If false, the built-in
|
||||
* (non-CuDNN) implementation for GravesBidirectionalLSTM will be used
|
||||
*
|
||||
*/
|
||||
@Builder.Default
|
||||
protected boolean helperAllowFallback = true;
|
||||
|
||||
|
||||
@Override
|
||||
protected void initializeConstraints() {
|
||||
|
@ -121,5 +111,18 @@ public class GravesBidirectionalLSTM extends BaseRecurrentLayer {
|
|||
return LSTMHelpers.getMemoryReport(this, inputType);
|
||||
}
|
||||
|
||||
private static final class GravesBidirectionalLSTMBuilderImpl extends GravesBidirectionalLSTMBuilder<GravesBidirectionalLSTM, GravesBidirectionalLSTMBuilderImpl> {
|
||||
public GravesBidirectionalLSTM build() {
|
||||
GravesBidirectionalLSTM l = new GravesBidirectionalLSTM(this);
|
||||
l.initializeConstraints();
|
||||
return l;
|
||||
}
|
||||
}
|
||||
|
||||
public static abstract class GravesBidirectionalLSTMBuilder<C extends GravesBidirectionalLSTM, B extends
|
||||
GravesBidirectionalLSTMBuilder<C, B>> extends BaseRecurrentLayerBuilder<C, B> {
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -43,7 +43,7 @@ import org.nd4j.linalg.api.ndarray.INDArray;
|
|||
@Deprecated
|
||||
@ToString(callSuper = true)
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@SuperBuilder(buildMethodName = "initBuild")
|
||||
@SuperBuilder
|
||||
public class GravesLSTM extends AbstractLSTM {
|
||||
|
||||
private double forgetGateBiasInit;
|
||||
|
@ -103,9 +103,12 @@ public class GravesLSTM extends AbstractLSTM {
|
|||
|
||||
public abstract static class GravesLSTMBuilder<
|
||||
C extends GravesLSTM, B extends GravesLSTMBuilder<C, B>>
|
||||
extends AbstractLSTMBuilder<C, B> {
|
||||
public C build() {
|
||||
C l = initBuild();
|
||||
extends AbstractLSTMBuilder<C, B> {}
|
||||
|
||||
private static final class GravesLSTMBuilderImpl
|
||||
extends GravesLSTMBuilder<GravesLSTM, GravesLSTMBuilderImpl> {
|
||||
public GravesLSTM build() {
|
||||
GravesLSTM l = new GravesLSTM(this);
|
||||
l.initializeConstraints();
|
||||
return l;
|
||||
}
|
||||
|
|
|
@ -20,6 +20,10 @@
|
|||
|
||||
package org.deeplearning4j.nn.conf.layers;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
import lombok.*;
|
||||
import lombok.experimental.SuperBuilder;
|
||||
import org.deeplearning4j.nn.api.Layer;
|
||||
|
@ -31,71 +35,75 @@ import org.deeplearning4j.nn.conf.memory.LayerMemoryReport;
|
|||
import org.deeplearning4j.nn.layers.recurrent.LSTMHelpers;
|
||||
import org.deeplearning4j.nn.params.LSTMParamInitializer;
|
||||
import org.deeplearning4j.optimize.api.TrainingListener;
|
||||
import org.nd4j.linalg.activations.Activation;
|
||||
import org.nd4j.linalg.api.buffer.DataType;
|
||||
import org.nd4j.linalg.api.ndarray.INDArray;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
|
||||
@Data
|
||||
@ToString(callSuper = true)
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@SuperBuilder(buildMethodName = "initBuild")
|
||||
@SuperBuilder
|
||||
public class LSTM extends AbstractLSTM {
|
||||
|
||||
private double forgetGateBiasInit;
|
||||
private double forgetGateBiasInit;
|
||||
|
||||
public static abstract class LSTMBuilder<C extends LSTM, B extends LSTMBuilder<C, B>> extends AbstractLSTMBuilder<C, B> {
|
||||
@Override public C build() {
|
||||
C l = this.initBuild();
|
||||
l.initializeConstraints();
|
||||
return l;
|
||||
}
|
||||
@Override
|
||||
protected void initializeConstraints() {
|
||||
super.initializeConstraints();
|
||||
if (recurrentConstraints != null) {
|
||||
if (constraints == null) {
|
||||
constraints = new ArrayList<>();
|
||||
}
|
||||
for (LayerConstraint c : recurrentConstraints) {
|
||||
LayerConstraint c2 = c.clone();
|
||||
c2.setParams(Collections.singleton(LSTMParamInitializer.RECURRENT_WEIGHT_KEY));
|
||||
constraints.add(c2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Layer instantiate(
|
||||
NeuralNetConfiguration conf,
|
||||
Collection<TrainingListener> trainingListeners,
|
||||
int layerIndex,
|
||||
INDArray layerParamsView,
|
||||
boolean initializeParams,
|
||||
DataType networkDataType) {
|
||||
LayerValidation.assertNInNOutSet("LSTM", getName(), layerIndex, getNIn(), getNOut());
|
||||
LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
|
||||
runInheritance();
|
||||
|
||||
org.deeplearning4j.nn.layers.recurrent.LSTM ret =
|
||||
new org.deeplearning4j.nn.layers.recurrent.LSTM(lconf, networkDataType);
|
||||
ret.addTrainingListeners(trainingListeners);
|
||||
ret.setIndex(layerIndex);
|
||||
ret.setParamsViewArray(layerParamsView);
|
||||
Map<String, INDArray> paramTable = initializer().init(this, layerParamsView, initializeParams);
|
||||
ret.setParamTable(paramTable);
|
||||
ret.setLayerConfiguration(lconf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ParamInitializer initializer() {
|
||||
return LSTMParamInitializer.getInstance();
|
||||
}
|
||||
|
||||
@Override
|
||||
public LayerMemoryReport getMemoryReport(InputType inputType) {
|
||||
// TODO - CuDNN etc
|
||||
return LSTMHelpers.getMemoryReport(this, inputType);
|
||||
}
|
||||
|
||||
public abstract static class LSTMBuilder<C extends LSTM, B extends LSTMBuilder<C, B>>
|
||||
extends AbstractLSTMBuilder<C, B> {}
|
||||
|
||||
private static final class LSTMBuilderImpl extends LSTMBuilder<LSTM, LSTMBuilderImpl> {
|
||||
@Override
|
||||
protected void initializeConstraints() {
|
||||
super.initializeConstraints();
|
||||
if (recurrentConstraints != null) {
|
||||
if (constraints == null) {
|
||||
constraints = new ArrayList<>();
|
||||
}
|
||||
for (LayerConstraint c : recurrentConstraints) {
|
||||
LayerConstraint c2 = c.clone();
|
||||
c2.setParams(Collections.singleton(LSTMParamInitializer.RECURRENT_WEIGHT_KEY));
|
||||
constraints.add(c2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Layer instantiate(NeuralNetConfiguration conf, Collection<TrainingListener> trainingListeners,
|
||||
int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) {
|
||||
LayerValidation.assertNInNOutSet("LSTM", getName(), layerIndex, getNIn(), getNOut());
|
||||
LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
|
||||
runInheritance();
|
||||
|
||||
org.deeplearning4j.nn.layers.recurrent.LSTM ret = new org.deeplearning4j.nn.layers.recurrent.LSTM(lconf, networkDataType);
|
||||
ret.addTrainingListeners(trainingListeners);
|
||||
ret.setIndex(layerIndex);
|
||||
ret.setParamsViewArray(layerParamsView);
|
||||
Map<String, INDArray> paramTable = initializer().init(this, layerParamsView, initializeParams);
|
||||
ret.setParamTable(paramTable);
|
||||
ret.setLayerConfiguration(lconf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ParamInitializer initializer() {
|
||||
return LSTMParamInitializer.getInstance();
|
||||
}
|
||||
|
||||
@Override
|
||||
public LayerMemoryReport getMemoryReport(InputType inputType) {
|
||||
//TODO - CuDNN etc
|
||||
return LSTMHelpers.getMemoryReport(this, inputType);
|
||||
public LSTM build() {
|
||||
LSTM l = new LSTM(this);
|
||||
l.initializeConstraints();
|
||||
return l;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -354,15 +354,5 @@ public abstract class LayerConfiguration
|
|||
biasConstraints = Arrays.asList(constraints);
|
||||
return self();
|
||||
}
|
||||
|
||||
/**
|
||||
* we are doing this to avoid BUG https://github.com/projectlombok/lombok/issues/3419 as some
|
||||
* child classes may specify their own buildMethodName in @SuperBuilder, but we use only
|
||||
* "initBuild" here consequently
|
||||
* @return
|
||||
*/
|
||||
public C initBuild() {
|
||||
return build();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -41,7 +41,7 @@ import org.nd4j.linalg.factory.Nd4j;
|
|||
|
||||
@Data
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@SuperBuilder(buildMethodName = "initBuild")
|
||||
@SuperBuilder
|
||||
public class LearnedSelfAttentionLayer extends SameDiffLayer {
|
||||
private static final String WEIGHT_KEY_QUERY_PROJECTION = "Wq";
|
||||
private static final String WEIGHT_KEY_KEY_PROJECTION = "Wk";
|
||||
|
@ -173,19 +173,24 @@ public class LearnedSelfAttentionLayer extends SameDiffLayer {
|
|||
public static abstract class LearnedSelfAttentionLayerBuilder<
|
||||
C extends LearnedSelfAttentionLayer, B extends LearnedSelfAttentionLayerBuilder<C, B>>
|
||||
extends SameDiffLayerBuilder<C, B> {
|
||||
public C build() {
|
||||
Preconditions.checkArgument(
|
||||
this.projectInput || this.nHeads == 1, "projectInput must be true when nHeads != 1");
|
||||
Preconditions.checkArgument(
|
||||
this.projectInput || nIn == nOut, "nIn must be equal to nOut when projectInput is false");
|
||||
Preconditions.checkArgument(
|
||||
!this.projectInput || nOut != 0, "nOut must be specified when projectInput is true");
|
||||
Preconditions.checkArgument(
|
||||
this.nOut % nHeads == 0 || headSize > 0,
|
||||
"nOut isn't divided by nHeads cleanly. Specify the headSize manually.");
|
||||
Preconditions.checkArgument(this.nQueries > 0, "You must set numQueries.");
|
||||
|
||||
return initBuild();
|
||||
}
|
||||
|
||||
private static final class LearnedSelfAttentionLayerBuilderImpl extends LearnedSelfAttentionLayerBuilder<LearnedSelfAttentionLayer, LearnedSelfAttentionLayerBuilderImpl> {
|
||||
public LearnedSelfAttentionLayer build() {
|
||||
LearnedSelfAttentionLayer l = new LearnedSelfAttentionLayer(this);
|
||||
Preconditions.checkArgument(
|
||||
l.isProjectInput() || l.getNHeads() == 1, "projectInput must be true when nHeads != 1");
|
||||
Preconditions.checkArgument(
|
||||
l.isProjectInput() || l.getNIn() == l.getNOut(), "nIn must be equal to nOut when projectInput is false");
|
||||
Preconditions.checkArgument(
|
||||
!l.isProjectInput() || l.getNOut() != 0, "nOut must be specified when projectInput is true");
|
||||
Preconditions.checkArgument(
|
||||
l.getNOut() % l.getNHeads() == 0 || l.getHeadSize() > 0,
|
||||
"nOut isn't divided by nHeads cleanly. Specify the headSize manually.");
|
||||
Preconditions.checkArgument(l.getNQueries() > 0, "You must set numQueries.");
|
||||
|
||||
return l;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -48,19 +48,9 @@ import org.nd4j.linalg.factory.Nd4j;
|
|||
@Data
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@JsonIgnoreProperties({"paramShapes"})
|
||||
@SuperBuilder(buildMethodName = "initBuild")
|
||||
@SuperBuilder
|
||||
public class LocallyConnected1D extends SameDiffLayer {
|
||||
|
||||
public static abstract class LocallyConnected1DBuilder<C extends LocallyConnected1D, B extends LocallyConnected1DBuilder<C, B>> extends
|
||||
SameDiffLayerBuilder<C, B> {
|
||||
public C build() {
|
||||
Convolution1DUtils.validateConvolutionModePadding(convolutionMode$value, padding$value);
|
||||
Convolution1DUtils.validateCnn1DKernelStridePadding(kernelSize$value, stride$value, padding$value);
|
||||
C l = initBuild();
|
||||
return l;
|
||||
}
|
||||
}
|
||||
|
||||
private static final List<String> WEIGHT_KEYS =
|
||||
Collections.singletonList(ConvolutionParamInitializer.WEIGHT_KEY);
|
||||
private static final List<String> BIAS_KEYS =
|
||||
|
@ -89,10 +79,8 @@ public class LocallyConnected1D extends SameDiffLayer {
|
|||
private int paddingR; // Right/bottom padding
|
||||
/** Convolution mode for the layer. See {@link ConvolutionMode} for details */
|
||||
@Builder.Default private ConvolutionMode convolutionMode = ConvolutionMode.Same;
|
||||
|
||||
/** Dilation for the layer */
|
||||
@Builder.Default private int dilation = 1;
|
||||
|
||||
/** If true (default is false) the layer will have a bias */
|
||||
@Builder.Default private boolean hasBias = true;
|
||||
|
||||
|
@ -272,4 +260,20 @@ public class LocallyConnected1D extends SameDiffLayer {
|
|||
convolutionMode = global_conf.getConvolutionMode();
|
||||
}
|
||||
}
|
||||
|
||||
private static final class LocallyConnected1DBuilderImpl
|
||||
extends LocallyConnected1DBuilder<LocallyConnected1D, LocallyConnected1DBuilderImpl> {
|
||||
public LocallyConnected1D build() {
|
||||
LocallyConnected1D l = new LocallyConnected1D(this);
|
||||
Convolution1DUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding());
|
||||
Convolution1DUtils.validateCnn1DKernelStridePadding(
|
||||
l.getKernelSize(), l.getStride(), l.getPadding());
|
||||
|
||||
return l;
|
||||
}
|
||||
}
|
||||
|
||||
public abstract static class LocallyConnected1DBuilder<
|
||||
C extends LocallyConnected1D, B extends LocallyConnected1DBuilder<C, B>>
|
||||
extends SameDiffLayerBuilder<C, B> {}
|
||||
}
|
||||
|
|
|
@ -41,7 +41,6 @@ import org.nd4j.autodiff.samediff.SDVariable;
|
|||
import org.nd4j.autodiff.samediff.SameDiff;
|
||||
import org.nd4j.enums.PadMode;
|
||||
import org.nd4j.linalg.activations.Activation;
|
||||
import org.nd4j.linalg.activations.IActivation;
|
||||
import org.nd4j.linalg.api.memory.MemoryWorkspace;
|
||||
import org.nd4j.linalg.api.ndarray.INDArray;
|
||||
import org.nd4j.linalg.factory.Nd4j;
|
||||
|
@ -49,7 +48,7 @@ import org.nd4j.linalg.factory.Nd4j;
|
|||
@Data
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@JsonIgnoreProperties({"paramShapes"})
|
||||
@SuperBuilder(buildMethodName = "initBuild")
|
||||
@SuperBuilder
|
||||
public class LocallyConnected2D extends SameDiffLayer {
|
||||
|
||||
private static final List<String> WEIGHT_KEYS =
|
||||
|
@ -318,40 +317,44 @@ public class LocallyConnected2D extends SameDiffLayer {
|
|||
}
|
||||
}
|
||||
|
||||
public static abstract class LocallyConnected2DBuilder<
|
||||
C extends LocallyConnected2D, B extends LocallyConnected2DBuilder<C, B>>
|
||||
extends SameDiffLayerBuilder<C, B> {
|
||||
public C build() {
|
||||
featureDim(kernel$value[0] * kernel$value[1] * (int) nIn);
|
||||
C l = initBuild();
|
||||
private static final class LocallyConnected2DBuilderImpl
|
||||
extends LocallyConnected2DBuilder<LocallyConnected2D, LocallyConnected2DBuilderImpl> {
|
||||
public LocallyConnected2D build() {
|
||||
LocallyConnected2D l = new LocallyConnected2D(this);
|
||||
l.setFeatureDim(l.getKernel()[0] * l.getKernel()[1] * (int) l.getNIn());
|
||||
return l;
|
||||
}
|
||||
}
|
||||
|
||||
public B kernelSize(int ... kernel) {
|
||||
this.kernel$value = ValidationUtils.validate2NonNegative(kernel, false, "kernel");
|
||||
public abstract static class LocallyConnected2DBuilder<
|
||||
C extends LocallyConnected2D, B extends LocallyConnected2DBuilder<C, B>>
|
||||
extends SameDiffLayerBuilder<C, B> {
|
||||
|
||||
public B kernelSize(int... kernel) {
|
||||
this.kernel$value = ValidationUtils.validate2NonNegative(kernel, false, "kernel");
|
||||
this.kernel$set = true;
|
||||
return self();
|
||||
}
|
||||
|
||||
public B inputSize(int ... size) {
|
||||
this.inputSize = size;
|
||||
public B inputSize(int... size) {
|
||||
this.inputSize = size;
|
||||
return self();
|
||||
}
|
||||
|
||||
public B stride(int ... stride) {
|
||||
this.stride$value = ValidationUtils.validate2NonNegative(stride, false, "stride");
|
||||
public B stride(int... stride) {
|
||||
this.stride$value = ValidationUtils.validate2NonNegative(stride, false, "stride");
|
||||
this.stride$set = true;
|
||||
return self();
|
||||
}
|
||||
|
||||
public B padding(int ... padding) {
|
||||
this.padding$value = ValidationUtils.validate2NonNegative(padding, false, "padding");
|
||||
public B padding(int... padding) {
|
||||
this.padding$value = ValidationUtils.validate2NonNegative(padding, false, "padding");
|
||||
this.padding$set = true;
|
||||
return self();
|
||||
}
|
||||
|
||||
public B dilation(int ... dilation) {
|
||||
this.dilation$value = ValidationUtils.validate2NonNegative(dilation, false, "dilation");
|
||||
public B dilation(int... dilation) {
|
||||
this.dilation$value = ValidationUtils.validate2NonNegative(dilation, false, "dilation");
|
||||
this.dilation$set = true;
|
||||
return self();
|
||||
}
|
||||
|
|
|
@ -22,7 +22,6 @@ package org.deeplearning4j.nn.conf.layers;
|
|||
|
||||
import java.util.Collection;
|
||||
import java.util.Map;
|
||||
|
||||
import lombok.*;
|
||||
import lombok.experimental.SuperBuilder;
|
||||
import org.deeplearning4j.nn.api.Layer;
|
||||
|
@ -33,13 +32,12 @@ import org.deeplearning4j.optimize.api.TrainingListener;
|
|||
import org.nd4j.linalg.activations.Activation;
|
||||
import org.nd4j.linalg.api.buffer.DataType;
|
||||
import org.nd4j.linalg.api.ndarray.INDArray;
|
||||
import org.nd4j.linalg.lossfunctions.ILossFunction;
|
||||
import org.nd4j.linalg.lossfunctions.LossFunctions;
|
||||
|
||||
@Data
|
||||
@ToString(callSuper = true)
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder")
|
||||
@SuperBuilder(builderMethodName = "innerBuilder")
|
||||
public class OutputLayer extends BaseOutputLayer {
|
||||
|
||||
{ // Set default activation function to softmax (to match default loss function MCXENT)
|
||||
|
@ -82,15 +80,16 @@ public class OutputLayer extends BaseOutputLayer {
|
|||
return DefaultParamInitializer.getInstance();
|
||||
}
|
||||
|
||||
public static abstract class OutputLayerBuilder<
|
||||
public abstract static class OutputLayerBuilder<
|
||||
C extends OutputLayer, B extends OutputLayerBuilder<C, B>>
|
||||
extends BaseOutputLayerBuilder<C, B> {
|
||||
public C build() {
|
||||
C l = this.initBuild();
|
||||
extends BaseOutputLayerBuilder<C, B> {}
|
||||
|
||||
private static final class OutputLayerBuilderImpl
|
||||
extends OutputLayerBuilder<OutputLayer, OutputLayerBuilderImpl> {
|
||||
public OutputLayer build() {
|
||||
OutputLayer l = new OutputLayer(this);
|
||||
l.initializeConstraints();
|
||||
return l;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -40,7 +40,7 @@ import org.nd4j.linalg.api.ndarray.INDArray;
|
|||
@Data
|
||||
@ToString(callSuper = true)
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder")
|
||||
@SuperBuilder(builderMethodName = "innerBuilder")
|
||||
public class PReLULayer extends BaseLayerConfiguration {
|
||||
/**
|
||||
* Explicitly set input shape of incoming activations so that parameters can be initialized
|
||||
|
@ -129,14 +129,17 @@ public class PReLULayer extends BaseLayerConfiguration {
|
|||
.build();
|
||||
}
|
||||
|
||||
public static abstract class PReLULayerBuilder<
|
||||
C extends PReLULayer, B extends PReLULayerBuilder<C, B>>
|
||||
extends BaseLayerConfigurationBuilder<C, B> {
|
||||
public C build() {
|
||||
C l = initBuild();
|
||||
private static final class PReLULayerBuilderImpl extends PReLULayerBuilder<PReLULayer, PReLULayerBuilderImpl> {
|
||||
public PReLULayer build() {
|
||||
PReLULayer l = new PReLULayer(this);
|
||||
l.initializeConstraints();
|
||||
return l;
|
||||
}
|
||||
}
|
||||
public static abstract class PReLULayerBuilder<
|
||||
C extends PReLULayer, B extends PReLULayerBuilder<C, B>>
|
||||
extends BaseLayerConfigurationBuilder<C, B> {
|
||||
|
||||
|
||||
/**
|
||||
* Explicitly set input shape of incoming activations so that parameters can be initialized
|
||||
|
|
|
@ -35,6 +35,6 @@ import lombok.experimental.SuperBuilder;
|
|||
|
||||
@ToString(callSuper = true)
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@SuperBuilder(buildMethodName = "initBuild")
|
||||
@SuperBuilder
|
||||
public class Pooling1D extends Subsampling1DLayer {
|
||||
}
|
||||
|
|
|
@ -35,6 +35,6 @@ import lombok.experimental.SuperBuilder;
|
|||
|
||||
@ToString(callSuper = true)
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@SuperBuilder(buildMethodName = "initBuild")
|
||||
@SuperBuilder
|
||||
public class Pooling2D extends SubsamplingLayer {
|
||||
}
|
||||
|
|
|
@ -41,7 +41,7 @@ import org.nd4j.linalg.factory.Nd4j;
|
|||
|
||||
@Data
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder")
|
||||
@SuperBuilder(builderMethodName = "innerBuilder")
|
||||
public class PrimaryCapsules extends SameDiffLayer {
|
||||
|
||||
private static final String WEIGHT_PARAM = "weight";
|
||||
|
@ -335,7 +335,7 @@ public class PrimaryCapsules extends SameDiffLayer {
|
|||
}
|
||||
}
|
||||
|
||||
public static abstract class PrimaryCapsulesBuilder<
|
||||
public abstract static class PrimaryCapsulesBuilder<
|
||||
C extends PrimaryCapsules, B extends PrimaryCapsulesBuilder<C, B>>
|
||||
extends SameDiffLayerBuilder<C, B> {
|
||||
|
||||
|
@ -396,27 +396,30 @@ public class PrimaryCapsules extends SameDiffLayer {
|
|||
this.useLeakyReLU$set = true;
|
||||
return self();
|
||||
}
|
||||
}
|
||||
|
||||
public C build() {
|
||||
C l = initBuild();
|
||||
if (capsuleDimensions <= 0 || channels$value <= 0) {
|
||||
private static final class PrimaryCapsulesBuilderImpl
|
||||
extends PrimaryCapsulesBuilder<PrimaryCapsules, PrimaryCapsulesBuilderImpl> {
|
||||
public PrimaryCapsules build() {
|
||||
PrimaryCapsules l = new PrimaryCapsules(this);
|
||||
if (l.getCapsuleDimensions() <= 0 || l.getChannels() <= 0) {
|
||||
throw new IllegalArgumentException(
|
||||
"Invalid configuration for Primary Capsules (layer name = \""
|
||||
+ l.getName()
|
||||
+ "\"):"
|
||||
+ " capsuleDimensions and channels must be > 0. Got: "
|
||||
+ capsuleDimensions
|
||||
+ l.getCapsuleDimensions()
|
||||
+ ", "
|
||||
+ channels$value);
|
||||
+ l.getChannels());
|
||||
}
|
||||
|
||||
if (capsules < 0) {
|
||||
if (l.getCapsules() < 0) {
|
||||
throw new IllegalArgumentException(
|
||||
"Invalid configuration for Capsule ILayer (layer name = \""
|
||||
+ l.getName()
|
||||
+ "\"):"
|
||||
+ " capsules must be >= 0 if set. Got: "
|
||||
+ capsules);
|
||||
+ l.getCapsules());
|
||||
}
|
||||
return l;
|
||||
}
|
||||
|
|
|
@ -43,21 +43,25 @@ import java.util.Map;
|
|||
|
||||
@Data
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@SuperBuilder(buildMethodName = "initBuild")
|
||||
@SuperBuilder
|
||||
public class RecurrentAttentionLayer extends SameDiffLayer {
|
||||
|
||||
private static final class RecurrentAttentionLayerBuilderImpl extends RecurrentAttentionLayerBuilder<RecurrentAttentionLayer, RecurrentAttentionLayerBuilderImpl> {
|
||||
public RecurrentAttentionLayer build() {
|
||||
RecurrentAttentionLayer l = new RecurrentAttentionLayer(this);
|
||||
Preconditions.checkArgument(l.isProjectInput() || l.getNHeads() == 1, "projectInput must be true when nHeads != 1");
|
||||
Preconditions.checkArgument(l.isProjectInput() || l.getNIn() == l.getNOut(), "nIn must be equal to nOut when projectInput is false");
|
||||
Preconditions.checkArgument(!l.isProjectInput() || l.getNOut() != 0, "nOut must be specified when projectInput is true");
|
||||
Preconditions.checkArgument(l.getNOut() % l.getNHeads() == 0 || l.getNHeads() > 0, "nOut isn't divided by nHeads cleanly. Specify the headSize manually.");
|
||||
|
||||
|
||||
return l;
|
||||
}
|
||||
}
|
||||
public static abstract class RecurrentAttentionLayerBuilder<C extends RecurrentAttentionLayer, B extends RecurrentAttentionLayerBuilder<C,B>>
|
||||
extends SameDiffLayerBuilder<C,B> {
|
||||
|
||||
public C build() {
|
||||
Preconditions.checkArgument(this.projectInput$value || this.nHeads == 1, "projectInput must be true when nHeads != 1");
|
||||
Preconditions.checkArgument(this.projectInput$value || nIn == nOut, "nIn must be equal to nOut when projectInput is false");
|
||||
Preconditions.checkArgument(!this.projectInput$value || nOut != 0, "nOut must be specified when projectInput is true");
|
||||
Preconditions.checkArgument(this.nOut % nHeads == 0 || headSize > 0, "nOut isn't divided by nHeads cleanly. Specify the headSize manually.");
|
||||
|
||||
C l = initBuild();
|
||||
return l;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -24,7 +24,6 @@ import java.util.Collection;
|
|||
import java.util.Map;
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.ToString;
|
||||
import lombok.experimental.SuperBuilder;
|
||||
import org.deeplearning4j.nn.api.Layer;
|
||||
|
@ -42,88 +41,104 @@ import org.nd4j.linalg.lossfunctions.LossFunctions;
|
|||
@Data
|
||||
@ToString(callSuper = true)
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder")
|
||||
@SuperBuilder(builderMethodName = "innerBuilder")
|
||||
public class RnnOutputLayer extends BaseOutputLayer {
|
||||
|
||||
/**
|
||||
* @param rnnDataFormat Data format expected by the layer. NCW = [miniBatchSize, size, timeSeriesLength],
|
||||
* NWC = [miniBatchSize, timeSeriesLength, size]. Defaults to NCW.
|
||||
*/
|
||||
private RNNFormat dataFormat;
|
||||
/**
|
||||
* @param rnnDataFormat Data format expected by the layer. NCW = [miniBatchSize, size,
|
||||
* timeSeriesLength], NWC = [miniBatchSize, timeSeriesLength, size]. Defaults to NCW.
|
||||
*/
|
||||
private RNNFormat dataFormat;
|
||||
|
||||
public static RnnOutputLayerBuilder<?,?> builder() {
|
||||
return innerBuilder();
|
||||
public static RnnOutputLayerBuilder<?, ?> builder() {
|
||||
return innerBuilder();
|
||||
}
|
||||
|
||||
/**
|
||||
* @param lossFn Loss function for the output layer
|
||||
*/
|
||||
public static RnnOutputLayerBuilder<?, ?> builder(LossFunctions.LossFunction lossFn) {
|
||||
return innerBuilder().lossFunction(lossFn);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Layer instantiate(
|
||||
NeuralNetConfiguration conf,
|
||||
Collection<TrainingListener> trainingListeners,
|
||||
int layerIndex,
|
||||
INDArray layerParamsView,
|
||||
boolean initializeParams,
|
||||
DataType networkDataType) {
|
||||
LayerValidation.assertNInNOutSet("RnnOutputLayer", getName(), layerIndex, getNIn(), getNOut());
|
||||
LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
|
||||
|
||||
org.deeplearning4j.nn.layers.recurrent.RnnOutputLayer ret =
|
||||
new org.deeplearning4j.nn.layers.recurrent.RnnOutputLayer(lconf, networkDataType);
|
||||
ret.addTrainingListeners(trainingListeners);
|
||||
ret.setIndex(layerIndex);
|
||||
ret.setParamsViewArray(layerParamsView);
|
||||
Map<String, INDArray> paramTable = initializer().init(this, layerParamsView, initializeParams);
|
||||
ret.setParamTable(paramTable);
|
||||
ret.setLayerConfiguration(lconf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ParamInitializer initializer() {
|
||||
return DefaultParamInitializer.getInstance();
|
||||
}
|
||||
|
||||
@Override
|
||||
public InputType getOutputType(int layerIndex, InputType inputType) {
|
||||
if (inputType == null || inputType.getType() != InputType.Type.RNN) {
|
||||
throw new IllegalStateException(
|
||||
"Invalid input type for RnnOutputLayer (layer index = "
|
||||
+ layerIndex
|
||||
+ ", layer name=\""
|
||||
+ getName()
|
||||
+ "\"): Expected RNN input, got "
|
||||
+ inputType);
|
||||
}
|
||||
InputType.InputTypeRecurrent itr = (InputType.InputTypeRecurrent) inputType;
|
||||
|
||||
return InputType.recurrent(nOut, itr.getTimeSeriesLength(), itr.getFormat());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNIn(InputType inputType, boolean override) {
|
||||
if (inputType == null || inputType.getType() != InputType.Type.RNN) {
|
||||
throw new IllegalStateException(
|
||||
"Invalid input type for RnnOutputLayer (layer name=\""
|
||||
+ getName()
|
||||
+ "\"): Expected RNN input, got "
|
||||
+ inputType);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param lossFn Loss function for the output layer
|
||||
*/
|
||||
public static RnnOutputLayerBuilder<?,?> builder(LossFunctions.LossFunction lossFn) {
|
||||
return innerBuilder()
|
||||
.lossFunction(lossFn);
|
||||
InputType.InputTypeRecurrent r = (InputType.InputTypeRecurrent) inputType;
|
||||
if (dataFormat == null || override) {
|
||||
this.dataFormat = r.getFormat();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Layer instantiate(NeuralNetConfiguration conf, Collection<TrainingListener> trainingListeners,
|
||||
int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) {
|
||||
LayerValidation.assertNInNOutSet("RnnOutputLayer", getName(), layerIndex, getNIn(), getNOut());
|
||||
LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
|
||||
|
||||
org.deeplearning4j.nn.layers.recurrent.RnnOutputLayer ret =
|
||||
new org.deeplearning4j.nn.layers.recurrent.RnnOutputLayer(lconf, networkDataType);
|
||||
ret.addTrainingListeners(trainingListeners);
|
||||
ret.setIndex(layerIndex);
|
||||
ret.setParamsViewArray(layerParamsView);
|
||||
Map<String, INDArray> paramTable = initializer().init(this, layerParamsView, initializeParams);
|
||||
ret.setParamTable(paramTable);
|
||||
ret.setLayerConfiguration(lconf);
|
||||
return ret;
|
||||
if (nIn <= 0 || override) {
|
||||
this.nIn = r.getSize();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public ParamInitializer initializer() {
|
||||
return DefaultParamInitializer.getInstance();
|
||||
@Override
|
||||
public InputPreProcessor getPreProcessorForInputType(InputType inputType) {
|
||||
return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, dataFormat, getName());
|
||||
}
|
||||
|
||||
public abstract static class RnnOutputLayerBuilder<
|
||||
C extends RnnOutputLayer, B extends RnnOutputLayerBuilder<C, B>>
|
||||
extends BaseOutputLayerBuilder<C, B> {}
|
||||
|
||||
private static final class RnnOutputLayerBuilderImpl
|
||||
extends RnnOutputLayerBuilder<RnnOutputLayer, RnnOutputLayerBuilderImpl> {
|
||||
public RnnOutputLayer build() {
|
||||
RnnOutputLayer l = new RnnOutputLayer(this);
|
||||
l.initializeConstraints();
|
||||
return l;
|
||||
}
|
||||
|
||||
@Override
|
||||
public InputType getOutputType(int layerIndex, InputType inputType) {
|
||||
if (inputType == null || inputType.getType() != InputType.Type.RNN) {
|
||||
throw new IllegalStateException("Invalid input type for RnnOutputLayer (layer index = " + layerIndex
|
||||
+ ", layer name=\"" + getName() + "\"): Expected RNN input, got " + inputType);
|
||||
}
|
||||
InputType.InputTypeRecurrent itr = (InputType.InputTypeRecurrent) inputType;
|
||||
|
||||
return InputType.recurrent(nOut, itr.getTimeSeriesLength(), itr.getFormat());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNIn(InputType inputType, boolean override) {
|
||||
if (inputType == null || inputType.getType() != InputType.Type.RNN) {
|
||||
throw new IllegalStateException("Invalid input type for RnnOutputLayer (layer name=\"" + getName()
|
||||
+ "\"): Expected RNN input, got " + inputType);
|
||||
}
|
||||
|
||||
InputType.InputTypeRecurrent r = (InputType.InputTypeRecurrent) inputType;
|
||||
if(dataFormat == null || override) {
|
||||
this.dataFormat = r.getFormat();
|
||||
}
|
||||
|
||||
if (nIn <= 0 || override) {
|
||||
this.nIn = r.getSize();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public InputPreProcessor getPreProcessorForInputType(InputType inputType) {
|
||||
return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, dataFormat, getName());
|
||||
}
|
||||
|
||||
public static abstract class RnnOutputLayerBuilder<C extends RnnOutputLayer, B extends RnnOutputLayerBuilder<C, B>> extends BaseOutputLayerBuilder<C, B> {
|
||||
public C build() {
|
||||
C l = this.initBuild();
|
||||
l.initializeConstraints();
|
||||
return l;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -38,7 +38,7 @@ import org.nd4j.linalg.factory.Nd4j;
|
|||
|
||||
@Data
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@SuperBuilder(buildMethodName = "initBuild")
|
||||
@SuperBuilder
|
||||
public class SelfAttentionLayer extends SameDiffLayer {
|
||||
private static final String WEIGHT_KEY_QUERY_PROJECTION = "Wq";
|
||||
private static final String WEIGHT_KEY_KEY_PROJECTION = "Wk";
|
||||
|
|
|
@ -44,7 +44,7 @@ import org.nd4j.linalg.api.ndarray.INDArray;
|
|||
@Data
|
||||
@ToString(callSuper = true)
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder")
|
||||
@SuperBuilder(builderMethodName = "innerBuilder")
|
||||
public class SeparableConvolution2D extends ConvolutionLayer {
|
||||
/**
|
||||
* Set constraints to be applied to the point-wise convolution weight parameters of this layer.
|
||||
|
|
|
@ -50,7 +50,7 @@ import org.nd4j.linalg.api.ndarray.INDArray;
|
|||
|
||||
@ToString(callSuper = true)
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@SuperBuilder(buildMethodName = "initBuild")
|
||||
@SuperBuilder
|
||||
public class Subsampling1DLayer extends SubsamplingLayer {
|
||||
|
||||
@Override
|
||||
|
@ -153,11 +153,9 @@ public class Subsampling1DLayer extends SubsamplingLayer {
|
|||
return true;
|
||||
}
|
||||
|
||||
public static abstract class Subsampling1DLayerBuilder<C extends Subsampling1DLayer, B extends Subsampling1DLayerBuilder<C, B>> extends
|
||||
SubsamplingLayerBuilder<C, B> {
|
||||
|
||||
public C build() {
|
||||
C l = this.initBuild();
|
||||
private static final class Subsampling1DLayerBuilderImpl extends Subsampling1DLayerBuilder<Subsampling1DLayer, Subsampling1DLayerBuilderImpl> {
|
||||
public Subsampling1DLayer build() {
|
||||
Subsampling1DLayer l =new Subsampling1DLayer(this);
|
||||
if (l.getPoolingType() == org.deeplearning4j.nn.conf.layers.PoolingType.PNORM && l.getPnorm() <= 0) {
|
||||
throw new IllegalStateException(
|
||||
"Incorrect Subsampling config: p-norm must be set when using PoolingType.PNORM");
|
||||
|
@ -167,6 +165,11 @@ public class Subsampling1DLayer extends SubsamplingLayer {
|
|||
return l;
|
||||
}
|
||||
|
||||
}
|
||||
public static abstract class Subsampling1DLayerBuilder<C extends Subsampling1DLayer, B extends Subsampling1DLayerBuilder<C, B>> extends
|
||||
SubsamplingLayerBuilder<C, B> {
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @param kernelSize
|
||||
|
|
|
@ -45,7 +45,7 @@ import org.nd4j.linalg.learning.regularization.Regularization;
|
|||
@Data
|
||||
@ToString(callSuper = true)
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@SuperBuilder(builderMethodName = "innerBuilder", buildMethodName = "initBuild")
|
||||
@SuperBuilder(builderMethodName = "innerBuilder")
|
||||
public class Subsampling3DLayer extends NoParamLayer {
|
||||
|
||||
@Builder.Default protected ConvolutionMode convolutionMode = ConvolutionMode.Truncate;
|
||||
|
@ -304,17 +304,22 @@ public class Subsampling3DLayer extends NoParamLayer {
|
|||
return self();
|
||||
}
|
||||
|
||||
public C build() {
|
||||
if (kernelSize.length != 3) {
|
||||
|
||||
}
|
||||
|
||||
private static final class Subsampling3DLayerBuilderImpl extends Subsampling3DLayerBuilder<Subsampling3DLayer, Subsampling3DLayerBuilderImpl> {
|
||||
public Subsampling3DLayer build() {
|
||||
Subsampling3DLayer l = new Subsampling3DLayer(this);
|
||||
if (l.getKernelSize().length != 3) {
|
||||
throw new IllegalArgumentException("Kernel size must be length 3");
|
||||
}
|
||||
|
||||
if (stride.length != 3) {
|
||||
if (l.getStride().length != 3) {
|
||||
throw new IllegalArgumentException("Invalid stride, must be length 3");
|
||||
}
|
||||
C l = this.initBuild();
|
||||
ConvolutionUtils.validateConvolutionModePadding(l.getConvolutionMode(), padding);
|
||||
Convolution3DUtils.validateCnn3DKernelStridePadding(kernelSize, stride, padding);
|
||||
|
||||
ConvolutionUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding());
|
||||
Convolution3DUtils.validateCnn3DKernelStridePadding(l.getKernelSize(), l.getStride(), l.getPadding());
|
||||
return l;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -45,7 +45,7 @@ import org.nd4j.linalg.api.ndarray.INDArray;
|
|||
@Data
|
||||
@ToString(callSuper = true)
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder")
|
||||
@SuperBuilder(builderMethodName = "innerBuilder")
|
||||
public class SubsamplingLayer extends NoParamLayer {
|
||||
|
||||
public static final CNN2DFormat DEFAULT_FORMAT = CNN2DFormat.NCHW;
|
||||
|
@ -425,25 +425,7 @@ public class SubsamplingLayer extends NoParamLayer {
|
|||
return self();
|
||||
}
|
||||
|
||||
public C build() {
|
||||
if (kernelSize$value.length != 2) {
|
||||
throw new IllegalArgumentException("Kernel size of should be rows x columns (a 2d array)");
|
||||
}
|
||||
|
||||
if (stride$value.length != 2) {
|
||||
throw new IllegalArgumentException("Invalid stride, must be length 2");
|
||||
}
|
||||
if (poolingType$value == org.deeplearning4j.nn.conf.layers.PoolingType.PNORM && pnorm <= 0) {
|
||||
throw new IllegalStateException(
|
||||
"Incorrect Subsampling config: p-norm must be set when using PoolingType.PNORM");
|
||||
}
|
||||
ConvolutionUtils.validateConvolutionModePadding(convolutionMode$value, padding$value);
|
||||
ConvolutionUtils.validateCnnKernelStridePadding(
|
||||
kernelSize$value, stride$value, padding$value);
|
||||
|
||||
C l = initBuild();
|
||||
return l;
|
||||
}
|
||||
|
||||
public B setConvolutionMode(ConvolutionMode convolutionMode){
|
||||
Preconditions.checkState(allowCausal$value || convolutionMode$value != ConvolutionMode.Causal, "Causal convolution mode can only be used with 1D" +
|
||||
|
@ -459,4 +441,25 @@ public class SubsamplingLayer extends NoParamLayer {
|
|||
return self();
|
||||
}
|
||||
}
|
||||
private static final class SubsamplingLayerBuilderImpl extends SubsamplingLayerBuilder<SubsamplingLayer, SubsamplingLayerBuilderImpl> {
|
||||
public SubsamplingLayer build() {
|
||||
SubsamplingLayer l = new SubsamplingLayer(this);
|
||||
if (l.getKernelSize().length != 2) {
|
||||
throw new IllegalArgumentException("Kernel size of should be rows x columns (a 2d array)");
|
||||
}
|
||||
|
||||
if (l.getStride().length != 2) {
|
||||
throw new IllegalArgumentException("Invalid stride, must be length 2");
|
||||
}
|
||||
if (l.getPoolingType() == org.deeplearning4j.nn.conf.layers.PoolingType.PNORM && l.getPnorm() <= 0) {
|
||||
throw new IllegalStateException(
|
||||
"Incorrect Subsampling config: p-norm must be set when using PoolingType.PNORM");
|
||||
}
|
||||
ConvolutionUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding());
|
||||
ConvolutionUtils.validateCnnKernelStridePadding(
|
||||
l.getKernelSize(), l.getStride(), l.getPadding());
|
||||
|
||||
return l;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -41,7 +41,7 @@ import java.util.Map;
|
|||
|
||||
@Data
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@SuperBuilder(builderMethodName = "innerBuilder", buildMethodName = "initBuild")
|
||||
@SuperBuilder(builderMethodName = "innerBuilder")
|
||||
public class ZeroPaddingLayer extends NoParamLayer {
|
||||
/**
|
||||
* @param padding Padding value for top, bottom, left, and right. Must be length 4 array
|
||||
|
|
|
@ -48,7 +48,7 @@ import org.nd4j.serde.jackson.shaded.NDArrayTextSerializer;
|
|||
|
||||
|
||||
@EqualsAndHashCode(callSuper = false)
|
||||
@SuperBuilder(buildMethodName = "initBuild")
|
||||
@SuperBuilder
|
||||
public class Yolo2OutputLayer extends LayerConfiguration {
|
||||
|
||||
/**
|
||||
|
|
|
@ -43,8 +43,7 @@ import org.nd4j.linalg.learning.regularization.Regularization;
|
|||
public abstract class BaseWrapperLayerConfiguration extends LayerConfiguration {
|
||||
|
||||
/** The configuration to of another layer to wrap */
|
||||
@Getter @Setter
|
||||
protected LayerConfiguration underlying;
|
||||
@Getter @Setter protected LayerConfiguration underlying;
|
||||
|
||||
/**
|
||||
* Set the net configuration for this configuration as well as for the underlying layer (if not
|
||||
|
|
Loading…
Reference in New Issue