Using @SuperBuilder for LayerConfigurations

Signed-off-by: brian <brian@brutex.de>
master
Brian Rosenberger 2023-04-25 15:42:24 +02:00
parent 391a1ad397
commit 8f524827e4
40 changed files with 427 additions and 404 deletions
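The commit replaces no-arg constructors and hand-written setters on the LayerConfiguration classes with Lombok's @SuperBuilder, so each configuration class exposes a generated, inheritance-aware builder. A minimal, self-contained sketch of that pattern (the class and field names below are illustrative, not the actual DL4J types):

import lombok.Builder;
import lombok.Getter;
import lombok.experimental.SuperBuilder;

// Parent and child both carry @SuperBuilder, so the child's builder also exposes the
// parent's properties in one fluent chain.
@Getter
@SuperBuilder
class BaseConfig {
  private String name;
}

@Getter
@SuperBuilder
class DenseConfig extends BaseConfig {
  @Builder.Default private int nOut = 10;
}

class BuilderDemo {
  public static void main(String[] args) {
    DenseConfig cfg = DenseConfig.builder().name("dense-1").nOut(64).build();
    System.out.println(cfg.getName() + " -> " + cfg.getNOut()); // dense-1 -> 64
  }
}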

View File

@@ -171,7 +171,7 @@ public class App {
     LayerConfiguration[] disLayers = Arrays.stream(disLayers())
         .map((layer) -> {
           if (layer instanceof DenseLayer || layer instanceof OutputLayer) {
-            return new FrozenLayerWithBackprop(layer);
+            return FrozenLayerWithBackprop.builder(layer);
           } else {
             return layer;
           }
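After this change, frozen wrappers are obtained from a static builder factory rather than a constructor. A hedged sketch of the wrapping step, assuming the builder(LayerConfiguration) factory added to FrozenLayerWithBackprop later in this commit and that the chain is completed with build(); the helper class below is hypothetical:

import java.util.Arrays;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop;

final class FreezeUtil {
  // Wraps dense and output layers so their parameters stay fixed while gradients still
  // flow through them; other layer types are passed through unchanged.
  static LayerConfiguration[] freezeDenseAndOutput(LayerConfiguration[] layers) {
    return Arrays.stream(layers)
        .map(l -> (l instanceof DenseLayer || l instanceof OutputLayer)
            ? FrozenLayerWithBackprop.builder(l).build()
            : l)
        .toArray(LayerConfiguration[]::new);
  }
}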

View File

@@ -162,19 +162,19 @@ public class UtilLayerGradientChecks extends BaseDL4JTest {
             }

-            NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
-                    .updater(new NoOp())
-                    .activation(Activation.TANH)
-                    .dataType(DataType.DOUBLE)
-                    .dist(new NormalDistribution(0,2))
-                    .list()
-                    .layer(l1)
-                    .layer(new MaskLayer())
-                    .layer(l2)
-                    .layer(l3)
-                    .inputType(it)
-                    .build();
+            NeuralNetConfiguration conf =
+                NeuralNetConfiguration.builder()
+                    .updater(new NoOp())
+                    .activation(Activation.TANH)
+                    .dataType(DataType.DOUBLE)
+                    .dist(new NormalDistribution(0, 2))
+                    .list()
+                    .layer(l1)
+                    .layer(MaskLayer.builder().build())
+                    .layer(l2)
+                    .layer(l3)
+                    .inputType(it)
+                    .build();

             MultiLayerNetwork net = new MultiLayerNetwork(conf);
             net.init();

@@ -203,11 +203,11 @@ public class UtilLayerGradientChecks extends BaseDL4JTest {
                 .list()
                 .layer(DenseLayer.builder().nIn(10).nOut(10)
                         .activation(Activation.TANH).weightInit(WeightInit.XAVIER).build())
-                .layer(new FrozenLayerWithBackprop(DenseLayer.builder().nIn(10).nOut(10)
-                        .activation(Activation.TANH).weightInit(WeightInit.XAVIER).build()))
-                .layer(new FrozenLayerWithBackprop(
-                        DenseLayer.builder().nIn(10).nOut(10).activation(Activation.TANH)
-                                .weightInit(WeightInit.XAVIER).build()))
+                .layer(FrozenLayerWithBackprop.builder().underlying(DenseLayer.builder().nIn(10).nOut(10)
+                        .activation(Activation.TANH).weightInit(WeightInit.XAVIER).build()).build())
+                .layer(FrozenLayerWithBackprop.builder().underlying(
+                        DenseLayer.builder().nIn(10).nOut(10).activation(Activation.TANH)
+                                .weightInit(WeightInit.XAVIER).build()).build())
                 .layer(OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT)
                         .activation(Activation.SOFTMAX).nIn(10).nOut(10).build())
                 .build();

View File

@@ -40,15 +40,12 @@ import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.ndarray.INDArray;
 import org.nd4j.linalg.learning.config.IUpdater;

-@NoArgsConstructor
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
 @SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder")
 public class ActivationLayer extends NoParamLayer {
-  {
-    setType(LayerType.ACT);
-  }

   public static ActivationLayerBuilder<?, ?> builder(Activation activation) {
     return innerBuilder().activation(activation);
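The builderMethodName = "innerBuilder" setting used above lets a class keep a hand-written static builder(...) entry point with convenience arguments while Lombok still generates the underlying builder type. A small illustrative sketch of the same pattern (example names, not the DL4J classes):

import lombok.Getter;
import lombok.experimental.SuperBuilder;

@Getter
@SuperBuilder(builderMethodName = "innerBuilder")
class ActConfig {
  private String activation;

  // Public entry point with a convenience argument; it pre-populates the generated builder.
  public static ActConfigBuilder<?, ?> builder(String activation) {
    return innerBuilder().activation(activation);
  }
}

class ActDemo {
  public static void main(String[] args) {
    ActConfig cfg = ActConfig.builder("tanh").build();
    System.out.println(cfg.getActivation()); // tanh
  }
}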

View File

@@ -49,6 +49,8 @@ import org.nd4j.linalg.learning.regularization.WeightDecay;
 @SuperBuilder
 public abstract class BaseLayerConfiguration extends LayerConfiguration
     implements ITraininableLayerConfiguration, Serializable, Cloneable {

   /**
    * Set constraints to be applied to all layers. Default: no constraints.<br>
    * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm

@@ -84,9 +86,9 @@ public abstract class BaseLayerConfiguration extends LayerConfiguration
   @Getter @Setter @Builder.Default
   protected double gainInit = 0.0;

   /** Regularization for the parameters (excluding biases). */
-  @Builder.Default @Getter protected List<Regularization> regularization = new ArrayList<>();
+  @Builder.Default @Getter @Setter protected List<Regularization> regularization = new ArrayList<>();

   /** Regularization for the bias parameters only */
-  @Builder.Default @Getter
+  @Builder.Default @Getter @Setter
   protected List<Regularization> regularizationBias = new ArrayList<>();

   /**
    * Gradient updater. For example, {@link org.nd4j.linalg.learning.config.Adam} or {@link

@@ -210,6 +212,7 @@ public abstract class BaseLayerConfiguration extends LayerConfiguration
       C extends BaseLayerConfiguration, B extends BaseLayerConfigurationBuilder<C, B>>
       extends LayerConfigurationBuilder<C, B> {

     /**
      * Set weight initialization scheme to random sampling via the specified distribution.
      * Equivalent to: {@code .weightInit(new WeightInitDistribution(distribution))}

View File

@@ -29,8 +29,7 @@ import org.nd4j.linalg.lossfunctions.ILossFunction;
 import org.nd4j.linalg.lossfunctions.LossFunctions;
 import org.nd4j.linalg.lossfunctions.impl.LossMCXENT;

-@Data
-@NoArgsConstructor
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
 @SuperBuilder(builderMethodName = "innerBuilder")

@@ -39,19 +38,16 @@ public abstract class BaseOutputLayer extends FeedForwardLayer {
   /**
    * Loss function for the output layer
    */
-  @lombok.Builder.Default
+  @lombok.Builder.Default @Getter @Setter
   protected ILossFunction lossFunction = new LossMCXENT();

   /**
    * If true (default): include bias parameters in the model. False: no bias.
    *
    */
-  @lombok.Builder.Default
+  @lombok.Builder.Default @Getter @Setter
   protected boolean hasBias = true;

-  public boolean hasBias() {
-    return hasBias;
-  }

   @Override
   public LayerMemoryReport getMemoryReport(InputType inputType) {

View File

@@ -31,11 +31,11 @@ import org.nd4j.linalg.lossfunctions.LossFunctions;
 @JsonIgnoreProperties("pretrain")
 @SuperBuilder
 public abstract class BasePretrainNetwork extends FeedForwardLayer {

-  @Builder.Default
+  @Builder.Default @Getter
   protected LossFunctions.LossFunction lossFunction =
       LossFunctions.LossFunction.RECONSTRUCTION_CROSSENTROPY;

-  @Builder.Default protected double visibleBiasInit = 0.0;
+  @Builder.Default @Getter protected double visibleBiasInit = 0.0;

   @Override
   public boolean isPretrainParam(String paramName) {

View File

@@ -31,8 +31,6 @@ import org.deeplearning4j.nn.conf.inputs.InputType;
  * @author Max Pumperla
  */
-@Data
-@NoArgsConstructor
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
 @SuperBuilder()

@@ -43,7 +41,7 @@ public abstract class BaseUpsamplingLayer extends NoParamLayer {
    * dimensions (e.g. 2 for Upsampling2D etc.)
    *
    */
-  @Builder.Default
+  @Builder.Default @Getter
   protected int[] size = new int[] {1};

   @Override

@@ -60,8 +58,4 @@ public abstract class BaseUpsamplingLayer extends NoParamLayer {
     }
     return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getName());
   }
 }

View File

@@ -42,7 +42,6 @@ import java.util.Collection;
 import java.util.Map;

 @Data
-@NoArgsConstructor
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
 @SuperBuilder(buildMethodName = "initBuild")

View File

@@ -37,15 +37,15 @@ import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.ndarray.INDArray;
 import org.nd4j.linalg.lossfunctions.ILossFunction;

-@Data
-@NoArgsConstructor
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
 @SuperBuilder
 public class Cnn3DLossLayer extends FeedForwardLayer {

+  @Getter @Setter
   protected ILossFunction lossFunction;

   /** Format of the input/output data. See {@link Convolution3D.DataFormat} for details */
+  @Getter @Setter
   protected Convolution3D.DataFormat dataFormat;

   @Override

View File

@@ -24,10 +24,11 @@ import lombok.Data;
 import lombok.EqualsAndHashCode;
 import lombok.NoArgsConstructor;
 import lombok.ToString;
+import lombok.experimental.SuperBuilder;

 @Data
-@NoArgsConstructor
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
+@SuperBuilder
 public class Convolution1D extends Convolution1DLayer {
 }

View File

@@ -45,7 +45,6 @@ import org.nd4j.linalg.api.ndarray.INDArray;
  * wide.
  */
 @Data
-@NoArgsConstructor
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
 @SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder")

@@ -142,7 +141,7 @@ public class Convolution1DLayer extends ConvolutionLayer {
     } else {
       outLength =
           Convolution1DUtils.getOutputSize(
-              inputTsLength, kernelSize[0], stride[0], padding[0], convolutionMode, dilation[0]);
+              inputTsLength, kernelSize[0], stride[0], padding[0], getConvolutionMode(), dilation[0]);
     }
     return InputType.recurrent(nOut, outLength, rnnDataFormat);

View File

@@ -24,10 +24,12 @@ import lombok.Data;
 import lombok.EqualsAndHashCode;
 import lombok.NoArgsConstructor;
 import lombok.ToString;
+import lombok.experimental.SuperBuilder;

 @Data
-@NoArgsConstructor
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
+@SuperBuilder
 public class Convolution2D extends ConvolutionLayer {
 }

View File

@@ -38,7 +38,6 @@ import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.ndarray.INDArray;

 @Data
-@NoArgsConstructor
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
 @SuperBuilder(builderMethodName = "innerBuilder", buildMethodName = "initBuild")

@@ -118,7 +117,7 @@ public class Convolution3D extends ConvolutionLayer {
    * kernel size
    */
   public boolean hasBias() {
-    return hasBias;
+    return isHasBias();
   }

   @Override

View File

@@ -46,6 +46,7 @@ import org.nd4j.linalg.api.ndarray.INDArray;
  * to be used in the net or in other words the channels The builder specifies the filter/kernel
  * size, the stride and padding The pooling layer takes the kernel size
  */
+@Data
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
 @SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder")

@@ -55,14 +56,14 @@ public class ConvolutionLayer extends FeedForwardLayer {
    *
    * @param kernelSize the height and width of the kernel
    */
-  public @Builder.Default int[] kernelSize = new int[] {5, 5}; // Square filter
+  private @Builder.Default @Getter @Setter int[] kernelSize = new int[] {5, 5}; // Square filter

   /** If true (default): include bias parameters in the model. False: no bias. */
-  @Builder.Default protected boolean hasBias = true;
+  @Builder.Default @Getter @Setter private boolean hasBias = true;

   /**
    * Set the convolution mode for the Convolution layer. See {@link ConvolutionMode} for more
    * details Default is {@link ConvolutionMode}.Truncate.
    */
-  @Builder.Default protected ConvolutionMode convolutionMode = ConvolutionMode.Truncate;
+  @Builder.Default @Getter @Setter private ConvolutionMode convolutionMode = ConvolutionMode.Truncate;

   /**
    * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last).

@@ -72,7 +73,7 @@ public class ConvolutionLayer extends FeedForwardLayer {
    * @param format Format for activations (in and out)
    */
   @Builder.Default
-  protected CNN2DFormat convFormat =
+  private CNN2DFormat convFormat =
       CNN2DFormat.NCHW; // default value for legacy serialization reasons

   /**

@@ -85,25 +86,25 @@ public class ConvolutionLayer extends FeedForwardLayer {
    * http://deeplearning.net/software/theano/tutorial/conv_arithmetic.html#dilated-convolutions</a>
    * <br>
    */
-  protected @Builder.Default int[] dilation = new int[] {1, 1};
+  private @Builder.Default int[] dilation = new int[] {1, 1};

   /** Default is 2. Down-sample by a factor of 2 */
-  protected @Builder.Default int[] stride = new int[] {1, 1};
+  private @Builder.Default int[] stride = new int[] {1, 1};

-  protected @Builder.Default int[] padding = new int[] {0, 0};
+  private @Builder.Default int[] padding = new int[] {0, 0};

   /**
    * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementatation
    * be allowed? If set to false, an exception in CuDNN will be propagated back to the user. If
    * false, the built-in (non-CuDNN) implementation for ConvolutionLayer will be used
    */
-  @Builder.Default protected boolean cudnnAllowFallback = true;
+  @Builder.Default private boolean cudnnAllowFallback = true;

   /** Defaults to "PREFER_FASTEST", but "NO_WORKSPACE" uses less memory. */
-  @Builder.Default protected AlgoMode cudnnAlgoMode = AlgoMode.PREFER_FASTEST;
+  @Builder.Default private AlgoMode cudnnAlgoMode = AlgoMode.PREFER_FASTEST;

-  protected FwdAlgo cudnnFwdAlgo;
-  protected BwdFilterAlgo cudnnBwdFilterAlgo;
-  protected BwdDataAlgo cudnnBwdDataAlgo;
+  private FwdAlgo cudnnFwdAlgo;
+  private BwdFilterAlgo cudnnBwdFilterAlgo;
+  private BwdDataAlgo cudnnBwdDataAlgo;

-  @Builder.Default protected int convolutionDim = 2; // 2D convolution by default
+  @Builder.Default private int convolutionDim = 2; // 2D convolution by default

   /** Causal convolution - allowed for 1D only */
   @Builder.Default private boolean allowCausal = false;
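Two details above drive many of the call-site changes in this commit. Fields keep their declared defaults only when annotated with @Builder.Default, because a @SuperBuilder-generated build() otherwise leaves them at zero or null; and Lombok names the generated accessor for a boolean field hasBias isHasBias(), which is why hand-written hasBias() methods now delegate to it. A minimal sketch (illustrative class names, not the DL4J types):

import lombok.Builder;
import lombok.Getter;
import lombok.experimental.SuperBuilder;

@Getter
@SuperBuilder
class ConvConfig {
  // Without @Builder.Default the builder would produce kernelSize == null and hasBias == false.
  @Builder.Default private int[] kernelSize = new int[] {5, 5};
  @Builder.Default private boolean hasBias = true;
}

class ConvDemo {
  public static void main(String[] args) {
    ConvConfig cfg = ConvConfig.builder().build();
    // Prints "5 true": the declared defaults survive, and the boolean getter is isHasBias().
    System.out.println(cfg.getKernelSize()[0] + " " + cfg.isHasBias());
  }
}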

View File

@@ -44,7 +44,6 @@ import java.util.Map;
  * The pooling layer takes the kernel size
  */
 @Data
-@NoArgsConstructor
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
 @SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuild")

@@ -88,20 +87,20 @@ private CNN2DFormat format = CNN2DFormat.NCHW;
     }
   }

   public boolean hasBias() {
-    return hasBias;
+    return isHasBias();
   }

   @Override
   public Deconvolution2D clone() {
     Deconvolution2D clone = (Deconvolution2D) super.clone();
-    if (clone.kernelSize != null) {
-      clone.kernelSize = clone.kernelSize.clone();
+    if (clone.getKernelSize() != null) {
+      clone.setKernelSize(clone.getKernelSize().clone());
     }
-    if (clone.stride != null) {
-      clone.stride = clone.stride.clone();
+    if (clone.getStride() != null) {
+      clone.setStride(clone.getStride().clone());
     }
-    if (clone.padding != null) {
-      clone.padding = clone.padding.clone();
+    if (clone.getPadding() != null) {
+      clone.setPadding(clone.getPadding().clone());
     }
     return clone;
   }

@@ -138,7 +137,7 @@ private CNN2DFormat format = CNN2DFormat.NCHW;
               + "\"): Expected CNN input, got " + inputType);
     }
-    return InputTypeUtil.getOutputTypeDeconvLayer(inputType, kernelSize, stride, padding, dilation, convolutionMode,
+    return InputTypeUtil.getOutputTypeDeconvLayer(inputType, getKernelSize(), getStride(), getPadding(), getDilation(), getConvolutionMode(),
             nOut, layerIndex, getName(), Deconvolution2DLayer.class);
} }

View File

@@ -42,7 +42,6 @@ import org.nd4j.linalg.api.ndarray.INDArray;
  * filter/kernel size, the stride and padding The pooling layer takes the kernel size
  */
 @Data
-@NoArgsConstructor
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
 @SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder")

@@ -63,20 +62,20 @@ public class Deconvolution3D extends ConvolutionLayer {
   }

   public boolean hasBias() {
-    return hasBias;
+    return isHasBias();
   }

   @Override
   public Deconvolution3D clone() {
     Deconvolution3D clone = (Deconvolution3D) super.clone();
-    if (clone.kernelSize != null) {
-      clone.kernelSize = clone.kernelSize.clone();
+    if (clone.getKernelSize() != null) {
+      clone.setKernelSize(clone.getKernelSize().clone());
     }
-    if (clone.stride != null) {
-      clone.stride = clone.stride.clone();
+    if (clone.getStride() != null) {
+      clone.setStride(clone.getStride().clone());
     }
-    if (clone.padding != null) {
-      clone.padding = clone.padding.clone();
+    if (clone.getPadding() != null) {
+      clone.setPadding(clone.getPadding().clone());
     }
     return clone;
   }

@@ -147,11 +146,11 @@ public class Deconvolution3D extends ConvolutionLayer {
     return InputTypeUtil.getOutputTypeDeconv3dLayer(
         inputType,
-        kernelSize,
-        stride,
-        padding,
-        dilation,
-        convolutionMode,
+        getKernelSize(),
+        getStride(),
+        getPadding(),
+        getDilation(),
+        getConvolutionMode(),
         dataFormat,
         nOut,
         layerIndex,

View File

@@ -38,7 +38,7 @@ import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.ndarray.INDArray;

 /** Dense Layer Uses WeightInitXavier as default */
-@Data
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
 @SuperBuilder(

@@ -47,9 +47,9 @@ import org.nd4j.linalg.api.ndarray.INDArray;
 public class DenseLayer extends FeedForwardLayer {

   /** If true (default = false): enable layer normalization on this layer */
-  @lombok.Builder.Default @Accessors private boolean hasLayerNorm = false;
+  @lombok.Builder.Default private boolean hasLayerNorm = false;

-  @lombok.Builder.Default @Accessors private boolean hasBias = true;
+  @lombok.Builder.Default private boolean hasBias = true;

   @Override
   public Layer instantiate(

View File

@@ -20,6 +20,7 @@
 package org.deeplearning4j.nn.conf.layers;

+import java.util.*;
 import lombok.*;
 import lombok.experimental.SuperBuilder;
 import org.deeplearning4j.nn.api.Layer;

@@ -36,134 +37,152 @@ import org.nd4j.common.base.Preconditions;
 import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.ndarray.INDArray;
-import java.util.*;

 @Data
-@NoArgsConstructor
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
 @SuperBuilder(buildMethodName = "initBuild")
 public class DepthwiseConvolution2D extends ConvolutionLayer {
   /**
    * Set channels multiplier for depth-wise convolution
    *
    * @param depthMultiplier integer value, for each input map we get depthMultiplier outputs in
    *     channels-wise step.
    * @return Builder
    */
-  @Builder.Default
-  protected int depthMultiplier = 1;
+  @Builder.Default protected int depthMultiplier = 1;

   /**
    * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last).
    * See {@link CNN2DFormat} for more details.<br>
    * Default: NCHW
    *
    * @param format Format for activations (in and out)
    */
   @Builder.Default
   protected CNN2DFormat dataFormat =
       CNN2DFormat.NCHW; // default value for legacy serialization reasons

   /**
    * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last).
    * See {@link CNN2DFormat} for more details.<br>
    * Default: NCHW
    *
    * @param format Format for activations (in and out)
    */
-  @Builder.Default
-  protected CNN2DFormat cnn2DFormat = CNN2DFormat.NCHW;
+  @Builder.Default protected CNN2DFormat cnn2DFormat = CNN2DFormat.NCHW;

   protected boolean allowCausal() {
     // Causal convolution - allowed for 1D only
     return false;
   }

   @Override
   public DepthwiseConvolution2D clone() {
     DepthwiseConvolution2D clone = (DepthwiseConvolution2D) super.clone();
     clone.depthMultiplier = depthMultiplier;
     return clone;
   }

   @Override
   public Layer instantiate(
       NeuralNetConfiguration conf,
       Collection<TrainingListener> trainingListeners,
       int layerIndex,
       INDArray layerParamsView,
       boolean initializeParams,
       DataType networkDataType) {
     LayerValidation.assertNInNOutSet(
         "DepthwiseConvolution2D", getName(), layerIndex, getNIn(), getNOut());

     LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
     runInheritance();
     DepthwiseConvolution2DLayer ret = new DepthwiseConvolution2DLayer(lconf, networkDataType);

     ret.addTrainingListeners(trainingListeners);
     ret.setIndex(layerIndex);
     ret.setParamsViewArray(layerParamsView);
     Map<String, INDArray> paramTable = initializer().init(this, layerParamsView, initializeParams);
     ret.setParamTable(paramTable);
     ret.setLayerConfiguration(lconf);
     return ret;
   }

   @Override
   public ParamInitializer initializer() {
     return DepthwiseConvolutionParamInitializer.getInstance();
   }

   @Override
   public InputType getOutputType(int layerIndex, InputType inputType) {
     if (inputType == null || inputType.getType() != InputType.Type.CNN) {
       throw new IllegalStateException(
           "Invalid input for depth-wise convolution layer (layer name=\""
               + getName()
               + "\"): Expected CNN input, got "
               + inputType);
     }
-    return InputTypeUtil.getOutputTypeCnnLayers(inputType, kernelSize, stride, padding, dilation, convolutionMode,
-        nOut, layerIndex, getName(), dataFormat, DepthwiseConvolution2DLayer.class);
+    return InputTypeUtil.getOutputTypeCnnLayers(
+        inputType,
+        getKernelSize(),
+        getStride(),
+        getPadding(),
+        getDilation(),
+        getConvolutionMode(),
+        nOut,
+        layerIndex,
+        getName(),
+        dataFormat,
+        DepthwiseConvolution2DLayer.class);
   }

   @Override
   public void setNIn(InputType inputType, boolean override) {
     super.setNIn(inputType, override);

     if (nOut == 0 || override) {
       nOut = this.nIn * this.depthMultiplier;
     }
     this.dataFormat = ((InputType.InputTypeConvolutional) inputType).getFormat();
   }

   public abstract static class DepthwiseConvolution2DBuilder<
           C extends DepthwiseConvolution2D, B extends DepthwiseConvolution2DBuilder<C, B>>
       extends ConvolutionLayerBuilder<C, B> {
     public C build() {
       Preconditions.checkState(
           depthMultiplier$value > 0, "Depth multiplier must be > 0, got %s", depthMultiplier$value);
       C l = this.initBuild();
       ConvolutionUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding());
       ConvolutionUtils.validateCnnKernelStridePadding(
           l.getKernelSize(), l.getStride(), l.getPadding());
       l.initializeConstraints();
       return l;
     }

     @Override
     public B kernelSize(int... kernelSize) {
       super.kernelSize(ValidationUtils.validate2NonNegative(kernelSize, false, "kernelSize"));
       return self();
     }

     @Override
     public B stride(int... stride) {
       super.stride(ValidationUtils.validate2NonNegative(stride, false, "stride"));
       return self();
     }

     @Override
     public B padding(int... padding) {
       super.padding(ValidationUtils.validate2NonNegative(padding, false, "padding"));
       return self();
     }

     @Override
     public B dilation(int... dilation) {
       super.dilation(ValidationUtils.validate2NonNegative(dilation, false, "dilation"));
       return self();
     }
   }
 }
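The buildMethodName = "initBuild" setting above is what makes the hand-written build() possible: Lombok generates the real build method under the name initBuild(), and the custom build() wraps it with validation. Inside such a builder, a @Builder.Default field is stored in a <name>$value field (with a matching <name>$set flag), which is why the check reads depthMultiplier$value. A compact illustrative sketch of the same arrangement (example names, not the DL4J classes):

import lombok.Builder;
import lombok.Getter;
import lombok.experimental.SuperBuilder;

@Getter
@SuperBuilder(buildMethodName = "initBuild")
class DepthwiseConfig {
  @Builder.Default private int depthMultiplier = 1;

  public abstract static class DepthwiseConfigBuilder<
          C extends DepthwiseConfig, B extends DepthwiseConfigBuilder<C, B>> {
    public C build() {
      // Validate an explicitly set value before delegating to the Lombok-generated initBuild().
      if (depthMultiplier$set && depthMultiplier$value <= 0) {
        throw new IllegalStateException("Depth multiplier must be > 0, got " + depthMultiplier$value);
      }
      return initBuild();
    }
  }
}

class DepthwiseDemo {
  public static void main(String[] args) {
    System.out.println(DepthwiseConfig.builder().depthMultiplier(2).build().getDepthMultiplier()); // 2
  }
}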

View File

@@ -45,15 +45,12 @@ import org.nd4j.linalg.learning.regularization.Regularization;
  * the input activation. See {@link Dropout} for the full details
  */
 @Data
-@NoArgsConstructor
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
 @SuperBuilder(builderMethodName = "innerBuilder")
 public class DropoutLayer extends FeedForwardLayer {
-  {
-    setType(LayerType.DO);
-  }

   public static DropoutLayerBuilder<?, ?> builder() {
     return innerBuilder();

View File

@@ -36,6 +36,10 @@ import org.deeplearning4j.nn.conf.preprocessor.RnnToFeedForwardPreProcessor;
 @EqualsAndHashCode(callSuper = true)
 @SuperBuilder
 public abstract class FeedForwardLayer extends BaseLayerConfiguration {
+
+  public static abstract class FeedForwardLayerBuilder<C extends FeedForwardLayer, B extends FeedForwardLayerBuilder<C, B>>
+      extends BaseLayerConfigurationBuilder<C, B> {
+  }

   /**
    * Number of inputs for the layer (usually the size of the last layer). <br> Note that for Convolutional layers,
    * this is the input channels, otherwise is the previous layer size.

@@ -55,7 +59,7 @@ public abstract class FeedForwardLayer extends BaseLayerConfiguration {
    * this is the input channels, otherwise is the previous layer size.
    *
    */
-  @Getter
+  @Getter @Setter
   protected long nOut;

   protected DataFormat timeDistributedFormat;

View File

@@ -57,10 +57,10 @@ public abstract class LayerConfiguration
     implements ILayerConfiguration, Serializable, Cloneable { // ITrainableLayerConfiguration

   @Getter @Setter protected String name;
-  @Getter protected List<LayerConstraint> allParamConstraints;
-  @Getter protected List<LayerConstraint> weightConstraints;
-  @Getter protected List<LayerConstraint> biasConstraints;
-  @Getter protected List<LayerConstraint> constraints;
+  @Getter @Setter protected List<LayerConstraint> allParamConstraints;
+  @Getter @Setter protected List<LayerConstraint> weightConstraints;
+  @Getter @Setter protected List<LayerConstraint> biasConstraints;
+  @Getter @Setter protected List<LayerConstraint> constraints;
   @Getter @Setter protected IWeightNoise weightNoise;
   @Builder.Default private @Getter @Setter LinkedHashSet<String> variables = new LinkedHashSet<>();
   @Getter @Setter private IDropout dropOut;

@@ -325,4 +325,15 @@ public abstract class LayerConfiguration
     runInheritance(getNetConfiguration());
   }

+  public abstract static class LayerConfigurationBuilder<
+      C extends LayerConfiguration, B extends LayerConfigurationBuilder<C, B>> {
+    public B dropOut(double d) {
+      this.dropOut(new Dropout(d));
+      return self();
+    }
+    public B dropOut(IDropout d) {
+      this.dropOut = d;
+      return self();
+    }
+  }
 }
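Because every layer builder ultimately extends this LayerConfigurationBuilder, a convenience overload declared here (such as dropOut(double)) becomes available on the builder of every subclass. A simplified stand-alone sketch of that effect, with stand-in types instead of IDropout/Dropout:

import lombok.Getter;
import lombok.experimental.SuperBuilder;

interface DropPolicy {}

final class FixedDrop implements DropPolicy {
  private final double p;
  FixedDrop(double p) { this.p = p; }
  @Override public String toString() { return "FixedDrop(" + p + ")"; }
}

@Getter
@SuperBuilder
class BaseCfg {
  private DropPolicy dropOut;

  public abstract static class BaseCfgBuilder<C extends BaseCfg, B extends BaseCfgBuilder<C, B>> {
    // Convenience overload on the base builder: accept a probability and wrap it.
    public B dropOut(double p) {
      this.dropOut = new FixedDrop(p);
      return self();
    }
  }
}

@Getter
@SuperBuilder
class DenseCfg extends BaseCfg {
  private int nOut;
}

class DropDemo {
  public static void main(String[] args) {
    // The overload defined on the base builder is usable from the subclass builder.
    DenseCfg cfg = DenseCfg.builder().dropOut(0.5).nOut(32).build();
    System.out.println(cfg.getDropOut() + " / nOut=" + cfg.getNOut());
  }
}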

View File

@@ -61,9 +61,6 @@ public class LearnedSelfAttentionLayer extends SameDiffLayer {
   /** Number of queries to learn */
   private int nQueries;

-  private LearnedSelfAttentionLayer() {
-    /*No arg constructor for serialization*/
-  }

   @Override
   public InputPreProcessor getPreProcessorForInputType(InputType inputType) {

View File

@@ -32,9 +32,6 @@ import org.nd4j.linalg.learning.regularization.Regularization;
 @SuperBuilder
 public abstract class NoParamLayer extends LayerConfiguration {
-  {
-    setType(LayerType.POOL);
-  }

   @Override
   public ParamInitializer initializer() {

View File

@@ -42,7 +42,6 @@ import org.nd4j.linalg.api.ndarray.INDArray;
  * filter/kernel size, the stride and padding The pooling layer takes the kernel size
  */
 @Data
-@NoArgsConstructor
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
 @SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder")

@@ -103,20 +102,20 @@ public class SeparableConvolution2D extends ConvolutionLayer {
   }

   public boolean hasBias() {
-    return hasBias;
+    return isHasBias();
   }

   @Override
   public SeparableConvolution2D clone() {
     SeparableConvolution2D clone = (SeparableConvolution2D) super.clone();
-    if (clone.kernelSize != null) {
-      clone.kernelSize = clone.kernelSize.clone();
+    if (clone.getKernelSize() != null) {
+      clone.setKernelSize(clone.getKernelSize().clone());
     }
-    if (clone.stride != null) {
-      clone.stride = clone.stride.clone();
+    if (clone.getStride() != null) {
+      clone.setStride(clone.getStride().clone());
     }
-    if (clone.padding != null) {
-      clone.padding = clone.padding.clone();
+    if (clone.getPadding() != null) {
+      clone.setPadding(clone.getPadding().clone());
     }
     return clone;
   }

@@ -165,11 +164,11 @@ public class SeparableConvolution2D extends ConvolutionLayer {
     return InputTypeUtil.getOutputTypeCnnLayers(
         inputType,
-        kernelSize,
-        stride,
-        padding,
-        dilation,
-        convolutionMode,
+        getKernelSize(),
+        getStride(),
+        getPadding(),
+        getDilation(),
+        getConvolutionMode(),
         nOut,
         layerIndex,
         getName(),

View File

@@ -20,6 +20,9 @@
 package org.deeplearning4j.nn.conf.layers.misc;

+import java.util.Collection;
+import java.util.List;
+import java.util.Set;
 import lombok.EqualsAndHashCode;
 import lombok.Getter;
 import lombok.Setter;

@@ -37,108 +40,111 @@ import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.ndarray.INDArray;
 import org.nd4j.linalg.learning.config.IUpdater;
 import org.nd4j.linalg.learning.regularization.Regularization;
-import com.fasterxml.jackson.annotation.JsonProperty;
-import java.util.Collection;
-import java.util.List;
-import java.util.Set;

 @EqualsAndHashCode(callSuper = false)
-@SuperBuilder
+@SuperBuilder(builderMethodName = "innerBuilder")
 public class FrozenLayer extends LayerConfiguration {

-  /**
-   * A layer configuration, only if this layer config has been created from another one
-   */
-  @Getter @Setter
-  private LayerConfiguration innerConfiguration;
+  /** A layer configuration, only if this layer config has been created from another one */
+  @Getter @Setter private LayerConfiguration innerConfiguration;

-  public FrozenLayer(@JsonProperty("layer") LayerConfiguration layer) {
-    this.innerConfiguration = layer;
-  }
+  public static FrozenLayerBuilder<?, ?> builder() {
+    return innerBuilder();
+  }
+
+  public static FrozenLayerBuilder<?, ?> builder(LayerConfiguration innerConfiguration) {
+    return innerBuilder().innerConfiguration(innerConfiguration);
+  }

   @Override
   public LayerConfiguration clone() {
     FrozenLayer l = (FrozenLayer) super.clone();
     l.innerConfiguration = innerConfiguration.clone();
     return l;
   }

   @Override
   public org.deeplearning4j.nn.api.Layer instantiate(
       NeuralNetConfiguration conf,
       Collection<TrainingListener> trainingListeners,
       int layerIndex,
       INDArray layerParamsView,
       boolean initializeParams,
       DataType networkDataType) {
     // Need to be able to instantiate a layer, from a config - for JSON -> net type situations
     org.deeplearning4j.nn.api.Layer underlying =
         innerConfiguration.instantiate(
             getNetConfiguration(),
             trainingListeners,
             layerIndex,
             layerParamsView,
             initializeParams,
             networkDataType);

     NeuralNetConfiguration nncUnderlying = underlying.getNetConfiguration();
     if (nncUnderlying.getNetWideVariables() != null) {
       Set<String> vars = nncUnderlying.getNetWideVariables(true);
       nncUnderlying.clearNetWideVariable();
       conf.clearNetWideVariable();
       for (String s : vars) {
         conf.getNetWideVariables(false).add(s);
         nncUnderlying.getNetWideVariables(false).add(s);
       }
     }

     return new org.deeplearning4j.nn.layers.FrozenLayer(underlying);
   }

   @Override
   public ParamInitializer initializer() {
     return FrozenLayerParamInitializer.getInstance();
   }

   @Override
   public InputType getOutputType(int layerIndex, InputType inputType) {
     return innerConfiguration.getOutputType(layerIndex, inputType);
   }

   @Override
   public void setNIn(InputType inputType, boolean override) {
     innerConfiguration.setNIn(inputType, override);
   }

   @Override
   public InputPreProcessor getPreProcessorForInputType(InputType inputType) {
     return innerConfiguration.getPreProcessorForInputType(inputType);
   }

   @Override
   public List<Regularization> getRegularizationByParam(String param) {
     return null;
   }

   @Override
   public boolean isPretrainParam(String paramName) {
     return false;
   }

   @Override
   public IUpdater getUpdaterByParam(String paramName) {
     return null;
   }

   @Override
   public LayerMemoryReport getMemoryReport(InputType inputType) {
     return innerConfiguration.getMemoryReport(inputType);
   }

   @Override
   public void setName(String layerName) {
     super.setName(layerName);
     innerConfiguration.setName(layerName);
   }

   @Override
   public void setConstraints(List<LayerConstraint> constraints) {
     this.constraints = constraints;
     this.innerConfiguration.setConstraints(constraints);
   }
 }

View File

@@ -22,6 +22,7 @@ package org.deeplearning4j.nn.conf.layers.misc;
 import lombok.Data;
 import lombok.EqualsAndHashCode;
+import lombok.experimental.SuperBuilder;
 import org.deeplearning4j.nn.api.ParamInitializer;
 import org.deeplearning4j.nn.api.layers.LayerConstraint;
 import org.deeplearning4j.nn.conf.NeuralNetConfiguration;

@@ -39,19 +40,23 @@ import java.util.Collection;
 import java.util.List;
 import java.util.Set;

-@Data
 @EqualsAndHashCode(callSuper = false)
+@SuperBuilder(builderMethodName = "innerBuilder")
 public class FrozenLayerWithBackprop extends BaseWrapperLayerConfiguration {

+  public static FrozenLayerWithBackpropBuilder<?, ?> builder() {
+    return innerBuilder();
+  }
+
   /**
    * Create a new Frozen Layer, that wraps another layer with backpropagation enabled.
    *
-   * @param layer configuration of the layer to wrap
+   * @param innerConfiguration configuration of the layer to wrap
    */
-  public FrozenLayerWithBackprop(@JsonProperty("layer") LayerConfiguration layer) {
-    super(layer);
+  public static FrozenLayerWithBackpropBuilder<?, ?> builder(LayerConfiguration innerConfiguration) {
+    return innerBuilder().underlying(innerConfiguration);
   }

   public NeuralNetConfiguration getInnerConf(NeuralNetConfiguration conf) {
     NeuralNetConfiguration nnc = conf.clone();
     nnc.getLayerConfigurations().add(0, underlying);

View File

@@ -46,7 +46,7 @@ import org.nd4j.linalg.lossfunctions.ILossFunction;
 import org.nd4j.linalg.lossfunctions.impl.LossL2;
 import org.nd4j.serde.jackson.shaded.NDArrayTextSerializer;

-@Data
 @EqualsAndHashCode(callSuper = false)
 @SuperBuilder(buildMethodName = "initBuild")
 public class Yolo2OutputLayer extends LayerConfiguration {

@@ -55,20 +55,20 @@ public class Yolo2OutputLayer extends LayerConfiguration {
   /**
    * Loss function coefficient for position and size/scale components of the loss function. Default
    * (as per paper): 5
    */
-  @Builder.Default private double lambdaCoord = 5;
+  @Builder.Default @Getter private double lambdaCoord = 5;

   /**
    * Loss function coefficient for the "no object confidence" components of the loss function.
    * Default (as per paper): 0.5
    */
-  @Builder.Default private double lambdaNoObj = 0.5;
+  @Builder.Default @Getter private double lambdaNoObj = 0.5;

   /** Loss function for position/scale component of the loss function */
-  @Builder.Default private ILossFunction lossPositionScale = new LossL2();
+  @Builder.Default @Getter private ILossFunction lossPositionScale = new LossL2();

   /**
    * Loss function for the class predictions - defaults to L2 loss (i.e., sum of squared errors, as
    * per the paper), however Loss MCXENT could also be used (which is more common for
    * classification).
    */
-  @Builder.Default private ILossFunction lossClassPredictions = new LossL2();
+  @Builder.Default @Getter private ILossFunction lossClassPredictions = new LossL2();

   /**
    * Bounding box priors dimensions [width, height]. For N bounding boxes, input has shape [rows,

@@ -78,15 +78,12 @@ public class Yolo2OutputLayer extends LayerConfiguration {
    */
   @JsonSerialize(using = NDArrayTextSerializer.class)
   @JsonDeserialize(using = BoundingBoxesDeserializer.class)
-  @Builder.Default
+  @Builder.Default @Getter
   private INDArray boundingBoxes;

-  @Builder.Default
+  @Builder.Default @Getter
   private CNN2DFormat format = CNN2DFormat.NCHW; // Default for serialization of old formats

-  private Yolo2OutputLayer() {
-    // No-arg constructor for Jackson JSON
-  }

   @Override
   public Layer instantiate(

View File

@@ -20,6 +20,7 @@
 package org.deeplearning4j.nn.conf.layers.recurrent;

+import lombok.experimental.SuperBuilder;
 import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
 import org.deeplearning4j.nn.conf.inputs.InputType;
 import org.deeplearning4j.nn.conf.layers.LayerConfiguration;

@@ -30,14 +31,18 @@ import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.ndarray.INDArray;
 import java.util.Collection;

+@SuperBuilder(builderMethodName = "innerBuilder")
 public class LastTimeStep extends BaseWrapperLayerConfiguration {

-  private LastTimeStep() {}
+  public static LastTimeStepBuilder<?, ?> builder() {
+    return innerBuilder();
+  }

-  public LastTimeStep(LayerConfiguration underlying) {
-    super(underlying);
-    this.name = underlying.getName(); // needed for keras import to match names
+  public static LastTimeStepBuilder<?, ?> builder(LayerConfiguration underlying) {
+    return innerBuilder()
+        .underlying(underlying)
+        .name(underlying.getName());
   }

   public LayerConfiguration getUnderlying() {

View File

@@ -41,7 +41,6 @@ import java.util.Map;

 @EqualsAndHashCode(callSuper = false)
-@NoArgsConstructor
 @SuperBuilder
 public class SimpleRnn extends BaseRecurrentLayer {
   /**

View File

@@ -20,9 +20,9 @@
 package org.deeplearning4j.nn.conf.layers.recurrent;

-import lombok.Data;
-import lombok.EqualsAndHashCode;
-import lombok.NonNull;
+import java.util.Collection;
+import lombok.*;
+import lombok.experimental.SuperBuilder;
 import org.deeplearning4j.nn.conf.InputPreProcessor;
 import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
 import org.deeplearning4j.nn.conf.RNNFormat;

@@ -33,66 +33,67 @@ import org.deeplearning4j.nn.layers.recurrent.TimeDistributedLayer;
 import org.deeplearning4j.optimize.api.TrainingListener;
 import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.ndarray.INDArray;
-import com.fasterxml.jackson.annotation.JsonProperty;
-import java.util.Collection;

-@Data
 @EqualsAndHashCode(callSuper = true)
+@SuperBuilder
 public class TimeDistributed extends BaseWrapperLayerConfiguration {

-  private RNNFormat rnnDataFormat = RNNFormat.NCW;
+  @Getter @Setter private RNNFormat rnnDataFormat = RNNFormat.NCW;

-  /**
-   * @param underlying Underlying (internal) layer - should be a feed forward type such as DenseLayerConfiguration
-   */
-  public TimeDistributed(@JsonProperty("underlying") @NonNull LayerConfiguration underlying, @JsonProperty("rnnDataFormat") RNNFormat rnnDataFormat) {
-    super(underlying);
-    this.rnnDataFormat = rnnDataFormat;
-  }
-
-  public TimeDistributed(LayerConfiguration underlying) {
-    super(underlying);
-  }

   @Override
   public org.deeplearning4j.nn.api.Layer instantiate(
       NeuralNetConfiguration conf,
       Collection<TrainingListener> trainingListeners,
       int layerIndex,
       INDArray layerParamsView,
       boolean initializeParams,
       DataType networkDataType) {
     LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);

     NeuralNetConfiguration conf2 = conf.clone();
     conf2.setLayer(((TimeDistributed) lconf).getUnderlying());
     return new TimeDistributedLayer(
         underlying.instantiate(
             conf2,
             trainingListeners,
             layerIndex,
             layerParamsView,
             initializeParams,
             networkDataType),
         rnnDataFormat);
   }

   @Override
   public InputType getOutputType(int layerIndex, InputType inputType) {
     if (inputType.getType() != InputType.Type.RNN) {
       throw new IllegalStateException(
           "Only RNN input type is supported as input to TimeDistributed layer (layer #"
               + layerIndex
               + ")");
     }

     InputType.InputTypeRecurrent rnn = (InputType.InputTypeRecurrent) inputType;
     InputType ff = InputType.feedForward(rnn.getSize());
     InputType ffOut = underlying.getOutputType(layerIndex, ff);
     return InputType.recurrent(
         ffOut.arrayElementsPerExample(), rnn.getTimeSeriesLength(), rnnDataFormat);
   }

   @Override
   public void setNIn(InputType inputType, boolean override) {
     if (inputType.getType() != InputType.Type.RNN) {
       throw new IllegalStateException(
           "Only RNN input type is supported as input to TimeDistributed layer");
     }

     InputType.InputTypeRecurrent rnn = (InputType.InputTypeRecurrent) inputType;
     InputType ff = InputType.feedForward(rnn.getSize());
     this.rnnDataFormat = rnn.getFormat();
     underlying.setNIn(ff, override);
   }

   @Override
   public InputPreProcessor getPreProcessorForInputType(InputType inputType) {
     // No preprocessor - the wrapper layer operates as the preprocessor
     return null;
   }
 }

View File

@ -20,6 +20,7 @@
package org.deeplearning4j.nn.conf.layers.samediff; package org.deeplearning4j.nn.conf.layers.samediff;
import lombok.Builder;
import lombok.EqualsAndHashCode; import lombok.EqualsAndHashCode;
import lombok.experimental.SuperBuilder; import lombok.experimental.SuperBuilder;
import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.Layer;
@ -47,7 +48,9 @@ public abstract class SameDiffLayer extends AbstractSameDiffLayer {
/** /**
* WeightInit, default is XAVIER. * WeightInit, default is XAVIER.
*/ */
@Builder.Default
protected WeightInit weightInit = WeightInit.XAVIER; protected WeightInit weightInit = WeightInit.XAVIER;
@Builder.Default
protected Map<String,IWeightInit> paramWeightInit = new HashMap<>(); protected Map<String,IWeightInit> paramWeightInit = new HashMap<>();
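The @Builder.Default annotations matter here because Lombok-generated builders do not apply plain field initializers: without the annotation, a layer built via its builder would end up with null weightInit and paramWeightInit instead of the declared defaults. A standalone sketch of that rule follows; it is a generic Lombok example, not the DL4J class.

    import java.util.HashMap;
    import java.util.Map;
    import lombok.Builder;
    import lombok.Getter;

    @Getter
    @Builder
    class DefaultsSketch {
        @Builder.Default
        private String weightInit = "XAVIER";                           // kept when the builder does not set it

        private Map<String, Object> paramWeightInit = new HashMap<>();  // no @Builder.Default: built instances get null

        public static void main(String[] args) {
            DefaultsSketch d = DefaultsSketch.builder().build();
            System.out.println(d.getWeightInit());       // prints "XAVIER"
            System.out.println(d.getParamWeightInit());  // prints null - the initializer was ignored by the builder
        }
    }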

View File

@ -20,6 +20,7 @@
package org.deeplearning4j.nn.conf.layers.samediff; package org.deeplearning4j.nn.conf.layers.samediff;
import lombok.experimental.SuperBuilder;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
import org.deeplearning4j.optimize.api.TrainingListener; import org.deeplearning4j.optimize.api.TrainingListener;
@ -30,13 +31,10 @@ import org.nd4j.linalg.api.ndarray.INDArray;
import java.util.Collection; import java.util.Collection;
import java.util.Map; import java.util.Map;
@SuperBuilder
public abstract class SameDiffOutputLayer extends AbstractSameDiffLayer { public abstract class SameDiffOutputLayer extends AbstractSameDiffLayer {
protected SameDiffOutputLayer() {
//No op constructor for Jackson
}
/** /**
* Define the output layer * Define the output layer
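With the no-op Jackson constructor removed, concrete subclasses are built through Lombok's @SuperBuilder, which requires every class in the hierarchy to carry the annotation so that inherited fields appear on a single builder. A generic sketch of that rule follows; the class and field names are illustrative, not the actual DL4J types.

    import lombok.experimental.SuperBuilder;

    @SuperBuilder
    abstract class AbstractLayerSketch {
        protected String name;
    }

    @SuperBuilder
    abstract class OutputLayerSketch extends AbstractLayerSketch {
        protected int nOut;
    }

    @SuperBuilder
    class SoftmaxOutputSketch extends OutputLayerSketch {
        protected double lossWeight;
    }

    class SuperBuilderHierarchyDemo {
        public static void main(String[] args) {
            // Fields declared at every level are settable on the single generated builder
            SoftmaxOutputSketch layer = SoftmaxOutputSketch.builder()
                    .name("out")
                    .nOut(10)
                    .lossWeight(1.0)
                    .build();
            System.out.println(layer);
        }
    }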

View File

@ -21,6 +21,7 @@
package org.deeplearning4j.nn.conf.layers.util; package org.deeplearning4j.nn.conf.layers.util;
import lombok.NoArgsConstructor; import lombok.NoArgsConstructor;
import lombok.experimental.SuperBuilder;
import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.api.ParamInitializer;
import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
@ -38,7 +39,7 @@ import java.util.Collection;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
@NoArgsConstructor @SuperBuilder
public class MaskLayer extends NoParamLayer { public class MaskLayer extends NoParamLayer {
@Override @Override
public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf,
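Because MaskLayer now carries @SuperBuilder instead of @NoArgsConstructor, call sites obtain it as MaskLayer.builder().build() rather than new MaskLayer(). A minimal sketch, with MaskLayerSketch as a made-up helper class:

    import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
    import org.deeplearning4j.nn.conf.layers.util.MaskLayer;

    public class MaskLayerSketch {
        public static LayerConfiguration maskActivations() {
            // MaskLayer has no parameters of its own - it only applies the current mask array to
            // activations and gradients - so the generated builder is called with no properties set.
            return MaskLayer.builder().build();
        }
    }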

View File

@ -35,22 +35,17 @@ import com.fasterxml.jackson.annotation.JsonProperty;
import java.util.Collection; import java.util.Collection;
@Data
@EqualsAndHashCode(callSuper = false) @EqualsAndHashCode(callSuper = false)
@SuperBuilder @SuperBuilder
public class MaskZeroLayer extends BaseWrapperLayerConfiguration { public class MaskZeroLayer extends BaseWrapperLayerConfiguration {
@Builder.Default @Builder.Default @Getter @Setter
private double maskingValue = 0.0; private double maskingValue = 0.0;
private static final long serialVersionUID = 9074525846200921839L; private static final long serialVersionUID = 9074525846200921839L;
public MaskZeroLayer(@JsonProperty("underlying") LayerConfiguration underlying, @JsonProperty("maskingValue") double maskingValue) {
this.underlying = underlying;
this.maskingValue = maskingValue;
}
@Override @Override
public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf,
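The removed two-argument constructor maps onto builder properties: underlying is inherited from BaseWrapperLayerConfiguration, and maskingValue falls back to 0.0 through @Builder.Default when it is not set. A hypothetical construction sketch follows; the LSTM builder used for the wrapped layer is assumed to follow the same builder pattern as the rest of this changeset.

    import org.deeplearning4j.nn.conf.layers.LSTM;
    import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
    import org.deeplearning4j.nn.conf.layers.util.MaskZeroLayer;

    public class MaskZeroLayerSketch {
        public static LayerConfiguration maskZeroWrappedLstm() {
            return MaskZeroLayer.builder()
                    .underlying(LSTM.builder().nIn(8).nOut(8).build())  // the layer to wrap
                    .maskingValue(0.0)                                   // explicit here, though 0.0 is already the default
                    .build();
        }
    }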

View File

@ -23,6 +23,7 @@ package org.deeplearning4j.nn.conf.layers.wrapper;
import java.util.List; import java.util.List;
import lombok.EqualsAndHashCode; import lombok.EqualsAndHashCode;
import lombok.Getter; import lombok.Getter;
import lombok.Setter;
import lombok.experimental.SuperBuilder; import lombok.experimental.SuperBuilder;
import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.api.ParamInitializer;
import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.InputPreProcessor;
@ -42,7 +43,8 @@ import org.nd4j.linalg.learning.regularization.Regularization;
public abstract class BaseWrapperLayerConfiguration extends LayerConfiguration { public abstract class BaseWrapperLayerConfiguration extends LayerConfiguration {
/** The configuration of another layer to wrap */ /** The configuration of another layer to wrap */
@Getter protected LayerConfiguration underlying; @Getter @Setter
protected LayerConfiguration underlying;
/** /**
* Set the net configuration for this configuration as well as for the underlying layer (if not * Set the net configuration for this configuration as well as for the underlying layer (if not

View File

@ -38,8 +38,6 @@ import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.learning.regularization.Regularization; import org.nd4j.linalg.learning.regularization.Regularization;
@Data
@NoArgsConstructor
@ToString(callSuper = true) @ToString(callSuper = true)
@EqualsAndHashCode(callSuper = true) @EqualsAndHashCode(callSuper = true)
@JsonIgnoreProperties("lossFn") @JsonIgnoreProperties("lossFn")

View File

@ -349,6 +349,6 @@ public abstract class BaseOutputLayer<LayerConfT extends org.deeplearning4j.nn.c
@Override @Override
public boolean hasBias() { public boolean hasBias() {
return getTypedLayerConfiguration().hasBias(); return getTypedLayerConfiguration().isHasBias();
} }
} }
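The hasBias() -> isHasBias() renames in this and the following hunk follow from Lombok's accessor naming: for a boolean field called hasBias, @Getter generates isHasBias(), not hasBias(). A small standalone illustration of that rule, using made-up class names:

    import lombok.Getter;
    import lombok.Setter;

    @Getter
    @Setter
    class BiasFlagSketch {
        private boolean hasBias = true;   // Lombok generates isHasBias() / setHasBias(boolean)
    }

    class BiasFlagDemo {
        public static void main(String[] args) {
            BiasFlagSketch cfg = new BiasFlagSketch();
            cfg.setHasBias(false);
            System.out.println(cfg.isHasBias());  // prints false
        }
    }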

View File

@ -109,7 +109,7 @@ public abstract class BasePretrainNetwork<LayerConfT extends org.deeplearning4j.
protected void setScoreWithZ(INDArray z) { protected void setScoreWithZ(INDArray z) {
if (input == null || z == null) if (input == null || z == null)
throw new IllegalStateException("Cannot calculate score without input and labels " + layerId()); throw new IllegalStateException("Cannot calculate score without input and labels " + layerId());
ILossFunction lossFunction = getTypedLayerConfiguration().getLossFunction().getILossFunction(); ILossFunction lossFunction = getTypedLayerConfiguration().getLossFunction().getILossFunction();
//double score = lossFunction.computeScore(input, z, layerConf().getActivationFunction(), maskArray, false); //double score = lossFunction.computeScore(input, z, layerConf().getActivationFunction(), maskArray, false);
double score = lossFunction.computeScore(input, z, getTypedLayerConfiguration().getActivationFn(), maskArray, false); double score = lossFunction.computeScore(input, z, getTypedLayerConfiguration().getActivationFn(), maskArray, false);

View File

@ -226,7 +226,7 @@ public class DefaultParamInitializer extends AbstractParamInitializer {
protected boolean hasBias(LayerConfiguration layer){ protected boolean hasBias(LayerConfiguration layer){
if(layer instanceof BaseOutputLayer ) { if(layer instanceof BaseOutputLayer ) {
return ((BaseOutputLayer) layer).hasBias(); return ((BaseOutputLayer) layer).isHasBias();
} else if(layer instanceof DenseLayer){ } else if(layer instanceof DenseLayer){
return ((DenseLayer)layer).isHasBias(); return ((DenseLayer)layer).isHasBias();
} else if(layer instanceof EmbeddingLayer){ } else if(layer instanceof EmbeddingLayer){

View File

@ -382,7 +382,7 @@ public class TransferLearning {
} }
LayerConfiguration origLayerConf = editedModel.getNetConfiguration().getFlattenedLayerConfigurations().get(i); LayerConfiguration origLayerConf = editedModel.getNetConfiguration().getFlattenedLayerConfigurations().get(i);
LayerConfiguration newLayerConf = new org.deeplearning4j.nn.conf.layers.misc.FrozenLayer(origLayerConf); LayerConfiguration newLayerConf = org.deeplearning4j.nn.conf.layers.misc.FrozenLayer.builder().innerConfiguration(origLayerConf).build();
newLayerConf.setName(origLayerConf.getName()); newLayerConf.setName(origLayerConf.getName());
editedModel.getNetConfiguration().getNetConfigurations().get(i).setLayer(newLayerConf); editedModel.getNetConfiguration().getNetConfigurations().get(i).setLayer(newLayerConf);
} }
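Pulled out of TransferLearning for clarity, the replacement pattern is: freeze an existing configuration by wrapping it through the builder's innerConfiguration property instead of the removed FrozenLayer constructor. A minimal sketch; FrozenLayerBuilderSketch is a made-up helper, while the builder property and the setName/getName calls are taken from the hunk above.

    import org.deeplearning4j.nn.conf.layers.DenseLayer;
    import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
    import org.deeplearning4j.nn.conf.layers.misc.FrozenLayer;
    import org.nd4j.linalg.activations.Activation;

    public class FrozenLayerBuilderSketch {
        // Wrap an existing configuration so its parameters are not updated during further training
        public static LayerConfiguration freeze(LayerConfiguration original) {
            LayerConfiguration frozen = FrozenLayer.builder().innerConfiguration(original).build();
            frozen.setName(original.getName());  // keep the original layer name, as TransferLearning does
            return frozen;
        }

        public static LayerConfiguration example() {
            return freeze(DenseLayer.builder().nIn(10).nOut(10).activation(Activation.TANH).build());
        }
    }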
@ -1009,7 +1009,7 @@ public class TransferLearning {
String layerName = gv.getVertexName(); String layerName = gv.getVertexName();
LayerVertex currLayerVertex = (LayerVertex) newConfig.getVertices().get(layerName); LayerVertex currLayerVertex = (LayerVertex) newConfig.getVertices().get(layerName);
LayerConfiguration origLayerConf = currLayerVertex.getLayerConfiguration(); LayerConfiguration origLayerConf = currLayerVertex.getLayerConfiguration();
LayerConfiguration newLayerConf = new org.deeplearning4j.nn.conf.layers.misc.FrozenLayer(origLayerConf); LayerConfiguration newLayerConf = org.deeplearning4j.nn.conf.layers.misc.FrozenLayer.builder().innerConfiguration(origLayerConf).build();
newLayerConf.setName(origLayerConf.getName()); newLayerConf.setName(origLayerConf.getName());
//Complication here(and reason for clone on next line): inner LayerConfiguration (implementation) //Complication here(and reason for clone on next line): inner LayerConfiguration (implementation)
// NeuralNetConfiguration.layer (config) should keep the original layer config. While network // NeuralNetConfiguration.layer (config) should keep the original layer config. While network