Using @SuperBuilder for LayerConfigurations
Signed-off-by: brian <brian@brutex.de>
Branch: master
Parent: 391a1ad397
Commit: 8f524827e4
@@ -171,7 +171,7 @@ public class App {
     LayerConfiguration[] disLayers = Arrays.stream(disLayers())
         .map((layer) -> {
           if (layer instanceof DenseLayer || layer instanceof OutputLayer) {
-            return new FrozenLayerWithBackprop(layer);
+            return FrozenLayerWithBackprop.builder(layer);
           } else {
             return layer;
           }
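
For orientation, a minimal usage sketch of the new entry point (not part of the commit; it assumes builder(layer) seeds the wrapped configuration, as the FrozenLayerWithBackprop hunk further down defines it):

    LayerConfiguration dense = DenseLayer.builder().nIn(64).nOut(32).build();
    // builder(dense) pre-populates the wrapped ("underlying") configuration
    FrozenLayerWithBackprop frozen = FrozenLayerWithBackprop.builder(dense).build();
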
@@ -162,19 +162,19 @@ public class UtilLayerGradientChecks extends BaseDL4JTest {
     }

-    NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
-            .updater(new NoOp())
-            .activation(Activation.TANH)
-            .dataType(DataType.DOUBLE)
-            .dist(new NormalDistribution(0,2))
-            .list()
-            .layer(l1)
-            .layer(new MaskLayer())
-            .layer(l2)
-            .layer(l3)
-            .inputType(it)
-            .build();
+    NeuralNetConfiguration conf =
+        NeuralNetConfiguration.builder()
+            .updater(new NoOp())
+            .activation(Activation.TANH)
+            .dataType(DataType.DOUBLE)
+            .dist(new NormalDistribution(0, 2))
+            .list()
+            .layer(l1)
+            .layer(MaskLayer.builder().build())
+            .layer(l2)
+            .layer(l3)
+            .inputType(it)
+            .build();

     MultiLayerNetwork net = new MultiLayerNetwork(conf);
     net.init();
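
The substantive change in this test is the instantiation style: layers that used to be created through no-arg constructors are now obtained from their generated builders. A short sketch of the pattern:

    // old: MaskLayer mask = new MaskLayer();
    MaskLayer mask = MaskLayer.builder().build();   // new: Lombok @SuperBuilder entry point
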
@@ -203,11 +203,11 @@ public class UtilLayerGradientChecks extends BaseDL4JTest {
         .list()
         .layer(DenseLayer.builder().nIn(10).nOut(10)
                 .activation(Activation.TANH).weightInit(WeightInit.XAVIER).build())
-        .layer(new FrozenLayerWithBackprop(DenseLayer.builder().nIn(10).nOut(10)
-                .activation(Activation.TANH).weightInit(WeightInit.XAVIER).build()))
-        .layer(new FrozenLayerWithBackprop(
+        .layer(FrozenLayerWithBackprop.builder().underlying(DenseLayer.builder().nIn(10).nOut(10)
+                .activation(Activation.TANH).weightInit(WeightInit.XAVIER).build()).build())
+        .layer(FrozenLayerWithBackprop.builder().underlying(
                 DenseLayer.builder().nIn(10).nOut(10).activation(Activation.TANH)
-                        .weightInit(WeightInit.XAVIER).build()))
+                        .weightInit(WeightInit.XAVIER).build()).build())
         .layer(OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT)
                 .activation(Activation.SOFTMAX).nIn(10).nOut(10).build())
         .build();
@@ -40,15 +40,12 @@ import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.ndarray.INDArray;
 import org.nd4j.linalg.learning.config.IUpdater;

-@NoArgsConstructor
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
 @SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder")
 public class ActivationLayer extends NoParamLayer {

-  {
-    setType(LayerType.ACT);
-  }
-
   public static ActivationLayerBuilder<?, ?> builder(Activation activation) {
     return innerBuilder().activation(activation);
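
Renaming the generated builder method to innerBuilder() frees the name builder for hand-written overloads that pre-populate fields. A minimal sketch of how the overload above is meant to be called (assumed usage, not from the commit):

    ActivationLayer act = ActivationLayer.builder(Activation.RELU).build();
    // equivalent to: ActivationLayer.innerBuilder().activation(Activation.RELU).build()
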
@@ -49,6 +49,8 @@ import org.nd4j.linalg.learning.regularization.WeightDecay;
 @SuperBuilder
 public abstract class BaseLayerConfiguration extends LayerConfiguration
     implements ITraininableLayerConfiguration, Serializable, Cloneable {
+
+
   /**
    * Set constraints to be applied to all layers. Default: no constraints.<br>
    * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm
@@ -84,9 +86,9 @@ public abstract class BaseLayerConfiguration extends LayerConfiguration
   @Getter @Setter @Builder.Default
   protected double gainInit = 0.0;
   /** Regularization for the parameters (excluding biases). */
-  @Builder.Default @Getter protected List<Regularization> regularization = new ArrayList<>();
+  @Builder.Default @Getter @Setter protected List<Regularization> regularization = new ArrayList<>();
   /** Regularization for the bias parameters only */
-  @Builder.Default @Getter
+  @Builder.Default @Getter @Setter
   protected List<Regularization> regularizationBias = new ArrayList<>();
   /**
    * Gradient updater. For example, {@link org.nd4j.linalg.learning.config.Adam} or {@link
@@ -210,6 +212,7 @@ public abstract class BaseLayerConfiguration extends LayerConfiguration
       C extends BaseLayerConfiguration, B extends BaseLayerConfigurationBuilder<C, B>>
       extends LayerConfigurationBuilder<C, B> {

+
     /**
      * Set weight initialization scheme to random sampling via the specified distribution.
      * Equivalent to: {@code .weightInit(new WeightInitDistribution(distribution))}
@@ -29,8 +29,7 @@ import org.nd4j.linalg.lossfunctions.ILossFunction;
 import org.nd4j.linalg.lossfunctions.LossFunctions;
 import org.nd4j.linalg.lossfunctions.impl.LossMCXENT;

-@Data
-@NoArgsConstructor
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
 @SuperBuilder(builderMethodName = "innerBuilder")
@@ -39,19 +38,16 @@ public abstract class BaseOutputLayer extends FeedForwardLayer {
   /**
    * Loss function for the output layer
    */
-  @lombok.Builder.Default
+  @lombok.Builder.Default @Getter @Setter
   protected ILossFunction lossFunction = new LossMCXENT();
   /**
    * If true (default): include bias parameters in the model. False: no bias.
    *
    */
-  @lombok.Builder.Default
+  @lombok.Builder.Default @Getter @Setter
   protected boolean hasBias = true;

-  public boolean hasBias() {
-    return hasBias;
-  }
-
   @Override
   public LayerMemoryReport getMemoryReport(InputType inputType) {
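
With @Getter/@Setter on the fields, the hand-written hasBias() accessor becomes redundant; callers use the Lombok-generated methods, which for a boolean named hasBias follow the isHasBias() naming rule. A hedged sketch against a concrete subclass:

    OutputLayer out = OutputLayer.builder()
            .lossFunction(LossFunctions.LossFunction.MCXENT)
            .nIn(10).nOut(10)
            .build();
    boolean bias = out.isHasBias();            // replaces the removed hasBias()
    ILossFunction lf = out.getLossFunction();  // generated getter
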
@@ -31,11 +31,11 @@ import org.nd4j.linalg.lossfunctions.LossFunctions;
 @JsonIgnoreProperties("pretrain")
 @SuperBuilder
 public abstract class BasePretrainNetwork extends FeedForwardLayer {
-  @Builder.Default
+  @Builder.Default @Getter
   protected LossFunctions.LossFunction lossFunction =
       LossFunctions.LossFunction.RECONSTRUCTION_CROSSENTROPY;

-  @Builder.Default protected double visibleBiasInit = 0.0;
+  @Builder.Default @Getter protected double visibleBiasInit = 0.0;

   @Override
   public boolean isPretrainParam(String paramName) {
@@ -31,8 +31,6 @@ import org.deeplearning4j.nn.conf.inputs.InputType;
  * @author Max Pumperla
  */

-@Data
-@NoArgsConstructor
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
 @SuperBuilder()
@@ -43,7 +41,7 @@ public abstract class BaseUpsamplingLayer extends NoParamLayer {
    * dimensions (e.g. 2 for Upsampling2D etc.)
    *
    */
-  @Builder.Default
+  @Builder.Default @Getter
   protected int[] size = new int[] {1};

   @Override
@@ -60,8 +58,4 @@ public abstract class BaseUpsamplingLayer extends NoParamLayer {
     }
     return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getName());
   }
-
-
-
-
 }
@@ -42,7 +42,6 @@ import java.util.Collection;
 import java.util.Map;

 @Data
-@NoArgsConstructor
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
 @SuperBuilder(buildMethodName = "initBuild")
@@ -37,15 +37,15 @@ import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.ndarray.INDArray;
 import org.nd4j.linalg.lossfunctions.ILossFunction;

-@Data
-@NoArgsConstructor
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
 @SuperBuilder
 public class Cnn3DLossLayer extends FeedForwardLayer {
+  @Getter @Setter
   protected ILossFunction lossFunction;
   /** Format of the input/output data. See {@link Convolution3D.DataFormat} for details */
+  @Getter @Setter
   protected Convolution3D.DataFormat dataFormat;

   @Override
@@ -24,10 +24,11 @@ import lombok.Data;
 import lombok.EqualsAndHashCode;
 import lombok.NoArgsConstructor;
 import lombok.ToString;
+import lombok.experimental.SuperBuilder;

 @Data
-@NoArgsConstructor
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
+@SuperBuilder
 public class Convolution1D extends Convolution1DLayer {
 }
@@ -45,7 +45,6 @@ import org.nd4j.linalg.api.ndarray.INDArray;
  * wide.
  */
 @Data
-@NoArgsConstructor
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
 @SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder")
@@ -142,7 +141,7 @@ public class Convolution1DLayer extends ConvolutionLayer {
     } else {
       outLength =
           Convolution1DUtils.getOutputSize(
-              inputTsLength, kernelSize[0], stride[0], padding[0], convolutionMode, dilation[0]);
+              inputTsLength, kernelSize[0], stride[0], padding[0], getConvolutionMode(), dilation[0]);
     }

     return InputType.recurrent(nOut, outLength, rnnDataFormat);
@@ -24,10 +24,12 @@ import lombok.Data;
 import lombok.EqualsAndHashCode;
 import lombok.NoArgsConstructor;
 import lombok.ToString;
+import lombok.experimental.SuperBuilder;

 @Data
-@NoArgsConstructor
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
+@SuperBuilder
 public class Convolution2D extends ConvolutionLayer {
 }
@@ -38,7 +38,6 @@ import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.ndarray.INDArray;

 @Data
-@NoArgsConstructor
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
 @SuperBuilder(builderMethodName = "innerBuilder", buildMethodName = "initBuild")
@@ -118,7 +117,7 @@ public class Convolution3D extends ConvolutionLayer {
    * kernel size
    */
   public boolean hasBias() {
-    return hasBias;
+    return isHasBias();
   }

   @Override
@@ -46,6 +46,7 @@ import org.nd4j.linalg.api.ndarray.INDArray;
  * to be used in the net or in other words the channels The builder specifies the filter/kernel
  * size, the stride and padding The pooling layer takes the kernel size
  */
+@Data
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
 @SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder")
@@ -55,14 +56,14 @@ public class ConvolutionLayer extends FeedForwardLayer {
    *
    * @param kernelSize the height and width of the kernel
    */
-  public @Builder.Default int[] kernelSize = new int[] {5, 5}; // Square filter
+  private @Builder.Default @Getter @Setter int[] kernelSize = new int[] {5, 5}; // Square filter
   /** If true (default): include bias parameters in the model. False: no bias. */
-  @Builder.Default protected boolean hasBias = true;
+  @Builder.Default @Getter @Setter private boolean hasBias = true;
   /**
    * Set the convolution mode for the Convolution layer. See {@link ConvolutionMode} for more
    * details Default is {@link ConvolutionMode}.Truncate.
    */
-  @Builder.Default protected ConvolutionMode convolutionMode = ConvolutionMode.Truncate;
+  @Builder.Default @Getter @Setter private ConvolutionMode convolutionMode = ConvolutionMode.Truncate;

   /**
    * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last).
@@ -72,7 +73,7 @@ public class ConvolutionLayer extends FeedForwardLayer {
    * @param format Format for activations (in and out)
    */
   @Builder.Default
-  protected CNN2DFormat convFormat =
+  private CNN2DFormat convFormat =
       CNN2DFormat.NCHW; // default value for legacy serialization reasons

   /**
@@ -85,25 +86,25 @@ public class ConvolutionLayer extends FeedForwardLayer {
    * http://deeplearning.net/software/theano/tutorial/conv_arithmetic.html#dilated-convolutions</a>
    * <br>
    */
-  protected @Builder.Default int[] dilation = new int[] {1, 1};
+  private @Builder.Default int[] dilation = new int[] {1, 1};
   /** Default is 2. Down-sample by a factor of 2 */
-  protected @Builder.Default int[] stride = new int[] {1, 1};
+  private @Builder.Default int[] stride = new int[] {1, 1};

-  protected @Builder.Default int[] padding = new int[] {0, 0};
+  private @Builder.Default int[] padding = new int[] {0, 0};
   /**
    * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementatation
    * be allowed? If set to false, an exception in CuDNN will be propagated back to the user. If
    * false, the built-in (non-CuDNN) implementation for ConvolutionLayer will be used
    */
-  @Builder.Default protected boolean cudnnAllowFallback = true;
+  @Builder.Default private boolean cudnnAllowFallback = true;

   /** Defaults to "PREFER_FASTEST", but "NO_WORKSPACE" uses less memory. */
-  @Builder.Default protected AlgoMode cudnnAlgoMode = AlgoMode.PREFER_FASTEST;
+  @Builder.Default private AlgoMode cudnnAlgoMode = AlgoMode.PREFER_FASTEST;

-  protected FwdAlgo cudnnFwdAlgo;
-  protected BwdFilterAlgo cudnnBwdFilterAlgo;
-  protected BwdDataAlgo cudnnBwdDataAlgo;
-  @Builder.Default protected int convolutionDim = 2; // 2D convolution by default
+  private FwdAlgo cudnnFwdAlgo;
+  private BwdFilterAlgo cudnnBwdFilterAlgo;
+  private BwdDataAlgo cudnnBwdDataAlgo;
+  @Builder.Default private int convolutionDim = 2; // 2D convolution by default
   /** Causal convolution - allowed for 1D only */
   @Builder.Default private boolean allowCausal = false;

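
Because the ConvolutionLayer fields drop from protected to private, subclasses and utilities can no longer touch them directly; they go through the accessors that @Data, @Getter and @Setter generate, which is exactly what the Deconvolution2D, Deconvolution3D and SeparableConvolution2D hunks below do. A hedged sketch of a call site (the setter name is assumed from Lombok's naming rules):

    void describe(ConvolutionLayer conv) {
      int[] kernel = conv.getKernelSize();              // was: conv.kernelSize
      ConvolutionMode mode = conv.getConvolutionMode(); // was: conv.convolutionMode
      conv.setCudnnAllowFallback(false);                // setter generated by @Data
    }
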
@@ -44,7 +44,6 @@ import java.util.Map;
  * The pooling layer takes the kernel size
  */
 @Data
-@NoArgsConstructor
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
 @SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuild")
@@ -88,20 +87,20 @@ private CNN2DFormat format = CNN2DFormat.NCHW;
     }
   }
   public boolean hasBias() {
-    return hasBias;
+    return isHasBias();
   }

   @Override
   public Deconvolution2D clone() {
     Deconvolution2D clone = (Deconvolution2D) super.clone();
-    if (clone.kernelSize != null) {
-      clone.kernelSize = clone.kernelSize.clone();
+    if (clone.getKernelSize() != null) {
+      clone.setKernelSize( clone.getKernelSize().clone());
     }
-    if (clone.stride != null) {
-      clone.stride = clone.stride.clone();
+    if (clone.getStride() != null) {
+      clone.setStride( clone.getStride().clone());
     }
-    if (clone.padding != null) {
-      clone.padding = clone.padding.clone();
+    if (clone.getPadding() != null) {
+      clone.setPadding( clone.getPadding().clone());
     }
     return clone;
   }
@@ -138,7 +137,7 @@ private CNN2DFormat format = CNN2DFormat.NCHW;
             + "\"): Expected CNN input, got " + inputType);
     }

-    return InputTypeUtil.getOutputTypeDeconvLayer(inputType, kernelSize, stride, padding, dilation, convolutionMode,
+    return InputTypeUtil.getOutputTypeDeconvLayer(inputType, getKernelSize(), getStride(), getPadding(), getDilation(), getConvolutionMode(),
         nOut, layerIndex, getName(), Deconvolution2DLayer.class);
   }

@@ -42,7 +42,6 @@ import org.nd4j.linalg.api.ndarray.INDArray;
  * filter/kernel size, the stride and padding The pooling layer takes the kernel size
  */
 @Data
-@NoArgsConstructor
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
 @SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder")
@@ -63,20 +62,20 @@ public class Deconvolution3D extends ConvolutionLayer {
   }

   public boolean hasBias() {
-    return hasBias;
+    return isHasBias();
   }

   @Override
   public Deconvolution3D clone() {
     Deconvolution3D clone = (Deconvolution3D) super.clone();
-    if (clone.kernelSize != null) {
-      clone.kernelSize = clone.kernelSize.clone();
+    if (clone.getKernelSize() != null) {
+      clone.setKernelSize( clone.getKernelSize().clone());
     }
-    if (clone.stride != null) {
-      clone.stride = clone.stride.clone();
+    if (clone.getStride() != null) {
+      clone.setStride( clone.getStride().clone());
     }
-    if (clone.padding != null) {
-      clone.padding = clone.padding.clone();
+    if (clone.getPadding() != null) {
+      clone.setPadding( clone.getPadding().clone());
     }
     return clone;
   }
@@ -147,11 +146,11 @@ public class Deconvolution3D extends ConvolutionLayer {

     return InputTypeUtil.getOutputTypeDeconv3dLayer(
         inputType,
-        kernelSize,
-        stride,
-        padding,
-        dilation,
-        convolutionMode,
+        getKernelSize(),
+        getStride(),
+        getPadding(),
+        getDilation(),
+        getConvolutionMode(),
         dataFormat,
         nOut,
         layerIndex,
@@ -38,7 +38,7 @@ import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.ndarray.INDArray;

 /** Dense Layer Uses WeightInitXavier as default */
+@Data
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
 @SuperBuilder(
@@ -47,9 +47,9 @@ import org.nd4j.linalg.api.ndarray.INDArray;
 public class DenseLayer extends FeedForwardLayer {

   /** If true (default = false): enable layer normalization on this layer */
-  @lombok.Builder.Default @Accessors private boolean hasLayerNorm = false;
+  @lombok.Builder.Default private boolean hasLayerNorm = false;

-  @lombok.Builder.Default @Accessors private boolean hasBias = true;
+  @lombok.Builder.Default private boolean hasBias = true;

   @Override
   public Layer instantiate(
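
The @Accessors annotation is dropped, but @Builder.Default keeps the field defaults in play: a builder that never mentions hasBias still produces a layer with hasBias = true. A short sketch (builder setter names assumed from the field names):

    DenseLayer withBias    = DenseLayer.builder().nIn(20).nOut(10).build();                 // hasBias defaults to true
    DenseLayer withoutBias = DenseLayer.builder().nIn(20).nOut(10).hasBias(false).build();
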
@@ -20,6 +20,7 @@

 package org.deeplearning4j.nn.conf.layers;

+import java.util.*;
 import lombok.*;
 import lombok.experimental.SuperBuilder;
 import org.deeplearning4j.nn.api.Layer;
@@ -36,134 +37,152 @@ import org.nd4j.common.base.Preconditions;
 import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.ndarray.INDArray;

-import java.util.*;
-
 @Data
-@NoArgsConstructor
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
 @SuperBuilder(buildMethodName = "initBuild")
 public class DepthwiseConvolution2D extends ConvolutionLayer {
   /**
    * Set channels multiplier for depth-wise convolution
    *
-   * @param depthMultiplier integer value, for each input map we get depthMultiplier outputs in channels-wise
-   * step.
+   * @param depthMultiplier integer value, for each input map we get depthMultiplier outputs in
+   * channels-wise step.
    * @return Builder
    */
-  @Builder.Default
-  protected int depthMultiplier = 1;
+  @Builder.Default protected int depthMultiplier = 1;
   /**
    * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last).
    * See {@link CNN2DFormat} for more details.<br>
    * Default: NCHW
    *
    * @param format Format for activations (in and out)
    */
   @Builder.Default
   protected CNN2DFormat dataFormat =
       CNN2DFormat.NCHW; // default value for legacy serialization reasons
   /**
    * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last).
    * See {@link CNN2DFormat} for more details.<br>
    * Default: NCHW
    *
    * @param format Format for activations (in and out)
    */
-  @Builder.Default
-  protected CNN2DFormat cnn2DFormat = CNN2DFormat.NCHW;
-
-  public static abstract class DepthwiseConvolution2DBuilder<C extends DepthwiseConvolution2D, B extends DepthwiseConvolution2DBuilder<C, B>>
-      extends ConvolutionLayerBuilder<C, B> {
-    public C build() {
-      Preconditions.checkState(depthMultiplier$value > 0, "Depth multiplier must be > 0, got %s", depthMultiplier$value);
-      C l = this.initBuild();
-      ConvolutionUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding());
-      ConvolutionUtils.validateCnnKernelStridePadding(l.getKernelSize(), l.getStride(), l.getPadding());
-      l.initializeConstraints();
-      return l;
-    }
-
-    @Override
-    public B kernelSize(int... kernelSize) {
-      super.kernelSize(ValidationUtils.validate2NonNegative(kernelSize, false, "kernelSize"));
-      return self();
-    }
-    @Override
-    public B stride(int... stride) {
-      super.stride(ValidationUtils.validate2NonNegative(stride, false, "stride"));
-      return self();
-    }
-    @Override
-    public B padding(int... padding) {
-      super.padding(ValidationUtils.validate2NonNegative(padding, false, "padding"));
-      return self();
-    }
-    @Override
-    public B dilation(int... dilation) {
-      super.dilation(ValidationUtils.validate2NonNegative(dilation, false, "dilation"));
-      return self();
-    }
-  }
+  @Builder.Default protected CNN2DFormat cnn2DFormat = CNN2DFormat.NCHW;

   protected boolean allowCausal() {
-    //Causal convolution - allowed for 1D only
+    // Causal convolution - allowed for 1D only
     return false;
   }

   @Override
   public DepthwiseConvolution2D clone() {
     DepthwiseConvolution2D clone = (DepthwiseConvolution2D) super.clone();
     clone.depthMultiplier = depthMultiplier;
     return clone;
   }

   @Override
-  public Layer instantiate(NeuralNetConfiguration conf, Collection<TrainingListener> trainingListeners,
-      int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) {
-    LayerValidation.assertNInNOutSet("DepthwiseConvolution2D", getName(), layerIndex, getNIn(), getNOut());
+  public Layer instantiate(
+      NeuralNetConfiguration conf,
+      Collection<TrainingListener> trainingListeners,
+      int layerIndex,
+      INDArray layerParamsView,
+      boolean initializeParams,
+      DataType networkDataType) {
+    LayerValidation.assertNInNOutSet(
+        "DepthwiseConvolution2D", getName(), layerIndex, getNIn(), getNOut());

     LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
     runInheritance();
     DepthwiseConvolution2DLayer ret = new DepthwiseConvolution2DLayer(lconf, networkDataType);

     ret.addTrainingListeners(trainingListeners);
     ret.setIndex(layerIndex);
     ret.setParamsViewArray(layerParamsView);
     Map<String, INDArray> paramTable = initializer().init(this, layerParamsView, initializeParams);
     ret.setParamTable(paramTable);
     ret.setLayerConfiguration(lconf);

     return ret;
   }

   @Override
   public ParamInitializer initializer() {
     return DepthwiseConvolutionParamInitializer.getInstance();
   }

   @Override
   public InputType getOutputType(int layerIndex, InputType inputType) {
     if (inputType == null || inputType.getType() != InputType.Type.CNN) {
-      throw new IllegalStateException("Invalid input for depth-wise convolution layer (layer name=\""
-          + getName() + "\"): Expected CNN input, got " + inputType);
+      throw new IllegalStateException(
+          "Invalid input for depth-wise convolution layer (layer name=\""
+              + getName()
+              + "\"): Expected CNN input, got "
+              + inputType);
     }

-    return InputTypeUtil.getOutputTypeCnnLayers(inputType, kernelSize, stride, padding, dilation, convolutionMode,
-        nOut, layerIndex, getName(), dataFormat, DepthwiseConvolution2DLayer.class);
+    return InputTypeUtil.getOutputTypeCnnLayers(
+        inputType,
+        getKernelSize(),
+        getStride(),
+        getPadding(),
+        getDilation(),
+        getConvolutionMode(),
+        nOut,
+        layerIndex,
+        getName(),
+        dataFormat,
+        DepthwiseConvolution2DLayer.class);
   }

   @Override
   public void setNIn(InputType inputType, boolean override) {
     super.setNIn(inputType, override);

-    if(nOut == 0 || override){
+    if (nOut == 0 || override) {
       nOut = this.nIn * this.depthMultiplier;
     }
-    this.dataFormat = ((InputType.InputTypeConvolutional)inputType).getFormat();
+    this.dataFormat = ((InputType.InputTypeConvolutional) inputType).getFormat();
   }

+  public abstract static class DepthwiseConvolution2DBuilder<
+          C extends DepthwiseConvolution2D, B extends DepthwiseConvolution2DBuilder<C, B>>
+      extends ConvolutionLayerBuilder<C, B> {
+    public C build() {
+      Preconditions.checkState(
+          depthMultiplier$value > 0,
+          "Depth multiplier must be > 0, got %s",
+          depthMultiplier$value);
+      C l = this.initBuild();
+      ConvolutionUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding());
+      ConvolutionUtils.validateCnnKernelStridePadding(
+          l.getKernelSize(), l.getStride(), l.getPadding());
+      l.initializeConstraints();
+      return l;
+    }
+
+    @Override
+    public B kernelSize(int... kernelSize) {
+      super.kernelSize(ValidationUtils.validate2NonNegative(kernelSize, false, "kernelSize"));
+      return self();
+    }
+
+    @Override
+    public B stride(int... stride) {
+      super.stride(ValidationUtils.validate2NonNegative(stride, false, "stride"));
+      return self();
+    }
+
+    @Override
+    public B padding(int... padding) {
+      super.padding(ValidationUtils.validate2NonNegative(padding, false, "padding"));
+      return self();
+    }
+
+    @Override
+    public B dilation(int... dilation) {
+      super.dilation(ValidationUtils.validate2NonNegative(dilation, false, "dilation"));
+      return self();
+    }
+  }
 }
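
Because @SuperBuilder(buildMethodName = "initBuild") renames the generated build method, the hand-written build() in the nested builder can validate and post-process before delegating to initBuild(). A hedged usage sketch (the nIn, nOut and depthMultiplier setters are assumed to be generated from the corresponding fields):

    DepthwiseConvolution2D conv = DepthwiseConvolution2D.builder()
            .nIn(3).nOut(6)
            .depthMultiplier(2)
            .kernelSize(3, 3)     // routed through the validating override above
            .build();             // runs the Preconditions check, then initBuild()
    // depthMultiplier(0) would fail fast with "Depth multiplier must be > 0, got 0"
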
@@ -45,15 +45,12 @@ import org.nd4j.linalg.learning.regularization.Regularization;
  * the input activation. See {@link Dropout} for the full details
  */
 @Data
-@NoArgsConstructor
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
 @SuperBuilder(builderMethodName = "innerBuilder")
 public class DropoutLayer extends FeedForwardLayer {

-  {
-    setType(LayerType.DO);
-  }
-
   public static DropoutLayerBuilder<?,?> builder() {
     return innerBuilder();
@@ -36,6 +36,10 @@ import org.deeplearning4j.nn.conf.preprocessor.RnnToFeedForwardPreProcessor;
 @EqualsAndHashCode(callSuper = true)
 @SuperBuilder
 public abstract class FeedForwardLayer extends BaseLayerConfiguration {
+  public static abstract class FeedForwardLayerBuilder<C extends FeedForwardLayer, B extends FeedForwardLayerBuilder<C, B>>
+      extends BaseLayerConfigurationBuilder<C, B> {
+
+  }
   /**
    * Number of inputs for the layer (usually the size of the last layer). <br> Note that for Convolutional layers,
    * this is the input channels, otherwise is the previous layer size.
@@ -55,7 +59,7 @@ public abstract class FeedForwardLayer extends BaseLayerConfiguration {
    * this is the input channels, otherwise is the previous layer size.
    *
    */
-  @Getter
+  @Getter @Setter
   protected long nOut;
   protected DataFormat timeDistributedFormat;

@@ -57,10 +57,10 @@ public abstract class LayerConfiguration
     implements ILayerConfiguration, Serializable, Cloneable { // ITrainableLayerConfiguration

   @Getter @Setter protected String name;
-  @Getter protected List<LayerConstraint> allParamConstraints;
-  @Getter protected List<LayerConstraint> weightConstraints;
-  @Getter protected List<LayerConstraint> biasConstraints;
-  @Getter protected List<LayerConstraint> constraints;
+  @Getter @Setter protected List<LayerConstraint> allParamConstraints;
+  @Getter @Setter protected List<LayerConstraint> weightConstraints;
+  @Getter @Setter protected List<LayerConstraint> biasConstraints;
+  @Getter @Setter protected List<LayerConstraint> constraints;
   @Getter @Setter protected IWeightNoise weightNoise;
   @Builder.Default private @Getter @Setter LinkedHashSet<String> variables = new LinkedHashSet<>();
   @Getter @Setter private IDropout dropOut;
@@ -325,4 +325,15 @@ public abstract class LayerConfiguration
     runInheritance(getNetConfiguration());
   }

+  public abstract static class LayerConfigurationBuilder<
+      C extends LayerConfiguration, B extends LayerConfigurationBuilder<C, B>> {
+    public B dropOut(double d) {
+      this.dropOut(new Dropout(d));
+      return self();
+    }
+    public B dropOut(IDropout d) {
+      this.dropOut = d;
+      return self();
+    }
+  }
 }
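
The hand-completed LayerConfigurationBuilder gives every concrete layer builder a dropOut(double) shorthand that wraps the probability in a Dropout instance, alongside the plain IDropout overload. A hedged usage sketch:

    DenseLayer a = DenseLayer.builder().nIn(100).nOut(50)
            .dropOut(0.5)                  // shorthand, wraps new Dropout(0.5)
            .build();
    DenseLayer b = DenseLayer.builder().nIn(100).nOut(50)
            .dropOut(new Dropout(0.8))     // explicit IDropout still accepted
            .build();
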
@@ -61,9 +61,6 @@ public class LearnedSelfAttentionLayer extends SameDiffLayer {
   /** Number of queries to learn */
   private int nQueries;

-  private LearnedSelfAttentionLayer() {
-    /*No arg constructor for serialization*/
-  }
-
   @Override
   public InputPreProcessor getPreProcessorForInputType(InputType inputType) {
@@ -32,9 +32,6 @@ import org.nd4j.linalg.learning.regularization.Regularization;

 @SuperBuilder
 public abstract class NoParamLayer extends LayerConfiguration {
-  {
-    setType(LayerType.POOL);
-  }

   @Override
   public ParamInitializer initializer() {
@@ -42,7 +42,6 @@ import org.nd4j.linalg.api.ndarray.INDArray;
  * filter/kernel size, the stride and padding The pooling layer takes the kernel size
  */
 @Data
-@NoArgsConstructor
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
 @SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder")
@@ -103,20 +102,20 @@ public class SeparableConvolution2D extends ConvolutionLayer {
   }

   public boolean hasBias() {
-    return hasBias;
+    return isHasBias();
   }

   @Override
   public SeparableConvolution2D clone() {
     SeparableConvolution2D clone = (SeparableConvolution2D) super.clone();
-    if (clone.kernelSize != null) {
-      clone.kernelSize = clone.kernelSize.clone();
+    if (clone.getKernelSize() != null) {
+      clone.setKernelSize( clone.getKernelSize().clone());
     }
-    if (clone.stride != null) {
-      clone.stride = clone.stride.clone();
+    if (clone.getStride() != null) {
+      clone.setStride( clone.getStride().clone());
     }
-    if (clone.padding != null) {
-      clone.padding = clone.padding.clone();
+    if (clone.getPadding() != null) {
+      clone.setPadding( clone.getPadding().clone());
     }
     return clone;
   }
|
@ -165,11 +164,11 @@ public class SeparableConvolution2D extends ConvolutionLayer {
|
||||||
|
|
||||||
return InputTypeUtil.getOutputTypeCnnLayers(
|
return InputTypeUtil.getOutputTypeCnnLayers(
|
||||||
inputType,
|
inputType,
|
||||||
kernelSize,
|
getKernelSize(),
|
||||||
stride,
|
getStride(),
|
||||||
padding,
|
getPadding(),
|
||||||
dilation,
|
getDilation(),
|
||||||
convolutionMode,
|
getConvolutionMode(),
|
||||||
nOut,
|
nOut,
|
||||||
layerIndex,
|
layerIndex,
|
||||||
getName(),
|
getName(),
|
||||||
|
|
|
@@ -20,6 +20,9 @@

 package org.deeplearning4j.nn.conf.layers.misc;

+import java.util.Collection;
+import java.util.List;
+import java.util.Set;
 import lombok.EqualsAndHashCode;
 import lombok.Getter;
 import lombok.Setter;
@@ -37,108 +40,111 @@ import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.ndarray.INDArray;
 import org.nd4j.linalg.learning.config.IUpdater;
 import org.nd4j.linalg.learning.regularization.Regularization;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import java.util.Collection;
-import java.util.List;
-import java.util.Set;

 @EqualsAndHashCode(callSuper = false)
-@SuperBuilder
+@SuperBuilder(builderMethodName = "innerBuilder")
 public class FrozenLayer extends LayerConfiguration {

-  /**
-   * A layer configuration, only if this layer config has been created from another one
-   */
-  @Getter @Setter
-  private LayerConfiguration innerConfiguration;
-
-  public FrozenLayer(@JsonProperty("layer") LayerConfiguration layer) {
-    this.innerConfiguration = layer;
-  }
+  /** A layer configuration, only if this layer config has been created from another one */
+  @Getter @Setter private LayerConfiguration innerConfiguration;
+
+  public static FrozenLayerBuilder<?, ?> builder() {
+    return innerBuilder();
+  }
+
+  public static FrozenLayerBuilder<?, ?> builder(LayerConfiguration innerConfiguration) {
+    return innerBuilder().innerConfiguration(innerConfiguration);
+  }

   @Override
   public LayerConfiguration clone() {
     FrozenLayer l = (FrozenLayer) super.clone();
     l.innerConfiguration = innerConfiguration.clone();
     return l;
   }

   @Override
-  public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf,
-      Collection<TrainingListener> trainingListeners, int layerIndex, INDArray layerParamsView,
-      boolean initializeParams, DataType networkDataType) {
-
-    //Need to be able to instantiate a layer, from a config - for JSON -> net type situations
-    org.deeplearning4j.nn.api.Layer underlying = innerConfiguration.instantiate(getNetConfiguration(), trainingListeners,
-        layerIndex, layerParamsView, initializeParams, networkDataType);
+  public org.deeplearning4j.nn.api.Layer instantiate(
+      NeuralNetConfiguration conf,
+      Collection<TrainingListener> trainingListeners,
+      int layerIndex,
+      INDArray layerParamsView,
+      boolean initializeParams,
+      DataType networkDataType) {
+
+    // Need to be able to instantiate a layer, from a config - for JSON -> net type situations
+    org.deeplearning4j.nn.api.Layer underlying =
+        innerConfiguration.instantiate(
+            getNetConfiguration(),
+            trainingListeners,
+            layerIndex,
+            layerParamsView,
+            initializeParams,
+            networkDataType);

     NeuralNetConfiguration nncUnderlying = underlying.getNetConfiguration();
     if (nncUnderlying.getNetWideVariables() != null) {
       Set<String> vars = nncUnderlying.getNetWideVariables(true);
       nncUnderlying.clearNetWideVariable();
       conf.clearNetWideVariable();
       for (String s : vars) {
         conf.getNetWideVariables(false).add(s);
         nncUnderlying.getNetWideVariables(false).add(s);
       }
     }

     return new org.deeplearning4j.nn.layers.FrozenLayer(underlying);
   }

   @Override
   public ParamInitializer initializer() {
     return FrozenLayerParamInitializer.getInstance();
   }

   @Override
   public InputType getOutputType(int layerIndex, InputType inputType) {
     return innerConfiguration.getOutputType(layerIndex, inputType);
   }

   @Override
   public void setNIn(InputType inputType, boolean override) {
     innerConfiguration.setNIn(inputType, override);
   }

   @Override
   public InputPreProcessor getPreProcessorForInputType(InputType inputType) {
     return innerConfiguration.getPreProcessorForInputType(inputType);
   }

   @Override
-  public List<Regularization> getRegularizationByParam(String param){
+  public List<Regularization> getRegularizationByParam(String param) {
     return null;
   }

   @Override
   public boolean isPretrainParam(String paramName) {
     return false;
   }

   @Override
   public IUpdater getUpdaterByParam(String paramName) {
     return null;
   }

   @Override
   public LayerMemoryReport getMemoryReport(InputType inputType) {
     return innerConfiguration.getMemoryReport(inputType);
   }

   @Override
   public void setName(String layerName) {
     super.setName(layerName);
     innerConfiguration.setName(layerName);
   }

   @Override
   public void setConstraints(List<LayerConstraint> constraints) {
     this.constraints = constraints;
     this.innerConfiguration.setConstraints(constraints);
   }
 }
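
The Jackson-annotated constructor disappears in favour of two static builder() overloads, one empty and one that seeds innerConfiguration. A hedged sketch of the replacement for new FrozenLayer(layer):

    LayerConfiguration inner = DenseLayer.builder().nIn(10).nOut(10).build();
    FrozenLayer frozen = FrozenLayer.builder(inner).build();   // seeds innerConfiguration
    FrozenLayer same = FrozenLayer.builder()                   // equivalent long form
            .innerConfiguration(inner)
            .build();
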
@@ -22,6 +22,7 @@ package org.deeplearning4j.nn.conf.layers.misc;

 import lombok.Data;
 import lombok.EqualsAndHashCode;
+import lombok.experimental.SuperBuilder;
 import org.deeplearning4j.nn.api.ParamInitializer;
 import org.deeplearning4j.nn.api.layers.LayerConstraint;
 import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
@@ -39,19 +40,23 @@ import java.util.Collection;
 import java.util.List;
 import java.util.Set;

-@Data
 @EqualsAndHashCode(callSuper = false)
+@SuperBuilder(builderMethodName = "innerBuilder")
 public class FrozenLayerWithBackprop extends BaseWrapperLayerConfiguration {

+  public static FrozenLayerWithBackpropBuilder<?, ?> builder() {
+    return innerBuilder();
+  }
   /**
    * Create a new Frozen Layer, that wraps another layer with backpropagation enabled.
    *
-   * @param layer configuration of the layer to wrap
+   * @param innerConfiguration configuration of the layer to wrap
    */
-  public FrozenLayerWithBackprop(@JsonProperty("layer") LayerConfiguration layer) {
-    super(layer);
+  public static FrozenLayerWithBackpropBuilder<?, ?> builder(LayerConfiguration innerConfiguration) {
+    return innerBuilder().underlying(innerConfiguration);
   }

   public NeuralNetConfiguration getInnerConf(NeuralNetConfiguration conf) {
     NeuralNetConfiguration nnc = conf.clone();
     nnc.getLayerConfigurations().add(0, underlying);
@@ -46,7 +46,7 @@ import org.nd4j.linalg.lossfunctions.ILossFunction;
 import org.nd4j.linalg.lossfunctions.impl.LossL2;
 import org.nd4j.serde.jackson.shaded.NDArrayTextSerializer;

-@Data
 @EqualsAndHashCode(callSuper = false)
 @SuperBuilder(buildMethodName = "initBuild")
 public class Yolo2OutputLayer extends LayerConfiguration {
@ -55,20 +55,20 @@ public class Yolo2OutputLayer extends LayerConfiguration {
|
||||||
* Loss function coefficient for position and size/scale components of the loss function. Default
|
* Loss function coefficient for position and size/scale components of the loss function. Default
|
||||||
* (as per paper): 5
|
* (as per paper): 5
|
||||||
*/
|
*/
|
||||||
@Builder.Default private double lambdaCoord = 5;
|
@Builder.Default @Getter private double lambdaCoord = 5;
|
||||||
/**
|
/**
|
||||||
* Loss function coefficient for the "no object confidence" components of the loss function.
|
* Loss function coefficient for the "no object confidence" components of the loss function.
|
||||||
* Default (as per paper): 0.5
|
* Default (as per paper): 0.5
|
||||||
*/
|
*/
|
||||||
@Builder.Default private double lambdaNoObj = 0.5;
|
@Builder.Default @Getter private double lambdaNoObj = 0.5;
|
||||||
/** Loss function for position/scale component of the loss function */
|
/** Loss function for position/scale component of the loss function */
|
||||||
@Builder.Default private ILossFunction lossPositionScale = new LossL2();
|
@Builder.Default @Getter private ILossFunction lossPositionScale = new LossL2();
|
||||||
/**
|
/**
|
||||||
* Loss function for the class predictions - defaults to L2 loss (i.e., sum of squared errors, as
|
* Loss function for the class predictions - defaults to L2 loss (i.e., sum of squared errors, as
|
||||||
* per the paper), however Loss MCXENT could also be used (which is more common for
|
* per the paper), however Loss MCXENT could also be used (which is more common for
|
||||||
* classification).
|
* classification).
|
||||||
*/
|
*/
|
||||||
@Builder.Default private ILossFunction lossClassPredictions = new LossL2();
|
@Builder.Default @Getter private ILossFunction lossClassPredictions = new LossL2();
|
||||||
;
|
;
|
||||||
/**
|
/**
|
||||||
* Bounding box priors dimensions [width, height]. For N bounding boxes, input has shape [rows,
|
* Bounding box priors dimensions [width, height]. For N bounding boxes, input has shape [rows,
|
||||||
|
@ -78,15 +78,12 @@ public class Yolo2OutputLayer extends LayerConfiguration {
|
||||||
*/
|
*/
|
||||||
@JsonSerialize(using = NDArrayTextSerializer.class)
|
@JsonSerialize(using = NDArrayTextSerializer.class)
|
||||||
@JsonDeserialize(using = BoundingBoxesDeserializer.class)
|
@JsonDeserialize(using = BoundingBoxesDeserializer.class)
|
||||||
@Builder.Default
|
@Builder.Default @Getter
|
||||||
private INDArray boundingBoxes;
|
private INDArray boundingBoxes;
|
||||||
|
|
||||||
@Builder.Default
|
@Builder.Default @Getter
|
||||||
private CNN2DFormat format = CNN2DFormat.NCHW; // Default for serialization of old formats
|
private CNN2DFormat format = CNN2DFormat.NCHW; // Default for serialization of old formats
|
||||||
|
|
||||||
private Yolo2OutputLayer() {
|
|
||||||
// No-arg constructor for Jackson JSON
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Layer instantiate(
|
public Layer instantiate(
|
||||||
|
|
|
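With @Data gone, the explicit @Getter annotations preserve the read accessors the configuration previously exposed. A hedged construction sketch (the priors values and the terminal build() call, assumed to delegate to the configured initBuild(), are illustrative assumptions):

    // Two bounding-box priors, each given as [width, height].
    INDArray priors = Nd4j.create(new double[][] {{1.5, 1.5}, {3.0, 3.0}});
    Yolo2OutputLayer yolo = Yolo2OutputLayer.builder()
            .boundingBoxes(priors)
            .lambdaCoord(5.0)      // defaults shown above are kept via @Builder.Default if omitted
            .lambdaNoObj(0.5)
            .build();              // assumed to wrap the generated initBuild()
    double coord = yolo.getLambdaCoord();   // accessor now comes from @Getter instead of @Data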
@@ -20,6 +20,7 @@

 package org.deeplearning4j.nn.conf.layers.recurrent;

+import lombok.experimental.SuperBuilder;
 import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
 import org.deeplearning4j.nn.conf.inputs.InputType;
 import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
@@ -30,14 +31,18 @@ import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.ndarray.INDArray;

 import java.util.Collection;

+@SuperBuilder(builderMethodName = "innerBuilder")
 public class LastTimeStep extends BaseWrapperLayerConfiguration {

-  private LastTimeStep() {}
+  public static LastTimeStepBuilder<?,?> builder() {
+    return innerBuilder();
+  }

-  public LastTimeStep(LayerConfiguration underlying) {
-    super(underlying);
-    this.name = underlying.getName(); // needed for keras import to match names
+  public static LastTimeStepBuilder<?,?> builder(LayerConfiguration underlying) {
+    return innerBuilder()
+        .underlying(underlying)
+        .name(underlying.getName());
   }

   public LayerConfiguration getUnderlying() {
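The convenience overload keeps the behaviour of the removed constructor, including copying the wrapped layer's name for Keras import. A hedged sketch (LSTM.builder() and its name() property are assumed to follow the same builder pattern as the other layers in this commit):

    LayerConfiguration lstm = LSTM.builder().nIn(32).nOut(64).name("lstm_1").build();
    LastTimeStep lastStep = LastTimeStep.builder(lstm).build();   // wrapper name becomes "lstm_1"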
@@ -41,7 +41,6 @@ import java.util.Map;


 @EqualsAndHashCode(callSuper = false)
-@NoArgsConstructor
 @SuperBuilder
 public class SimpleRnn extends BaseRecurrentLayer {
   /**
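Dropping @NoArgsConstructor means a SimpleRnn configuration is now created through the @SuperBuilder-generated builder. A hedged sketch (nIn, nOut and activation are assumed to be builder properties inherited from the parent configuration classes):

    SimpleRnn rnn = SimpleRnn.builder()
            .nIn(20)
            .nOut(10)
            .activation(Activation.TANH)
            .build();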
@@ -20,9 +20,9 @@

 package org.deeplearning4j.nn.conf.layers.recurrent;

-import lombok.Data;
-import lombok.EqualsAndHashCode;
-import lombok.NonNull;
+import java.util.Collection;
+import lombok.*;
+import lombok.experimental.SuperBuilder;
 import org.deeplearning4j.nn.conf.InputPreProcessor;
 import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
 import org.deeplearning4j.nn.conf.RNNFormat;
@@ -33,66 +33,67 @@ import org.deeplearning4j.nn.layers.recurrent.TimeDistributedLayer;
 import org.deeplearning4j.optimize.api.TrainingListener;
 import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.ndarray.INDArray;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import java.util.Collection;
-
-@Data
 @EqualsAndHashCode(callSuper = true)
+@SuperBuilder
 public class TimeDistributed extends BaseWrapperLayerConfiguration {
+  @Getter @Setter private RNNFormat rnnDataFormat = RNNFormat.NCW;

-  private RNNFormat rnnDataFormat = RNNFormat.NCW;
-
-  /**
-   * @param underlying Underlying (internal) layer - should be a feed forward type such as DenseLayerConfiguration
-   */
-  public TimeDistributed(@JsonProperty("underlying") @NonNull LayerConfiguration underlying, @JsonProperty("rnnDataFormat") RNNFormat rnnDataFormat) {
-    super(underlying);
-    this.rnnDataFormat = rnnDataFormat;
+  @Override
+  public org.deeplearning4j.nn.api.Layer instantiate(
+      NeuralNetConfiguration conf,
+      Collection<TrainingListener> trainingListeners,
+      int layerIndex,
+      INDArray layerParamsView,
+      boolean initializeParams,
+      DataType networkDataType) {
+    LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
+
+    NeuralNetConfiguration conf2 = conf.clone();
+    conf2.setLayer(((TimeDistributed) lconf).getUnderlying());
+    return new TimeDistributedLayer(
+        underlying.instantiate(
+            conf2,
+            trainingListeners,
+            layerIndex,
+            layerParamsView,
+            initializeParams,
+            networkDataType),
+        rnnDataFormat);
+  }
+
+  @Override
+  public InputType getOutputType(int layerIndex, InputType inputType) {
+    if (inputType.getType() != InputType.Type.RNN) {
+      throw new IllegalStateException(
+          "Only RNN input type is supported as input to TimeDistributed layer (layer #"
+              + layerIndex
+              + ")");
   }

-  public TimeDistributed(LayerConfiguration underlying){
-    super(underlying);
+    InputType.InputTypeRecurrent rnn = (InputType.InputTypeRecurrent) inputType;
+    InputType ff = InputType.feedForward(rnn.getSize());
+    InputType ffOut = underlying.getOutputType(layerIndex, ff);
+    return InputType.recurrent(
+        ffOut.arrayElementsPerExample(), rnn.getTimeSeriesLength(), rnnDataFormat);
+  }
+
+  @Override
+  public void setNIn(InputType inputType, boolean override) {
+    if (inputType.getType() != InputType.Type.RNN) {
+      throw new IllegalStateException(
+          "Only RNN input type is supported as input to TimeDistributed layer");
   }

-  @Override
-  public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection<TrainingListener> trainingListeners,
-      int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) {
-    LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
-
-    NeuralNetConfiguration conf2 = conf.clone();
-    conf2.setLayer(((TimeDistributed) lconf).getUnderlying());
-    return new TimeDistributedLayer(underlying.instantiate(conf2, trainingListeners, layerIndex, layerParamsView,
-        initializeParams, networkDataType), rnnDataFormat);
+    InputType.InputTypeRecurrent rnn = (InputType.InputTypeRecurrent) inputType;
+    InputType ff = InputType.feedForward(rnn.getSize());
+    this.rnnDataFormat = rnn.getFormat();
+    underlying.setNIn(ff, override);
+  }
+
+  @Override
+  public InputPreProcessor getPreProcessorForInputType(InputType inputType) {
+    // No preprocessor - the wrapper layer operates as the preprocessor
+    return null;
   }

-  @Override
-  public InputType getOutputType(int layerIndex, InputType inputType) {
-    if (inputType.getType() != InputType.Type.RNN) {
-      throw new IllegalStateException("Only RNN input type is supported as input to TimeDistributed layer (layer #" + layerIndex + ")");
-    }
-
-    InputType.InputTypeRecurrent rnn = (InputType.InputTypeRecurrent) inputType;
-    InputType ff = InputType.feedForward(rnn.getSize());
-    InputType ffOut = underlying.getOutputType(layerIndex, ff);
-    return InputType.recurrent(ffOut.arrayElementsPerExample(), rnn.getTimeSeriesLength(), rnnDataFormat);
-  }
-
-  @Override
-  public void setNIn(InputType inputType, boolean override) {
-    if (inputType.getType() != InputType.Type.RNN) {
-      throw new IllegalStateException("Only RNN input type is supported as input to TimeDistributed layer");
-    }
-
-    InputType.InputTypeRecurrent rnn = (InputType.InputTypeRecurrent) inputType;
-    InputType ff = InputType.feedForward(rnn.getSize());
-    this.rnnDataFormat = rnn.getFormat();
-    underlying.setNIn(ff, override);
-  }
-
-  @Override
-  public InputPreProcessor getPreProcessorForInputType(InputType inputType) {
-    //No preprocessor - the wrapper layer operates as the preprocessor
-    return null;
-  }
 }
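The constructor-based wiring above is replaced by builder properties, and rnnDataFormat is now an ordinary @Getter/@Setter field. A hedged sketch (the field initializer defaults to NCW, but since no @Builder.Default is declared it is set explicitly here):

    TimeDistributed td = TimeDistributed.builder()
            .underlying(DenseLayer.builder().nIn(64).nOut(32).build())
            .rnnDataFormat(RNNFormat.NCW)
            .build();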
@@ -20,6 +20,7 @@

 package org.deeplearning4j.nn.conf.layers.samediff;

+import lombok.Builder;
 import lombok.EqualsAndHashCode;
 import lombok.experimental.SuperBuilder;
 import org.deeplearning4j.nn.api.Layer;
@@ -47,7 +48,9 @@ public abstract class SameDiffLayer extends AbstractSameDiffLayer {
  /**
   * WeightInit, default is XAVIER.
   */
+ @Builder.Default
  protected WeightInit weightInit = WeightInit.XAVIER;
+ @Builder.Default
  protected Map<String,IWeightInit> paramWeightInit = new HashMap<>();
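The new @Builder.Default annotations matter because Lombok-generated builders ignore plain field initializers: without them, weightInit and paramWeightInit would come out null when the layer is built without setting them. A standalone Lombok illustration (not part of this commit's sources):

    @lombok.experimental.SuperBuilder
    class Defaults {
        @lombok.Builder.Default int kept = 42;   // stays 42 when not set on the builder
        int dropped = 7;                          // becomes 0 when built via the builder
    }
    // Defaults d = Defaults.builder().build();  ->  d.kept == 42, d.dropped == 0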
@@ -20,6 +20,7 @@

 package org.deeplearning4j.nn.conf.layers.samediff;

+import lombok.experimental.SuperBuilder;
 import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
 import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
 import org.deeplearning4j.optimize.api.TrainingListener;
@@ -30,13 +31,10 @@ import org.nd4j.linalg.api.ndarray.INDArray;

 import java.util.Collection;
 import java.util.Map;

+@SuperBuilder
 public abstract class SameDiffOutputLayer extends AbstractSameDiffLayer {

-  protected SameDiffOutputLayer() {
-    //No op constructor for Jackson
-  }
-
  /**
   * Define the output layer
@@ -21,6 +21,7 @@
 package org.deeplearning4j.nn.conf.layers.util;

 import lombok.NoArgsConstructor;
+import lombok.experimental.SuperBuilder;
 import org.deeplearning4j.nn.api.ParamInitializer;
 import org.deeplearning4j.nn.conf.InputPreProcessor;
 import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
@@ -38,7 +39,7 @@ import java.util.Collection;
 import java.util.List;
 import java.util.Map;

-@NoArgsConstructor
+@SuperBuilder
 public class MaskLayer extends NoParamLayer {
   @Override
   public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf,
@@ -35,22 +35,17 @@ import com.fasterxml.jackson.annotation.JsonProperty;

 import java.util.Collection;

-@Data
 @EqualsAndHashCode(callSuper = false)
 @SuperBuilder
 public class MaskZeroLayer extends BaseWrapperLayerConfiguration {
-  @Builder.Default
+  @Builder.Default @Getter @Setter
   private double maskingValue = 0.0;

   private static final long serialVersionUID = 9074525846200921839L;

-  public MaskZeroLayer(@JsonProperty("underlying") LayerConfiguration underlying, @JsonProperty("maskingValue") double maskingValue) {
-    this.underlying = underlying;
-    this.maskingValue = maskingValue;
-  }
-
   @Override
   public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf,
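With the @JsonProperty constructor removed, the masking value and the wrapped layer are supplied through the builder. A hedged sketch (LSTM.builder() is an assumption, as above):

    MaskZeroLayer maskZero = MaskZeroLayer.builder()
            .underlying(LSTM.builder().nIn(16).nOut(16).build())
            .maskingValue(0.0)     // time steps whose input equals this value are masked
            .build();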
@@ -23,6 +23,7 @@ package org.deeplearning4j.nn.conf.layers.wrapper;
 import java.util.List;
 import lombok.EqualsAndHashCode;
 import lombok.Getter;
+import lombok.Setter;
 import lombok.experimental.SuperBuilder;
 import org.deeplearning4j.nn.api.ParamInitializer;
 import org.deeplearning4j.nn.conf.InputPreProcessor;
@@ -42,7 +43,8 @@ import org.nd4j.linalg.learning.regularization.Regularization;
 public abstract class BaseWrapperLayerConfiguration extends LayerConfiguration {

   /** The configuration to of another layer to wrap */
-  @Getter protected LayerConfiguration underlying;
+  @Getter @Setter
+  protected LayerConfiguration underlying;

   /**
    * Set the net configuration for this configuration as well as for the underlying layer (if not
@@ -38,8 +38,6 @@ import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.ndarray.INDArray;
 import org.nd4j.linalg.learning.regularization.Regularization;

-@Data
-@NoArgsConstructor
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
 @JsonIgnoreProperties("lossFn")
@@ -349,6 +349,6 @@ public abstract class BaseOutputLayer<LayerConfT extends org.deeplearning4j.nn.c

     @Override
     public boolean hasBias() {
-        return getTypedLayerConfiguration().hasBias();
+        return getTypedLayerConfiguration().isHasBias();
     }
 }
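The switch from hasBias() to isHasBias() follows Lombok's accessor naming: a @Getter on a boolean field called hasBias generates isHasBias(), not hasBias(). A standalone illustration (not DL4J source):

    class Conf {
        @lombok.Getter private boolean hasBias = true;   // generated accessor: isHasBias()
    }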
@@ -109,7 +109,7 @@ public abstract class BasePretrainNetwork<LayerConfT extends org.deeplearning4j.
     protected void setScoreWithZ(INDArray z) {
         if (input == null || z == null)
             throw new IllegalStateException("Cannot calculate score without input and labels " + layerId());
         ILossFunction lossFunction = getTypedLayerConfiguration().getLossFunction().getILossFunction();

         //double score = lossFunction.computeScore(input, z, layerConf().getActivationFunction(), maskArray, false);
         double score = lossFunction.computeScore(input, z, getTypedLayerConfiguration().getActivationFn(), maskArray, false);
@@ -226,7 +226,7 @@ public class DefaultParamInitializer extends AbstractParamInitializer {

     protected boolean hasBias(LayerConfiguration layer){
         if(layer instanceof BaseOutputLayer ) {
-            return ((BaseOutputLayer) layer).hasBias();
+            return ((BaseOutputLayer) layer).isHasBias();
         } else if(layer instanceof DenseLayer){
             return ((DenseLayer)layer).isHasBias();
         } else if(layer instanceof EmbeddingLayer){
@@ -382,7 +382,7 @@ public class TransferLearning {
            }

            LayerConfiguration origLayerConf = editedModel.getNetConfiguration().getFlattenedLayerConfigurations().get(i);
-           LayerConfiguration newLayerConf = new org.deeplearning4j.nn.conf.layers.misc.FrozenLayer(origLayerConf);
+           LayerConfiguration newLayerConf = org.deeplearning4j.nn.conf.layers.misc.FrozenLayer.builder().innerConfiguration(origLayerConf).build();
            newLayerConf.setName(origLayerConf.getName());
            editedModel.getNetConfiguration().getNetConfigurations().get(i).setLayer(newLayerConf);
        }
@@ -1009,7 +1009,7 @@ public class TransferLearning {
                String layerName = gv.getVertexName();
                LayerVertex currLayerVertex = (LayerVertex) newConfig.getVertices().get(layerName);
                LayerConfiguration origLayerConf = currLayerVertex.getLayerConfiguration();
-               LayerConfiguration newLayerConf = new org.deeplearning4j.nn.conf.layers.misc.FrozenLayer(origLayerConf);
+               LayerConfiguration newLayerConf = org.deeplearning4j.nn.conf.layers.misc.FrozenLayer.builder().innerConfiguration(origLayerConf).build();
                newLayerConf.setName(origLayerConf.getName());
                //Complication here(and reason for clone on next line): inner LayerConfiguration (implementation)
                // NeuralNetConfiguration.layer (config) should keep the original layer config. While network
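These two call sites are reached through the transfer-learning fine-tuning API; a hedged sketch of the user-facing path that now produces builder-constructed FrozenLayer wrappers (pretrainedNet is a hypothetical, already trained MultiLayerNetwork, and the exact freezing semantics of setFeatureExtractor are assumed from standard DL4J usage):

    MultiLayerNetwork tuned = new TransferLearning.Builder(pretrainedNet)
            .setFeatureExtractor(1)   // layers up to and including index 1 become frozen
            .build();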