Using @SuperBuilder for LayerConfigurations

Brian Rosenberger, 2023-04-27 12:55:01 +02:00
Branch: master
parent 3267b06bde, commit e576659639
42 changed files with 440 additions and 385 deletions
Signed-off-by: brian <brian@brutex.de>
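This commit moves the LayerConfiguration classes from hand-written constructors to Lombok's @SuperBuilder. For orientation, a minimal sketch of the resulting call pattern (not taken from the commit itself; it assumes the builder API visible in the hunks below, and the sizes are illustrative):

    // Before: configuration objects were created directly.
    //   LayerConfiguration lrn = new LocalResponseNormalization();
    // After: every LayerConfiguration is obtained from its @SuperBuilder-generated builder.
    LayerConfiguration lrn = LocalResponseNormalization.builder().build();
    LayerConfiguration dense = DenseLayer.builder().nIn(10).nOut(10).build();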

View File

@@ -71,7 +71,7 @@ dependencies {
    // api "com.fasterxml.jackson.module:jackson-module-scala_${scalaVersion}"
-   api "org.projectlombok:lombok:1.18.24"
+   api "org.projectlombok:lombok:1.18.26"
    /*Logging*/
    api 'org.slf4j:slf4j-api:2.0.3'

View File

@@ -507,7 +507,7 @@ public class DTypeTests extends BaseDL4JTest {
        .updater(new Adam(1e-2))
        .layer(ConvolutionLayer.builder().kernelSize(2, 2).stride(1, 1).nOut(3).activation(Activation.TANH).build())
-       .layer(new LocalResponseNormalization())
+       .layer(LocalResponseNormalization.builder())
        .layer(DropoutLayer.builder(0.5).build())
        .layer(DropoutLayer.builder(new AlphaDropout(0.5)).build())
        .layer(DropoutLayer.builder(new GaussianDropout(0.5)).build())
@@ -519,12 +519,12 @@ public class DTypeTests extends BaseDL4JTest {
        // .layer(LocallyConnected2D.builder().nOut(3).kernelSize(2,2).stride(1,1).activation(Activation.SIGMOID).build()) //EXCEPTION
        .layer(ZeroPaddingLayer.builder(1, 1).build())
        .layer(Cropping2D.builder(1, 1).build())
-       .layer(new IdentityLayer())
+       .layer(IdentityLayer.builder())
        .layer(Upsampling2D.builder().size(2).build())
        .layer(SubsamplingLayer.builder().kernelSize(2, 2).stride(2, 2).build())
        .layer(DepthwiseConvolution2D.builder().nOut(3).activation(Activation.RELU).build())
        .layer(SeparableConvolution2D.builder().nOut(3).activation(Activation.HARDTANH).build())
-       .layer(new MaskLayer())
+       .layer(MaskLayer.builder())
        .layer(BatchNormalization.builder().build())
        .layer(ActivationLayer.builder().activation(Activation.LEAKYRELU).build())
        .layer(secondLast)

View File

@@ -114,7 +114,8 @@ public class LocalResponseTest extends BaseDL4JTest {
        .layer(LocalResponseNormalization.builder().k(2).n(5).alpha(1e-4).beta(0.75).build())
        .build();
-   layer = new LocalResponseNormalization().instantiate(conf, null, 0, null, false, Nd4j.defaultFloatingPointType());
+   layer = LocalResponseNormalization.builder().build()
+       .instantiate(conf, null, 0, null, false, Nd4j.defaultFloatingPointType());
    activationsActual = layer.activate(x, false, LayerWorkspaceMgr.noWorkspaces());
}

View File

@@ -243,7 +243,7 @@ public class RnnDataFormatTests extends BaseDL4JTest {
        layer = MaskZeroLayer.builder().maskingValue(0.).underlying(layer).build();
    }
    if(lastTimeStep){
-       layer = LastTimeStep.builder(layer);
+       layer = LastTimeStep.builder(layer).build();
    }
    NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = (NeuralNetConfiguration.NeuralNetConfigurationBuilder) NeuralNetConfiguration.builder()
        .seed(12345)

View File

@@ -20,6 +20,8 @@
 package org.deeplearning4j.nn.layers.samediff.testlayers;
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import java.util.*;
 import lombok.Data;
 import lombok.EqualsAndHashCode;
 import lombok.NoArgsConstructor;
@@ -27,8 +29,8 @@ import lombok.experimental.SuperBuilder;
 import org.deeplearning4j.nn.conf.InputPreProcessor;
 import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
 import org.deeplearning4j.nn.conf.inputs.InputType;
-import org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayer;
 import org.deeplearning4j.nn.conf.layers.samediff.SDLayerParams;
+import org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayer;
 import org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayerUtils;
 import org.deeplearning4j.nn.params.DefaultParamInitializer;
 import org.deeplearning4j.nn.weights.WeightInitUtil;
@@ -36,20 +38,22 @@ import org.nd4j.autodiff.samediff.SDVariable;
 import org.nd4j.autodiff.samediff.SameDiff;
 import org.nd4j.linalg.activations.Activation;
 import org.nd4j.linalg.api.ndarray.INDArray;
-import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
-import java.util.*;
 @Data
-@EqualsAndHashCode(callSuper = true, exclude = {"paramShapes"})
+@EqualsAndHashCode(
+    callSuper = true,
+    exclude = {"paramShapes"})
 @NoArgsConstructor()
 @JsonIgnoreProperties("paramShapes")
 @SuperBuilder
 public class SameDiffDense extends SameDiffLayer {
-  private static final List<String> W_KEYS = Collections.singletonList(DefaultParamInitializer.WEIGHT_KEY);
-  private static final List<String> B_KEYS = Collections.singletonList(DefaultParamInitializer.BIAS_KEY);
-  private static final List<String> PARAM_KEYS = Arrays.asList(DefaultParamInitializer.WEIGHT_KEY, DefaultParamInitializer.BIAS_KEY);
+  private static final List<String> W_KEYS =
+      Collections.singletonList(DefaultParamInitializer.WEIGHT_KEY);
+  private static final List<String> B_KEYS =
+      Collections.singletonList(DefaultParamInitializer.BIAS_KEY);
+  private static final List<String> PARAM_KEYS =
+      Arrays.asList(DefaultParamInitializer.WEIGHT_KEY, DefaultParamInitializer.BIAS_KEY);
   private final Map<String, long[]> paramShapes = new HashMap<>();
@@ -57,8 +61,6 @@ public class SameDiffDense extends SameDiffLayer {
   private long nOut;
   private Activation activation;
   @Override
   public InputType getOutputType(int layerIndex, InputType inputType) {
     return null;
@@ -92,15 +94,18 @@ public class SameDiffDense extends SameDiffLayer {
         if (DefaultParamInitializer.BIAS_KEY.equals(e.getKey())) {
           e.getValue().assign(0.0);
         } else {
-          //Normally use 'c' order, but use 'f' for direct comparison to DL4J DenseLayerConfiguration
-          WeightInitUtil.initWeights(nIn, nOut, new long[]{nIn, nOut}, weightInit, null, 'f', e.getValue());
+          // Normally use 'c' order, but use 'f' for direct comparison to DL4J
+          // DenseLayerConfiguration
+          WeightInitUtil.initWeights(
+              nIn, nOut, new long[] {nIn, nOut}, weightInit, null, 'f', e.getValue());
         }
       }
     }
   }
   @Override
-  public SDVariable defineLayer(SameDiff sd, SDVariable layerInput, Map<String, SDVariable> paramTable, SDVariable mask) {
+  public SDVariable defineLayer(
+      SameDiff sd, SDVariable layerInput, Map<String, SDVariable> paramTable, SDVariable mask) {
     SDVariable weights = paramTable.get(DefaultParamInitializer.WEIGHT_KEY);
     SDVariable bias = paramTable.get(DefaultParamInitializer.BIAS_KEY);
@@ -110,7 +115,8 @@ public class SameDiffDense extends SameDiffLayer {
   }
   @Override
-  public void applyGlobalConfigToLayer(NeuralNetConfiguration.NeuralNetConfigurationBuilder globalConfig) {
+  public void applyGlobalConfigToLayer(
+      NeuralNetConfiguration.NeuralNetConfigurationBuilder globalConfig) {
     NeuralNetConfiguration clone = globalConfig.clone().build();
     if (activation == null) {
       activation = SameDiffLayerUtils.fromIActivation(clone.getActivation());
@@ -121,6 +127,4 @@ public class SameDiffDense extends SameDiffLayer {
     // To match DL4J for easy comparison
     return 'f';
   }
 }

View File

@@ -20,6 +20,7 @@
 package org.deeplearning4j.nn.layers.samediff.testlayers;
+import java.util.Map;
 import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
 import org.deeplearning4j.nn.conf.inputs.InputType;
 import org.deeplearning4j.nn.conf.layers.samediff.SDLayerParams;
@@ -31,8 +32,6 @@ import org.nd4j.autodiff.samediff.SameDiff;
 import org.nd4j.linalg.activations.Activation;
 import org.nd4j.linalg.api.ndarray.INDArray;
-import java.util.Map;
 public class SameDiffMSEOutputLayer extends SameDiffOutputLayer {
   private final int nIn;
@@ -48,7 +47,11 @@ public class SameDiffMSEOutputLayer extends SameDiffOutputLayer {
   }
   @Override
-  public SDVariable defineLayer(SameDiff sameDiff, SDVariable layerInput, SDVariable labels, Map<String, SDVariable> paramTable) {
+  public SDVariable defineLayer(
+      SameDiff sameDiff,
+      SDVariable layerInput,
+      SDVariable labels,
+      Map<String, SDVariable> paramTable) {
     SDVariable z = sameDiff.mmul(layerInput, paramTable.get("W")).add(paramTable.get("b"));
     SDVariable out = activation.asSameDiff("out", sameDiff, z);
     // MSE: 1/nOut * (input-labels)^2
@@ -69,7 +72,8 @@ public class SameDiffMSEOutputLayer extends SameDiffOutputLayer {
   @Override
   public void initializeParameters(Map<String, INDArray> params) {
-    WeightInitUtil.initWeights(nIn, nOut, new long[]{nIn, nOut}, weightInit, null, 'f', params.get("W"));
+    WeightInitUtil.initWeights(
+        nIn, nOut, new long[] {nIn, nOut}, weightInit, null, 'f', params.get("W"));
     params.get("b").assign(0.0);
   }
@@ -85,8 +89,6 @@ public class SameDiffMSEOutputLayer extends SameDiffOutputLayer {
   }
   @Override
-  public void applyGlobalConfigToLayer(NeuralNetConfiguration.NeuralNetConfigurationBuilder globalConfig){
-  }
+  public void applyGlobalConfigToLayer(
+      NeuralNetConfiguration.NeuralNetConfigurationBuilder globalConfig) {}
 }

View File

@@ -787,7 +787,7 @@ public class MultiLayerTestRNN extends BaseDL4JTest {
    NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
        .list()
-       .layer(new FrozenLayer(org.deeplearning4j.nn.conf.layers.LSTM.builder()
+       .layer(FrozenLayer.builder(org.deeplearning4j.nn.conf.layers.LSTM.builder()
            .nIn(5).nOut(5).build()))
        .build();

View File

@@ -67,7 +67,7 @@ public class MiscRegressionTests extends BaseDL4JTest {
    public void testFrozenNewFormat(){
        NeuralNetConfiguration configuration = NeuralNetConfiguration.builder()
            .list()
-           .layer(0, new FrozenLayer(DenseLayer.builder().nIn(10).nOut(10).build()))
+           .layer(0, FrozenLayer.builder(DenseLayer.builder().nIn(10).nOut(10).build()))
            .build();
        String json = configuration.toJson();
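Wrapper configurations such as FrozenLayer follow the same pattern: the wrapped configuration is handed to a static builder instead of a constructor. A short sketch, assuming FrozenLayer.builder(LayerConfiguration) returns an ordinary Lombok builder (the nIn/nOut values are illustrative):

    // Sketch: freeze an inner DenseLayer so its parameters are not updated during training.
    LayerConfiguration frozen =
        FrozenLayer.builder(DenseLayer.builder().nIn(10).nOut(10).build()).build();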

View File

@@ -20,9 +20,10 @@
 package org.deeplearning4j.regressiontest.customlayer100a;
+import java.util.Collection;
+import java.util.Map;
 import lombok.Getter;
 import lombok.NoArgsConstructor;
-import lombok.Setter;
 import lombok.experimental.SuperBuilder;
 import lombok.val;
 import org.deeplearning4j.nn.api.Layer;
@@ -35,33 +36,33 @@ import org.deeplearning4j.nn.conf.memory.LayerMemoryReport;
 import org.deeplearning4j.nn.conf.memory.MemoryReport;
 import org.deeplearning4j.nn.params.DefaultParamInitializer;
 import org.deeplearning4j.optimize.api.TrainingListener;
-import org.nd4j.linalg.activations.Activation;
 import org.nd4j.linalg.activations.IActivation;
 import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.ndarray.INDArray;
-import java.util.Collection;
-import java.util.Map;
 @NoArgsConstructor
 @SuperBuilder
 public class CustomLayer extends FeedForwardLayer {
   /**
-   * A custom property used in this custom layer example. See the CustomLayerExampleReadme.md for details
+   * A custom property used in this custom layer example. See the CustomLayerExampleReadme.md for
+   * details
    *
    * @param secondActivationFunction Second activation function for the layer
    */
-  @Getter
-  private IActivation secondActivationFunction;
+  @Getter private IActivation secondActivationFunction;
   @Override
-  public Layer instantiate(NeuralNetConfiguration conf, Collection<TrainingListener> iterationListeners,
-      int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) {
+  public Layer instantiate(
+      NeuralNetConfiguration conf,
+      Collection<TrainingListener> iterationListeners,
+      int layerIndex,
+      INDArray layerParamsView,
+      boolean initializeParams,
+      DataType networkDataType) {
    LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(0);
-   //The instantiate method is how we go from the configuration class (i.e., this class) to the implementation class
+   // The instantiate method is how we go from the configuration class (i.e., this class) to the
+   // implementation class
    // (i.e., a CustomLayerImpl instance)
    // For the most part, it's the same for each type of layer
    runInheritance();
@@ -70,14 +71,17 @@ public class CustomLayer extends FeedForwardLayer {
    myCustomLayer.addTrainingListeners(iterationListeners); // Set the iteration listeners, if any
    myCustomLayer.setIndex(layerIndex); // Integer index of the layer
-   //Parameter view array: In Deeplearning4j, the network parameters for the entire network (all layers) are
-   // allocated in one big array. The relevant section of this parameter vector is extracted out for each layer,
+   // Parameter view array: In Deeplearning4j, the network parameters for the entire network (all
+   // layers) are
+   // allocated in one big array. The relevant section of this parameter vector is extracted out
+   // for each layer,
    // (i.e., it's a "view" array in that it's a subset of a larger array)
    // This is a row vector, with length equal to the number of parameters in the layer
    myCustomLayer.setParamsViewArray(layerParamsView);
    // Initialize the layer parameters. For example,
-   // Note that the entries in paramTable (2 entries here: a weight array of shape [nIn,nOut] and biases of shape [1,nOut]
+   // Note that the entries in paramTable (2 entries here: a weight array of shape [nIn,nOut] and
+   // biases of shape [1,nOut]
    // are in turn a view of the 'layerParamsView' array.
    Map<String, INDArray> paramTable = initializer().init(this, layerParamsView, initializeParams);
    myCustomLayer.setParamTable(paramTable);
@@ -88,7 +92,8 @@ public class CustomLayer extends FeedForwardLayer {
  @Override
  public ParamInitializer initializer() {
    // This method returns the parameter initializer for this type of layer
-   //In this case, we can use the DefaultParamInitializer, which is the same one used for DenseLayerConfiguration
+   // In this case, we can use the DefaultParamInitializer, which is the same one used for
+   // DenseLayerConfiguration
    // For more complex layers, you may need to implement a custom parameter initializer
    // See the various parameter initializers here:
    // https://github.com/deeplearning4j/deeplearning4j/tree/master/deeplearning4j-core/src/main/java/org/deeplearning4j/nn/params
@@ -98,8 +103,10 @@ public class CustomLayer extends FeedForwardLayer {
  @Override
  public LayerMemoryReport getMemoryReport(InputType inputType) {
-   //Memory report is used to estimate how much memory is required for the layer, for different configurations
-   //If you don't need this functionality for your custom layer, you can return a LayerMemoryReport
+   // Memory report is used to estimate how much memory is required for the layer, for different
+   // configurations
+   // If you don't need this functionality for your custom layer, you can return a
+   // LayerMemoryReport
    // with all 0s, or
    // This implementation: based on DenseLayerConfiguration implementation
@@ -115,16 +122,21 @@ public class CustomLayer extends FeedForwardLayer {
      trainSizeVariable += inputType.arrayElementsPerExample();
    }
-   //Also, during backprop: we do a preOut call -> gives us activations size equal to the output size
+   // Also, during backprop: we do a preOut call -> gives us activations size equal to the output
+   // size
    // which is modified in-place by activation function backprop
    // then we have 'epsilonNext' which is equivalent to input size
    trainSizeVariable += outputType.arrayElementsPerExample();
    return new LayerMemoryReport.Builder(name, CustomLayer.class, inputType, outputType)
        .standardMemory(numParams, updaterStateSize)
-       .workingMemory(0, 0, trainSizeFixed,
+       .workingMemory(
+           0,
+           0,
+           trainSizeFixed,
            trainSizeVariable) // No additional memory (beyond activations) for inference
-       .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS,
+       .cacheMemory(
+           MemoryReport.CACHE_MODE_ALL_ZEROS,
            MemoryReport.CACHE_MODE_ALL_ZEROS) // No caching in DenseLayerConfiguration
        .build();
  }

View File

@@ -20,6 +20,7 @@
 package org.deeplearning4j.nn.modelimport.keras.layers.convolutional;
+import lombok.val;
 import org.deeplearning4j.nn.api.layers.LayerConstraint;
 import org.deeplearning4j.nn.conf.CNN2DFormat;
 import org.deeplearning4j.nn.conf.inputs.InputType;
@@ -84,7 +85,7 @@ public class KerasAtrousConvolution2D extends KerasConvolution {
    IWeightInit init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(),
        enforceTrainingConfig, conf, kerasMajorVersion);
-   ConvolutionLayer.ConvolutionLayerBuilder builder = ConvolutionLayer.builder().name(this.name)
+   val builder = ConvolutionLayer.builder().name(this.name)
        .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout)
        .activation(getIActivationFromConfig(layerConfig, conf))
        .weightInit(init)
@@ -92,7 +93,7 @@ public class KerasAtrousConvolution2D extends KerasConvolution {
        .l1(this.weightL1Regularization).l2(this.weightL2Regularization)
        .convolutionMode(getConvolutionModeFromConfig(layerConfig, conf))
        .kernelSize(getKernelSizeFromConfig(layerConfig, 2, conf, kerasMajorVersion))
-       .dataFormat(dimOrder == DimOrder.TENSORFLOW ? CNN2DFormat.NHWC : CNN2DFormat.NCHW)
+       .convFormat(dimOrder == DimOrder.TENSORFLOW ? CNN2DFormat.NHWC : CNN2DFormat.NCHW)
        .hasBias(hasBias)
        .stride(getStrideFromConfig(layerConfig, 2, conf));
    int[] padding = getPaddingFromBorderModeConfig(layerConfig, 2, conf, kerasMajorVersion);

View File

@@ -40,7 +40,6 @@ import static org.deeplearning4j.nn.modelimport.keras.utils.KerasLayerUtils.remo
 @Slf4j
 @Data
 @EqualsAndHashCode(callSuper = false)
-@NoArgsConstructor
 abstract public class KerasConvolution extends KerasLayer {
    protected int numTrainableParams;
@@ -56,6 +55,10 @@ abstract public class KerasConvolution extends KerasLayer {
        super(kerasVersion);
    }
+   public KerasConvolution() throws UnsupportedKerasConfigurationException {
+       super();
+   }
    /**
     * Constructor from parsed Keras layer configuration dictionary.
     *

View File

@@ -23,6 +23,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolutional;
 import lombok.Data;
 import lombok.EqualsAndHashCode;
 import lombok.extern.slf4j.Slf4j;
+import lombok.val;
 import org.deeplearning4j.nn.api.layers.LayerConstraint;
 import org.deeplearning4j.nn.conf.CNN2DFormat;
 import org.deeplearning4j.nn.conf.InputPreProcessor;
@@ -94,11 +95,11 @@ public class KerasConvolution2D extends KerasConvolution {
    LayerConstraint weightConstraint = KerasConstraintUtils.getConstraintsFromConfig(
        layerConfig, conf.getLAYER_FIELD_W_CONSTRAINT(), conf, kerasMajorVersion);
-   ConvolutionLayer.ConvolutionLayerBuilder builder = ConvolutionLayer.builder().name(this.name)
+   final var builder = ConvolutionLayer.builder().name(this.name)
        .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout)
        .activation(getIActivationFromConfig(layerConfig, conf))
        .weightInit(init)
-       .dataFormat(dimOrder == DimOrder.TENSORFLOW ? CNN2DFormat.NHWC : CNN2DFormat.NCHW)
+       .convFormat(dimOrder == DimOrder.TENSORFLOW ? CNN2DFormat.NHWC : CNN2DFormat.NCHW)
        .l1(this.weightL1Regularization).l2(this.weightL2Regularization)
        .convolutionMode(getConvolutionModeFromConfig(layerConfig, conf))
        .kernelSize(getKernelSizeFromConfig(layerConfig, 2, conf, kerasMajorVersion))

View File

@@ -91,11 +91,11 @@ public class KerasDeconvolution2D extends KerasConvolution {
    LayerConstraint weightConstraint = KerasConstraintUtils.getConstraintsFromConfig(
        layerConfig, conf.getLAYER_FIELD_W_CONSTRAINT(), conf, kerasMajorVersion);
-   var builder = Deconvolution2D.builder().name(this.name)
+   final var builder = Deconvolution2D.builder().name(this.name)
        .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout)
        .activation(getIActivationFromConfig(layerConfig, conf))
        .weightInit(init)
-       .dataFormat(KerasConvolutionUtils.getDataFormatFromConfig(layerConfig,conf))
+       .convFormat(KerasConvolutionUtils.getDataFormatFromConfig(layerConfig,conf))
        .l1(this.weightL1Regularization).l2(this.weightL2Regularization)
        .convolutionMode(getConvolutionModeFromConfig(layerConfig, conf))
        .kernelSize(getKernelSizeFromConfig(layerConfig, 2, conf, kerasMajorVersion))

View File

@@ -72,7 +72,7 @@ public class KerasMasking extends KerasLayer {
    maskingValue = KerasLayerUtils.getMaskingValueFromConfig(layerConfig, conf);
    this.layer = MaskZeroLayer.builder()
        .maskingValue(maskingValue)
-       .underlying(new IdentityLayer(this.name))
+       .underlying(IdentityLayer.builder(this.name).build())
        .name(this.name)
        .build();
}

View File

@@ -37,6 +37,7 @@ import org.deeplearning4j.nn.weights.WeightInit;
 import org.nd4j.linalg.api.ndarray.INDArray;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import static org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolutionUtils.*;
@@ -113,7 +114,7 @@ public class KerasLocallyConnected1D extends KerasConvolution {
    if (biasConstraint != null)
        builder.constrainBias(biasConstraint);
    if (weightConstraint != null)
-       builder.constrainWeights(weightConstraint);
+       builder.weightConstraints(List.of(weightConstraint));
    this.layer = builder.build();
}

View File

@@ -36,6 +36,7 @@ import org.deeplearning4j.nn.weights.IWeightInit;
 import org.nd4j.linalg.api.ndarray.INDArray;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import static org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolutionUtils.*;
@@ -113,7 +114,7 @@ public class KerasLocallyConnected2D extends KerasConvolution {
    if (biasConstraint != null)
        builder.constrainBias(biasConstraint);
    if (weightConstraint != null)
-       builder.constrainWeights(weightConstraint);
+       builder.weightConstraints(List.of(weightConstraint));
    this.layer = builder.build();
}

View File

@@ -179,7 +179,7 @@ public class KerasLSTM extends KerasLayer {
    Pair<Boolean, Double> maskingConfig = KerasLayerUtils.getMaskingConfiguration(inboundLayerNames, previousLayers);
-   LSTM.LSTMBuilder builder = LSTM.builder()
+   final var builder = LSTM.builder()
        .gateActivationFunction(getGateActivationFromConfig(layerConfig))
        .forgetGateBiasInit(getForgetBiasInitFromConfig(layerConfig, enforceTrainingConfig))
        .name(this.name)
@@ -203,10 +203,10 @@ public class KerasLSTM extends KerasLayer {
    this.layer = builder.build();
    if (!returnSequences) {
-       this.layer = LastTimeStep.builder(this.layer);
+       this.layer = LastTimeStep.builder().underlying(this.layer).build();
    }
    if (maskingConfig.getFirst()) {
-       this.layer = new MaskZeroLayer(this.layer, maskingConfig.getSecond());
+       this.layer = MaskZeroLayer.builder().underlying(this.layer).maskingValue(maskingConfig.getSecond()).build();
    }
}
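The recurrent wrappers are built the same way. A short sketch of stacking them, mirroring the builder chains above (the LSTM sizes and masking value are illustrative):

    // Sketch: wrap a recurrent configuration with the builder-based wrapper layers.
    LayerConfiguration rnn = LSTM.builder().nIn(32).nOut(64).build();           // illustrative sizes
    rnn = LastTimeStep.builder().underlying(rnn).build();                       // emit only the last time step
    rnn = MaskZeroLayer.builder().underlying(rnn).maskingValue(0.0).build();    // skip zero-padded time steps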

View File

@@ -174,10 +174,10 @@ public class KerasSimpleRnn extends KerasLayer {
    this.layer = builder.build();
    if (!returnSequences) {
-       this.layer = LastTimeStep.builder(this.layer);
+       this.layer = LastTimeStep.builder(this.layer).build();
    }
    if (maskingConfig.getFirst()) {
-       this.layer = new MaskZeroLayer(this.layer, maskingConfig.getSecond());
+       this.layer = MaskZeroLayer.builder().underlying(this.layer).maskingValue(maskingConfig.getSecond()).build();
    }
}

View File

@@ -20,11 +20,18 @@
 package org.deeplearning4j.nn.modelimport.keras.layers.local;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.deeplearning4j.BaseDL4JTest;
 import org.deeplearning4j.nn.conf.ConvolutionMode;
 import org.deeplearning4j.nn.conf.dropout.Dropout;
 import org.deeplearning4j.nn.conf.inputs.InputType;
 import org.deeplearning4j.nn.conf.layers.LocallyConnected2D;
-import org.deeplearning4j.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.KerasTestUtils;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
@@ -32,14 +39,6 @@ import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
 import org.deeplearning4j.nn.weights.WeightInit;
 import org.junit.jupiter.api.Test;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import static org.junit.jupiter.api.Assertions.assertArrayEquals;
-import static org.junit.jupiter.api.Assertions.assertEquals;
 /**
  * @author Max Pumperla
  */
@@ -66,18 +65,17 @@ public class KerasLocallyConnected2DTest extends BaseDL4JTest {
    private final Keras1LayerConfiguration conf1 = new Keras1LayerConfiguration();
    private final Keras2LayerConfiguration conf2 = new Keras2LayerConfiguration();
    @Test
    public void testLocallyConnected2DLayer() throws Exception {
        buildLocallyConnected2DLayer(conf1, keras1);
        buildLocallyConnected2DLayer(conf2, keras2);
    }
    private void buildLocallyConnected2DLayer(KerasLayerConfiguration conf, Integer kerasVersion)
            throws Exception {
        Map<String, Object> layerConfig = new HashMap<>();
-       layerConfig.put(conf.getLAYER_FIELD_CLASS_NAME(), conf.getLAYER_CLASS_NAME_LOCALLY_CONNECTED_2D());
+       layerConfig.put(
+           conf.getLAYER_FIELD_CLASS_NAME(), conf.getLAYER_CLASS_NAME_LOCALLY_CONNECTED_2D());
        Map<String, Object> config = new HashMap<>();
        config.put(conf.getLAYER_FIELD_ACTIVATION(), ACTIVATION_KERAS);
        config.put(conf.getLAYER_FIELD_NAME(), LAYER_NAME);
@@ -97,9 +95,12 @@ public class KerasLocallyConnected2DTest extends BaseDL4JTest {
        config.put(conf.getLAYER_FIELD_NB_ROW(), KERNEL_SIZE[0]);
        config.put(conf.getLAYER_FIELD_NB_COL(), KERNEL_SIZE[1]);
    } else {
-       ArrayList kernel = new ArrayList<Integer>() {{
+       ArrayList kernel =
+           new ArrayList<Integer>() {
+               {
                    for (int i : KERNEL_SIZE) add(i);
-       }};
+               }
+           };
        config.put(conf.getLAYER_FIELD_KERNEL_SIZE(), kernel);
    }
@@ -112,7 +113,6 @@ public class KerasLocallyConnected2DTest extends BaseDL4JTest {
    layerConfig.put(conf.getLAYER_FIELD_CONFIG(), config);
    layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), kerasVersion);
    KerasLocallyConnected2D kerasLocal = new KerasLocallyConnected2D(layerConfig);
    // once get output type is triggered, inputshape, output shape and input depth are updated
@@ -135,4 +135,3 @@ public class KerasLocallyConnected2DTest extends BaseDL4JTest {
        assertEquals(layer.getNIn(), 3);
    }
}

View File

@@ -18,6 +18,9 @@
 * *****************************************************************************
 *
 */
+plugins {
+    id("io.freefair.lombok") version "8.0.1"
+}
apply from: "${project.rootProject.projectDir}/createTestBackends.gradle"
dependencies {

View File

@@ -134,7 +134,7 @@ public class ActivationLayer extends NoParamLayer {
        C extends ActivationLayer, B extends ActivationLayerBuilder<C, B>>
        extends NoParamLayer.NoParamLayerBuilder<C, B> {
    public C build() {
-       C l = this.build();
+       C l = this.initBuild();
        l.initializeConstraints();
        return l;
    }

View File

@@ -46,6 +46,7 @@ import org.nd4j.linalg.learning.regularization.WeightDecay;
 /** A neural network layer. */
 @EqualsAndHashCode(callSuper = true)
+@NoArgsConstructor
 @SuperBuilder
 public abstract class BaseLayerConfiguration extends LayerConfiguration
     implements ITraininableLayerConfiguration, Serializable, Cloneable {
@@ -62,7 +63,7 @@ public abstract class BaseLayerConfiguration extends LayerConfiguration
   *
   * @param constraints Constraints to apply to all bias parameters of all layers
   */
-  @lombok.Builder.Default @Getter protected final List<LayerConstraint> biasConstraints = new ArrayList<>();
+  //@lombok.Builder.Default @Getter protected final List<LayerConstraint> biasConstraints = new ArrayList<>();
  /**
   * Set constraints to be applied to all layers. Default: no constraints.<br>
   * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm
@@ -392,22 +393,7 @@ public B updater(Updater upd) {
    return weightDecayBias(coefficient, true);
  }
- /**
-  * Set constraints to be applied to all layers. Default: no constraints.<br>
-  * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm
-  * regularization, etc). These constraints are applied at each iteration, after the parameters
-  * have been updated.<br>
-  * Note: values set by this method will be applied to all applicable layers in the network,
-  * unless a different value is explicitly set on a given layer. In other words: values set via
-  * this method are used as the default value, and can be overridden on a per-layer basis.
-  *
-  * @param constraints Constraints to apply to all bias parameters of all layers
-  */
- public B constrainBias(LayerConstraint... constraints) {
-   biasConstraints$value = Arrays.asList(constraints);
-   biasConstraints$set = true;
-   return self();
- }
  /**
   * Weight decay for the biases only - see {@link #weightDecay(double)} for more details<br>

View File

@@ -32,6 +32,7 @@ import org.nd4j.linalg.lossfunctions.impl.LossMCXENT;
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
+@NoArgsConstructor
 @SuperBuilder(builderMethodName = "innerBuilder")
 public abstract class BaseOutputLayer extends FeedForwardLayer {

View File

@@ -29,6 +29,6 @@ import lombok.experimental.SuperBuilder;
 @Data
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
-@SuperBuilder
+@SuperBuilder(buildMethodName = "initBuild")
 public class Convolution1D extends Convolution1DLayer {
 }

View File

@@ -30,6 +30,6 @@ import lombok.experimental.SuperBuilder;
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
-@SuperBuilder
+@SuperBuilder(buildMethodName = "initBuild")
 public class Convolution2D extends ConvolutionLayer {
 }

View File

@@ -46,10 +46,9 @@ import org.nd4j.linalg.api.ndarray.INDArray;
 * to be used in the net or in other words the channels The builder specifies the filter/kernel
 * size, the stride and padding The pooling layer takes the kernel size
 */
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
-@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder")
+@SuperBuilder(builderMethodName = "innerBuilder", buildMethodName = "initBuild")
 public class ConvolutionLayer extends FeedForwardLayer {
  /**
   * Size of the convolution rows/columns
@@ -63,7 +62,8 @@ public class ConvolutionLayer extends FeedForwardLayer {
   * Set the convolution mode for the Convolution layer. See {@link ConvolutionMode} for more
   * details Default is {@link ConvolutionMode}.Truncate.
   */
- @Builder.Default @Getter @Setter private ConvolutionMode convolutionMode = ConvolutionMode.Truncate;
+ @Builder.Default @Getter @Setter
+ private ConvolutionMode convolutionMode = ConvolutionMode.Truncate;
  /**
   * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last).
@@ -86,36 +86,35 @@ public class ConvolutionLayer extends FeedForwardLayer {
   * http://deeplearning.net/software/theano/tutorial/conv_arithmetic.html#dilated-convolutions</a>
   * <br>
   */
- @Getter @Setter
- private @Builder.Default int[] dilation = new int[] {1, 1};
+ @Getter @Setter private @Builder.Default int[] dilation = new int[] {1, 1};
  /** Default is 2. Down-sample by a factor of 2 */
- @Getter @Setter
- private @Builder.Default int[] stride = new int[] {1, 1};
+ @Getter @Setter private @Builder.Default int[] stride = new int[] {1, 1};
- @Getter @Setter
- private @Builder.Default int[] padding = new int[] {0, 0};
+ @Getter @Setter private @Builder.Default int[] padding = new int[] {0, 0};
  /**
   * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementatation
   * be allowed? If set to false, an exception in CuDNN will be propagated back to the user. If
   * false, the built-in (non-CuDNN) implementation for ConvolutionLayer will be used
   */
- @Getter
- @Builder.Default private boolean cudnnAllowFallback = true;
+ @Getter @Setter @Builder.Default private boolean cudnnAllowFallback = true;
  /** Defaults to "PREFER_FASTEST", but "NO_WORKSPACE" uses less memory. */
- @Getter
- @Builder.Default private AlgoMode cudnnAlgoMode = AlgoMode.PREFER_FASTEST;
+ @Getter @Setter @Builder.Default private AlgoMode cudnnAlgoMode = AlgoMode.PREFER_FASTEST;
- private FwdAlgo cudnnFwdAlgo;
+ @Getter @Setter private FwdAlgo cudnnFwdAlgo;
- private BwdFilterAlgo cudnnBwdFilterAlgo;
+ @Getter @Setter private BwdFilterAlgo cudnnBwdFilterAlgo;
- private BwdDataAlgo cudnnBwdDataAlgo;
+ @Getter @Setter private BwdDataAlgo cudnnBwdDataAlgo;
- @Getter @Setter
- @Builder.Default private int convolutionDim = 2; // 2D convolution by default
+ @Getter @Setter @Builder.Default private int convolutionDim = 2; // 2D convolution by default
  /** Causal convolution - allowed for 1D only */
  @Builder.Default private boolean allowCausal = false;
- @Builder.Default @JsonIgnore @EqualsAndHashCode.Exclude
+ @Builder.Default @JsonIgnore @EqualsAndHashCode.Exclude @Getter @Setter
  private boolean defaultValueOverriden = false;
+ public static ConvolutionLayerBuilder<?, ?> builder() {
+   return innerBuilder();
+ }
  public static ConvolutionLayerBuilder<?, ?> builder(int... kernelSize) {
    return innerBuilder().kernelSize(kernelSize);
  }
@@ -435,7 +434,7 @@ public class ConvolutionLayer extends FeedForwardLayer {
        + Arrays.toString(dilation$value));
    }
-   C l = this.initBuild();
+   C l = initBuild();
    l.setType(LayerType.CONV);
    l.initializeConstraints();
    return l;
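With the added no-argument builder(), a ConvolutionLayer can be configured either through the varargs overload or through the kernelSize setter. A usage sketch mirroring call sites elsewhere in this commit (the sizes and activations are illustrative):

    // Sketch: both entry points return the same @SuperBuilder-generated builder.
    ConvolutionLayer a = ConvolutionLayer.builder(5, 5)                 // kernel via the varargs overload
        .nIn(1).nOut(20).stride(1, 1).build();
    ConvolutionLayer b = ConvolutionLayer.builder()                     // kernel via the setter
        .kernelSize(2, 2).stride(1, 1).nOut(3).activation(Activation.TANH).build();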

View File

@@ -23,7 +23,6 @@ package org.deeplearning4j.nn.conf.layers;
 import java.util.Collection;
 import java.util.Map;
 import lombok.*;
-import lombok.experimental.Accessors;
 import lombok.experimental.SuperBuilder;
 import org.deeplearning4j.nn.api.Layer;
 import org.deeplearning4j.nn.api.ParamInitializer;
@@ -41,9 +40,7 @@ import org.nd4j.linalg.api.ndarray.INDArray;
 @Data
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
-@SuperBuilder(
-    buildMethodName =
-        "initBuild") // we do this to define build() ourselves and run custom init code
+@SuperBuilder
 public class DenseLayer extends FeedForwardLayer {
  /** If true (default = false): enable layer normalization on this layer */
@@ -121,13 +118,17 @@ public class DenseLayer extends FeedForwardLayer {
        .build();
  }
- public static abstract class DenseLayerBuilder<C extends DenseLayer,
-     B extends DenseLayerBuilder<C, B>> extends FeedForwardLayerBuilder<C, B>{
-   public C build() {
-     C l = this.initBuild();
+ public abstract static class DenseLayerBuilder<
+       C extends DenseLayer, B extends DenseLayerBuilder<C, B>>
+     extends FeedForwardLayerBuilder<C, B> {
+ }
+ private static final class DenseLayerBuilderImpl extends DenseLayerBuilder<DenseLayer, DenseLayerBuilderImpl> {
+   public DenseLayer build() {
+     DenseLayer l = new DenseLayer(this);
      l.initializeConstraints();
      return l;
    }
  }
}
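DenseLayer now customizes construction by hand-writing the *BuilderImpl class that Lombok would otherwise generate, instead of renaming the build method. A generic sketch of that pattern with made-up names (MyLayer is hypothetical; it assumes Lombok's documented behaviour of completing a partially hand-written builder class):

    // Hypothetical illustration; not part of the commit.
    @lombok.experimental.SuperBuilder
    public class MyLayer {
        private int nOut;

        void initializeConstraints() { /* post-build initialization */ }

        // Hand-written builder implementation: Lombok fills in the missing pieces (e.g. self()),
        // so build() can run custom code after constructing the instance.
        private static final class MyLayerBuilderImpl extends MyLayerBuilder<MyLayer, MyLayerBuilderImpl> {
            @Override
            public MyLayer build() {
                MyLayer l = new MyLayer(this);  // protected constructor generated by @SuperBuilder
                l.initializeConstraints();
                return l;
            }
        }
    }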

View File

@@ -20,10 +20,7 @@
 package org.deeplearning4j.nn.conf.layers;
-import lombok.EqualsAndHashCode;
-import lombok.Getter;
-import lombok.Setter;
-import lombok.ToString;
+import lombok.*;
 import lombok.experimental.SuperBuilder;
 import org.deeplearning4j.nn.conf.DataFormat;
 import org.deeplearning4j.nn.conf.InputPreProcessor;
@@ -34,11 +31,11 @@ import org.deeplearning4j.nn.conf.preprocessor.RnnToFeedForwardPreProcessor;
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
+@NoArgsConstructor
 @SuperBuilder
 public abstract class FeedForwardLayer extends BaseLayerConfiguration {
  public static abstract class FeedForwardLayerBuilder<C extends FeedForwardLayer, B extends FeedForwardLayerBuilder<C, B>>
      extends BaseLayerConfigurationBuilder<C, B> {
  }
  /**
   * Number of inputs for the layer (usually the size of the last layer). <br> Note that for Convolutional layers,

View File

@@ -55,8 +55,7 @@ public class GlobalPoolingLayer extends NoParamLayer {
   * @param poolingDimensions Pooling dimensions to use
   */
  public B poolingDimensions(int... poolingDimensions) {
-   poolingDimensions$value = poolingDimensions;
-   poolingDimensions$set = true;
+   this.poolingDimensions = poolingDimensions;
    return self();
  }
@@ -85,7 +84,7 @@ public class GlobalPoolingLayer extends NoParamLayer {
   * width) Default for CNN3D data: pooling dimensions 2,3,4 (depth, height and width)
   *
   */
- @Builder.Default private int[] poolingDimensions;
+ private int[] poolingDimensions;
  /**
   * P-norm constant. Only used if using {@link PoolingType#PNORM} for the pooling type
   *

View File

@@ -49,8 +49,8 @@ public class LSTM extends AbstractLSTM {
  private double forgetGateBiasInit;
  public static abstract class LSTMBuilder<C extends LSTM, B extends LSTMBuilder<C, B>> extends AbstractLSTMBuilder<C, B> {
-   public C build() {
-     C l = this.build();
+   @Override public C build() {
+     C l = this.initBuild();
      l.initializeConstraints();
      return l;
    }

View File

@@ -48,8 +48,9 @@ import org.nd4j.linalg.learning.config.IUpdater;
 import org.nd4j.linalg.learning.regularization.Regularization;
 /** A neural network layer. */
-//@JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@class")
+@JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@class")
 @EqualsAndHashCode
+@NoArgsConstructor
 // @JsonIdentityInfo(generator= ObjectIdGenerators.IntSequenceGenerator.class, property="@id")
 @Slf4j
 @SuperBuilder
@@ -327,13 +328,41 @@ public abstract class LayerConfiguration
  public abstract static class LayerConfigurationBuilder<
      C extends LayerConfiguration, B extends LayerConfigurationBuilder<C, B>> {
    public B dropOut(double d) {
      this.dropOut(new Dropout(d));
      return self();
    }
    public B dropOut(IDropout d) {
      this.dropOut = d;
      return self();
    }
+   /**
+    * Set constraints to be applied to all layers. Default: no constraints.<br>
+    * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm
+    * regularization, etc). These constraints are applied at each iteration, after the parameters
+    * have been updated.<br>
+    * Note: values set by this method will be applied to all applicable layers in the network,
+    * unless a different value is explicitly set on a given layer. In other words: values set via
+    * this method are used as the default value, and can be overridden on a per-layer basis.
+    *
+    * @param constraints Constraints to apply to all bias parameters of all layers
+    */
+   public B constrainBias(LayerConstraint... constraints) {
+     biasConstraints = Arrays.asList(constraints);
+     return self();
+   }
+   /**
+    * we are doing this to avoid BUG https://github.com/projectlombok/lombok/issues/3419 as some
+    * child classes may specify their own buildMethodName in @SuperBuilder, but we use only
+    * "initBuild" here consequently
+    * @return
+    */
+   public C initBuild() {
+     return build();
+   }
  }
}
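The initBuild() hook gives callers one stable method to finish a builder even when a subclass renames its generated build method. A generic sketch of the idea with a hypothetical stand-alone ChildCfg class (not part of the commit), assuming standard @SuperBuilder semantics:

    // Hypothetical illustration: the generated build method is renamed to initBuild(),
    // so the hand-written build() can add custom steps without calling itself recursively.
    @lombok.experimental.SuperBuilder(buildMethodName = "initBuild")
    public class ChildCfg {
        private boolean initialized;

        public abstract static class ChildCfgBuilder<C extends ChildCfg, B extends ChildCfgBuilder<C, B>> {
            public C build() {
                C cfg = initBuild();      // Lombok-generated build method under its new name
                cfg.initialized = true;   // stand-in for real post-build initialization
                return cfg;
            }
        }
    }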

View File

@@ -24,6 +24,7 @@ import lombok.Data;
 import lombok.EqualsAndHashCode;
 import lombok.NoArgsConstructor;
 import lombok.ToString;
+import lombok.experimental.SuperBuilder;
 /**
  * 1D Pooling (subsampling) layer. Equivalent to {@link Subsampling1DLayer}
@@ -31,8 +32,9 @@ import lombok.ToString;
  * Supports the following pooling types: MAX, AVG, SUM, PNORM, NONE
  * @author Max Pumperla
  */
-@Data
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
+@SuperBuilder(buildMethodName = "initBuild")
 public class Pooling1D extends Subsampling1DLayer {
 }

View File

@@ -24,6 +24,7 @@ import lombok.Data;
 import lombok.EqualsAndHashCode;
 import lombok.NoArgsConstructor;
 import lombok.ToString;
+import lombok.experimental.SuperBuilder;
 /**
  * 2D Pooling (subsampling) layer. Equivalent to {@link SubsamplingLayer}
@@ -31,8 +32,9 @@ import lombok.ToString;
  * Supports the following pooling types: MAX, AVG, SUM, PNORM, NONE
  * @author Max Pumperla
  */
-@Data
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
+@SuperBuilder(buildMethodName = "initBuild")
 public class Pooling2D extends SubsamplingLayer {
 }

View File

@@ -47,7 +47,7 @@ import org.nd4j.linalg.api.ndarray.INDArray;
 * channels deep). The kernel should be H<L pixels high and W=1 pixels
 * wide.
 */
-@Data
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
 @SuperBuilder(buildMethodName = "initBuild")

View File

@@ -53,7 +53,7 @@ public class SubsamplingLayer extends NoParamLayer {
   * Only conv1d/subsampling1d can use causal mode
   */
  @Builder.Default
- @Accessors protected boolean allowCausal = false;
+ protected boolean allowCausal = false;
  /**
   * Set the convolution mode for the Convolution layer. See {@link ConvolutionMode} for more
   * details

View File

@@ -78,7 +78,7 @@ public class Yolo2OutputLayer extends LayerConfiguration {
   */
  @JsonSerialize(using = NDArrayTextSerializer.class)
  @JsonDeserialize(using = BoundingBoxesDeserializer.class)
- @Builder.Default @Getter
+ @Getter
  private INDArray boundingBoxes;
  @Builder.Default @Getter
@@ -165,14 +165,14 @@ public class Yolo2OutputLayer extends LayerConfiguration {
      C extends Yolo2OutputLayer, B extends Yolo2OutputLayerBuilder<C, B>>
      extends LayerConfigurationBuilder<C, B> {
    public C build() {
-     if (boundingBoxes$value == null) {
+     if (boundingBoxes == null) {
        throw new IllegalStateException("Bounding boxes have not been set");
      }
-     if (boundingBoxes$value.rank() != 2 || boundingBoxes$value.size(1) != 2) {
+     if (boundingBoxes.rank() != 2 || boundingBoxes.size(1) != 2) {
        throw new IllegalStateException(
            "Bounding box priors must have shape [nBoxes, 2]. Has shape: "
-           + Arrays.toString(boundingBoxes$value.shape()));
+           + Arrays.toString(boundingBoxes.shape()));
      }
      return initBuild();
    }

View File

@@ -21,6 +21,7 @@
 package org.deeplearning4j.nn.conf.layers.recurrent;
 import lombok.experimental.SuperBuilder;
+import org.deeplearning4j.nn.api.Layer;
 import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
 import org.deeplearning4j.nn.conf.inputs.InputType;
 import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
@@ -45,6 +46,12 @@ public class LastTimeStep extends BaseWrapperLayerConfiguration {
        .name(underlying.getName());
  }
+ public static LastTimeStepBuilder<?,?> builder(Layer underlying) {
+   return innerBuilder()
+       .underlying(underlying.getLayerConfiguration())
+       .name(underlying.getLayerConfiguration().getName());
+ }
  public LayerConfiguration getUnderlying() {
    return underlying;
  }
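LastTimeStep now has two static entry points: the existing builder(LayerConfiguration), used in the Keras import code, and the new builder(Layer) that pulls the configuration out of an already-instantiated layer. A short usage sketch assuming those overloads (the wrapped LSTM and the runtime layer variable are illustrative):

    // Sketch: wrap a recurrent configuration so only the final time step is returned.
    LayerConfiguration lstm = LSTM.builder().nIn(32).nOut(64).build();   // illustrative sizes
    LastTimeStep last = LastTimeStep.builder(lstm).build();
    // With a runtime org.deeplearning4j.nn.api.Layer instance instead (someInstantiatedLayer is hypothetical):
    // LastTimeStep fromLayer = LastTimeStep.builder(someInstantiatedLayer).build();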

View File

@@ -49,6 +49,7 @@ import org.nd4j.linalg.learning.regularization.WeightDecay;
 @Slf4j
 @EqualsAndHashCode(callSuper = true, doNotUseGetters = true)
+@NoArgsConstructor
 @SuperBuilder
 public abstract class AbstractSameDiffLayer extends LayerConfiguration {

View File

@@ -28,6 +28,7 @@ import org.nd4j.autodiff.samediff.SameDiff;
 import org.nd4j.linalg.api.ndarray.INDArray;
 import java.util.Map;
+@NoArgsConstructor
 @SuperBuilder
 public abstract class SameDiffLambdaLayer extends SameDiffLayer {

View File

@@ -21,7 +21,9 @@
 package org.deeplearning4j.nn.conf.layers.samediff;
 import lombok.Builder;
+import lombok.Data;
 import lombok.EqualsAndHashCode;
+import lombok.NoArgsConstructor;
 import lombok.experimental.SuperBuilder;
 import org.deeplearning4j.nn.api.Layer;
 import org.deeplearning4j.nn.api.MaskState;
@@ -42,6 +44,8 @@ import java.util.Map;
 @EqualsAndHashCode(callSuper = true)
+@Data
+@NoArgsConstructor
 @SuperBuilder
 public abstract class SameDiffLayer extends AbstractSameDiffLayer {
@@ -97,8 +101,4 @@ public abstract class SameDiffLayer extends AbstractSameDiffLayer {
    ret.setLayerConfiguration(lconf);
    return ret;
  }
- public static abstract class SameDiffLayerBuilder<C extends SameDiffLayer, B extends SameDiffLayerBuilder<C, B>> extends AbstractSameDiffLayerBuilder<C,B> {
- }
}

View File

@@ -20,6 +20,7 @@
 package org.deeplearning4j.nn.conf.layers.samediff;
+import lombok.NoArgsConstructor;
 import lombok.experimental.SuperBuilder;
 import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
 import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
@@ -31,6 +32,7 @@ import org.nd4j.linalg.api.ndarray.INDArray;
 import java.util.Collection;
 import java.util.Map;
+@NoArgsConstructor
 @SuperBuilder
 public abstract class SameDiffOutputLayer extends AbstractSameDiffLayer {

View File

@@ -323,12 +323,12 @@ public class ManualTests {
    .l2(0.0005)
    .weightInit(WeightInit.XAVIER)
    .updater(new Nesterovs(0.01, 0.9))
-   .layer(0, new FrozenLayer(ConvolutionLayer.builder(5, 5)
+   .layer(0, FrozenLayer.builder(ConvolutionLayer.builder(5, 5)
        //nIn and nOut specify depth. nIn here is the nChannels and nOut is the number of filters to be applied
        .nIn(nChannels).stride(1, 1).nOut(20).activation(Activation.IDENTITY).build()))
-   .layer(1, new FrozenLayer(SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2)
+   .layer(1, FrozenLayer.builder(SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2)
        .stride(2, 2).build()))
-   .layer(2, new FrozenLayer(DenseLayer.builder().activation(Activation.RELU).nOut(500).build()))
+   .layer(2, FrozenLayer.builder(DenseLayer.builder().activation(Activation.RELU).nOut(500).build()))
    .layer(3, OutputLayer.builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
        .nOut(outputNum).activation(Activation.SOFTMAX).build())
    .inputType(InputType.convolutionalFlat(28, 28, nChannels));