Using @SuperBuilder for LayerConfigurations

Signed-off-by: brian <brian@brutex.de>
Branch: master
Parent: 3267b06bde
Commit: e576659639
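For context on the pattern this commit adopts: layer configurations are no longer created through constructors such as new LocalResponseNormalization(), but through Lombok-generated builders, and @SuperBuilder lets a subclass builder inherit every setter declared up the configuration hierarchy. The following is a minimal, hedged sketch of that mechanism using illustrative class names (not the actual DL4J classes):

    import lombok.Getter;
    import lombok.experimental.SuperBuilder;

    // Base configuration: @SuperBuilder generates an abstract builder that
    // subclass builders extend, so inherited fields stay settable fluently.
    @Getter
    @SuperBuilder
    abstract class LayerConfig {
        private String name;
        private double dropOut;
    }

    // The subclass adds its own fields; its builder also exposes name()/dropOut().
    @Getter
    @SuperBuilder
    class DenseLayerConfig extends LayerConfig {
        private int nIn;
        private int nOut;
    }

    class BuilderDemo {
        public static void main(String[] args) {
            // One fluent chain sets fields declared on both levels of the hierarchy.
            DenseLayerConfig cfg = DenseLayerConfig.builder()
                    .name("dense0")   // inherited from LayerConfig
                    .dropOut(0.5)     // inherited from LayerConfig
                    .nIn(784)
                    .nOut(10)
                    .build();
            System.out.println(cfg.getName() + ": " + cfg.getNIn() + " -> " + cfg.getNOut());
        }
    }

The hunks below apply this idea across the code base: test and Keras-import code switches from new Xxx(...) to Xxx.builder()...build(), and the configuration classes themselves gain @SuperBuilder annotations.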
@@ -71,7 +71,7 @@ dependencies {
 // api "com.fasterxml.jackson.module:jackson-module-scala_${scalaVersion}"


-api "org.projectlombok:lombok:1.18.24"
+api "org.projectlombok:lombok:1.18.26"

 /*Logging*/
 api 'org.slf4j:slf4j-api:2.0.3'
@@ -507,7 +507,7 @@ public class DTypeTests extends BaseDL4JTest {
 .updater(new Adam(1e-2))

 .layer(ConvolutionLayer.builder().kernelSize(2, 2).stride(1, 1).nOut(3).activation(Activation.TANH).build())
-.layer(new LocalResponseNormalization())
+.layer(LocalResponseNormalization.builder())
 .layer(DropoutLayer.builder(0.5).build())
 .layer(DropoutLayer.builder(new AlphaDropout(0.5)).build())
 .layer(DropoutLayer.builder(new GaussianDropout(0.5)).build())
@@ -519,12 +519,12 @@ public class DTypeTests extends BaseDL4JTest {
 // .layer(LocallyConnected2D.builder().nOut(3).kernelSize(2,2).stride(1,1).activation(Activation.SIGMOID).build()) //EXCEPTION
 .layer(ZeroPaddingLayer.builder(1, 1).build())
 .layer(Cropping2D.builder(1, 1).build())
-.layer(new IdentityLayer())
+.layer(IdentityLayer.builder())
 .layer(Upsampling2D.builder().size(2).build())
 .layer(SubsamplingLayer.builder().kernelSize(2, 2).stride(2, 2).build())
 .layer(DepthwiseConvolution2D.builder().nOut(3).activation(Activation.RELU).build())
 .layer(SeparableConvolution2D.builder().nOut(3).activation(Activation.HARDTANH).build())
-.layer(new MaskLayer())
+.layer(MaskLayer.builder())
 .layer(BatchNormalization.builder().build())
 .layer(ActivationLayer.builder().activation(Activation.LEAKYRELU).build())
 .layer(secondLast)
@@ -114,7 +114,8 @@ public class LocalResponseTest extends BaseDL4JTest {
 .layer(LocalResponseNormalization.builder().k(2).n(5).alpha(1e-4).beta(0.75).build())
 .build();

-layer = new LocalResponseNormalization().instantiate(conf, null, 0, null, false, Nd4j.defaultFloatingPointType());
+layer = LocalResponseNormalization.builder().build()
+    .instantiate(conf, null, 0, null, false, Nd4j.defaultFloatingPointType());
 activationsActual = layer.activate(x, false, LayerWorkspaceMgr.noWorkspaces());
 }

@@ -243,7 +243,7 @@ public class RnnDataFormatTests extends BaseDL4JTest {
 layer = MaskZeroLayer.builder().maskingValue(0.).underlying(layer).build();
 }
 if(lastTimeStep){
-layer = LastTimeStep.builder(layer);
+layer = LastTimeStep.builder(layer).build();
 }
 NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = (NeuralNetConfiguration.NeuralNetConfigurationBuilder) NeuralNetConfiguration.builder()
 .seed(12345)
@@ -20,6 +20,8 @@

 package org.deeplearning4j.nn.layers.samediff.testlayers;

+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import java.util.*;
 import lombok.Data;
 import lombok.EqualsAndHashCode;
 import lombok.NoArgsConstructor;
@@ -27,8 +29,8 @@ import lombok.experimental.SuperBuilder;
 import org.deeplearning4j.nn.conf.InputPreProcessor;
 import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
 import org.deeplearning4j.nn.conf.inputs.InputType;
-import org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayer;
 import org.deeplearning4j.nn.conf.layers.samediff.SDLayerParams;
+import org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayer;
 import org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayerUtils;
 import org.deeplearning4j.nn.params.DefaultParamInitializer;
 import org.deeplearning4j.nn.weights.WeightInitUtil;
@@ -36,91 +38,93 @@ import org.nd4j.autodiff.samediff.SDVariable;
 import org.nd4j.autodiff.samediff.SameDiff;
 import org.nd4j.linalg.activations.Activation;
 import org.nd4j.linalg.api.ndarray.INDArray;
-import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
-
-import java.util.*;

 @Data
-@EqualsAndHashCode(callSuper = true, exclude = {"paramShapes"})
+@EqualsAndHashCode(
+    callSuper = true,
+    exclude = {"paramShapes"})
 @NoArgsConstructor()
 @JsonIgnoreProperties("paramShapes")
 @SuperBuilder
 public class SameDiffDense extends SameDiffLayer {

-private static final List<String> W_KEYS = Collections.singletonList(DefaultParamInitializer.WEIGHT_KEY);
-private static final List<String> B_KEYS = Collections.singletonList(DefaultParamInitializer.BIAS_KEY);
-private static final List<String> PARAM_KEYS = Arrays.asList(DefaultParamInitializer.WEIGHT_KEY, DefaultParamInitializer.BIAS_KEY);
+private static final List<String> W_KEYS =
+    Collections.singletonList(DefaultParamInitializer.WEIGHT_KEY);
+private static final List<String> B_KEYS =
+    Collections.singletonList(DefaultParamInitializer.BIAS_KEY);
+private static final List<String> PARAM_KEYS =
+    Arrays.asList(DefaultParamInitializer.WEIGHT_KEY, DefaultParamInitializer.BIAS_KEY);

 private final Map<String, long[]> paramShapes = new HashMap<>();

 private long nIn;
 private long nOut;
 private Activation activation;

 @Override
 public InputType getOutputType(int layerIndex, InputType inputType) {
 return null;
 }

 @Override
 public void setNIn(InputType inputType, boolean override) {
-if(override){
-this.nIn = ((InputType.InputTypeFeedForward)inputType).getSize();
+if (override) {
+this.nIn = ((InputType.InputTypeFeedForward) inputType).getSize();
 }
 }

 @Override
 public InputPreProcessor getPreProcessorForInputType(InputType inputType) {
 return null;
 }

 @Override
 public void defineParameters(SDLayerParams params) {
 params.clear();
 params.addWeightParam(DefaultParamInitializer.WEIGHT_KEY, nIn, nOut);
 params.addBiasParam(DefaultParamInitializer.BIAS_KEY, 1, nOut);
 }

 @Override
-public void initializeParameters(Map<String,INDArray> params){
-for(Map.Entry<String,INDArray> e : params.entrySet()){
-if(paramWeightInit != null && paramWeightInit.containsKey(e.getKey())){
+public void initializeParameters(Map<String, INDArray> params) {
+for (Map.Entry<String, INDArray> e : params.entrySet()) {
+if (paramWeightInit != null && paramWeightInit.containsKey(e.getKey())) {
 paramWeightInit.get(e.getKey()).init(nIn, nOut, e.getValue().shape(), 'c', e.getValue());
 } else {
-if(DefaultParamInitializer.BIAS_KEY.equals(e.getKey())){
+if (DefaultParamInitializer.BIAS_KEY.equals(e.getKey())) {
 e.getValue().assign(0.0);
 } else {
-//Normally use 'c' order, but use 'f' for direct comparison to DL4J DenseLayerConfiguration
-WeightInitUtil.initWeights(nIn, nOut, new long[]{nIn, nOut}, weightInit, null, 'f', e.getValue());
+// Normally use 'c' order, but use 'f' for direct comparison to DL4J
+// DenseLayerConfiguration
+WeightInitUtil.initWeights(
+    nIn, nOut, new long[] {nIn, nOut}, weightInit, null, 'f', e.getValue());
 }
 }
 }
 }

 @Override
-public SDVariable defineLayer(SameDiff sd, SDVariable layerInput, Map<String, SDVariable> paramTable, SDVariable mask) {
+public SDVariable defineLayer(
+    SameDiff sd, SDVariable layerInput, Map<String, SDVariable> paramTable, SDVariable mask) {
 SDVariable weights = paramTable.get(DefaultParamInitializer.WEIGHT_KEY);
 SDVariable bias = paramTable.get(DefaultParamInitializer.BIAS_KEY);

 SDVariable mmul = sd.mmul("mmul", layerInput, weights);
 SDVariable z = mmul.add("z", bias);
 return activation.asSameDiff("out", sd, z);
 }

 @Override
-public void applyGlobalConfigToLayer(NeuralNetConfiguration.NeuralNetConfigurationBuilder globalConfig) {
+public void applyGlobalConfigToLayer(
+    NeuralNetConfiguration.NeuralNetConfigurationBuilder globalConfig) {
 NeuralNetConfiguration clone = globalConfig.clone().build();
-if(activation == null){
+if (activation == null) {
 activation = SameDiffLayerUtils.fromIActivation(clone.getActivation());
 }
 }

-public char paramReshapeOrder(String param){
-//To match DL4J for easy comparison
+public char paramReshapeOrder(String param) {
+// To match DL4J for easy comparison
 return 'f';
 }
 }
@@ -20,6 +20,7 @@

 package org.deeplearning4j.nn.layers.samediff.testlayers;

+import java.util.Map;
 import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
 import org.deeplearning4j.nn.conf.inputs.InputType;
 import org.deeplearning4j.nn.conf.layers.samediff.SDLayerParams;
@@ -31,62 +32,63 @@ import org.nd4j.autodiff.samediff.SameDiff;
 import org.nd4j.linalg.activations.Activation;
 import org.nd4j.linalg.api.ndarray.INDArray;

-import java.util.Map;

 public class SameDiffMSEOutputLayer extends SameDiffOutputLayer {

 private final int nIn;
 private final int nOut;
 private final Activation activation;
 private final WeightInit weightInit;

-public SameDiffMSEOutputLayer(int nIn, int nOut, Activation activation, WeightInit weightInit){
+public SameDiffMSEOutputLayer(int nIn, int nOut, Activation activation, WeightInit weightInit) {
 this.nIn = nIn;
 this.nOut = nOut;
 this.activation = activation;
 this.weightInit = weightInit;
 }

 @Override
-public SDVariable defineLayer(SameDiff sameDiff, SDVariable layerInput, SDVariable labels, Map<String, SDVariable> paramTable) {
+public SDVariable defineLayer(
+    SameDiff sameDiff,
+    SDVariable layerInput,
+    SDVariable labels,
+    Map<String, SDVariable> paramTable) {
 SDVariable z = sameDiff.mmul(layerInput, paramTable.get("W")).add(paramTable.get("b"));
 SDVariable out = activation.asSameDiff("out", sameDiff, z);
-//MSE: 1/nOut * (input-labels)^2
+// MSE: 1/nOut * (input-labels)^2
 SDVariable diff = out.sub(labels);
 return diff.mul(diff).mean(1).sum();
 }

 @Override
 public String activationsVertexName() {
 return "out";
 }

 @Override
 public void defineParameters(SDLayerParams params) {
 params.addWeightParam("W", nIn, nOut);
 params.addBiasParam("b", 1, nOut);
 }

 @Override
 public void initializeParameters(Map<String, INDArray> params) {
-WeightInitUtil.initWeights(nIn, nOut, new long[]{nIn, nOut}, weightInit, null, 'f', params.get("W"));
+WeightInitUtil.initWeights(
+    nIn, nOut, new long[] {nIn, nOut}, weightInit, null, 'f', params.get("W"));
 params.get("b").assign(0.0);
 }

 @Override
 public InputType getOutputType(int layerIndex, InputType inputType) {
 return InputType.feedForward(nOut);
 }

 @Override
-public char paramReshapeOrder(String param){
-//To match DL4J for easy comparison
+public char paramReshapeOrder(String param) {
+// To match DL4J for easy comparison
 return 'f';
 }

-@Override
-public void applyGlobalConfigToLayer(NeuralNetConfiguration.NeuralNetConfigurationBuilder globalConfig){
-
-}
+@Override
+public void applyGlobalConfigToLayer(
+    NeuralNetConfiguration.NeuralNetConfigurationBuilder globalConfig) {}
 }
@@ -787,7 +787,7 @@ public class MultiLayerTestRNN extends BaseDL4JTest {

 NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
 .list()
-.layer(new FrozenLayer(org.deeplearning4j.nn.conf.layers.LSTM.builder()
+.layer(FrozenLayer.builder(org.deeplearning4j.nn.conf.layers.LSTM.builder()
 .nIn(5).nOut(5).build()))
 .build();

@@ -67,7 +67,7 @@ public class MiscRegressionTests extends BaseDL4JTest {
 public void testFrozenNewFormat(){
 NeuralNetConfiguration configuration = NeuralNetConfiguration.builder()
 .list()
-.layer(0, new FrozenLayer(DenseLayer.builder().nIn(10).nOut(10).build()))
+.layer(0, FrozenLayer.builder(DenseLayer.builder().nIn(10).nOut(10).build()))
 .build();

 String json = configuration.toJson();
@@ -20,9 +20,10 @@

 package org.deeplearning4j.regressiontest.customlayer100a;

+import java.util.Collection;
+import java.util.Map;
 import lombok.Getter;
 import lombok.NoArgsConstructor;
-import lombok.Setter;
 import lombok.experimental.SuperBuilder;
 import lombok.val;
 import org.deeplearning4j.nn.api.Layer;
@@ -35,97 +36,108 @@ import org.deeplearning4j.nn.conf.memory.LayerMemoryReport;
 import org.deeplearning4j.nn.conf.memory.MemoryReport;
 import org.deeplearning4j.nn.params.DefaultParamInitializer;
 import org.deeplearning4j.optimize.api.TrainingListener;
-import org.nd4j.linalg.activations.Activation;
 import org.nd4j.linalg.activations.IActivation;
 import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.ndarray.INDArray;

-import java.util.Collection;
-import java.util.Map;

 @NoArgsConstructor
 @SuperBuilder
 public class CustomLayer extends FeedForwardLayer {
 /**
- * A custom property used in this custom layer example. See the CustomLayerExampleReadme.md for details
+ * A custom property used in this custom layer example. See the CustomLayerExampleReadme.md for
+ * details
  *
  * @param secondActivationFunction Second activation function for the layer
  */
-@Getter
-private IActivation secondActivationFunction;
+@Getter private IActivation secondActivationFunction;

 @Override
-public Layer instantiate(NeuralNetConfiguration conf, Collection<TrainingListener> iterationListeners,
-int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) {
+public Layer instantiate(
+    NeuralNetConfiguration conf,
+    Collection<TrainingListener> iterationListeners,
+    int layerIndex,
+    INDArray layerParamsView,
+    boolean initializeParams,
+    DataType networkDataType) {

 LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(0);
-//The instantiate method is how we go from the configuration class (i.e., this class) to the implementation class
+// The instantiate method is how we go from the configuration class (i.e., this class) to the
+// implementation class
 // (i.e., a CustomLayerImpl instance)
-//For the most part, it's the same for each type of layer
+// For the most part, it's the same for each type of layer
 runInheritance();

 CustomLayerImpl myCustomLayer = new CustomLayerImpl(lconf, networkDataType);
-myCustomLayer.addTrainingListeners(iterationListeners); //Set the iteration listeners, if any
-myCustomLayer.setIndex(layerIndex); //Integer index of the layer
+myCustomLayer.addTrainingListeners(iterationListeners); // Set the iteration listeners, if any
+myCustomLayer.setIndex(layerIndex); // Integer index of the layer

-//Parameter view array: In Deeplearning4j, the network parameters for the entire network (all layers) are
-// allocated in one big array. The relevant section of this parameter vector is extracted out for each layer,
+// Parameter view array: In Deeplearning4j, the network parameters for the entire network (all
+// layers) are
+// allocated in one big array. The relevant section of this parameter vector is extracted out
+// for each layer,
 // (i.e., it's a "view" array in that it's a subset of a larger array)
 // This is a row vector, with length equal to the number of parameters in the layer
 myCustomLayer.setParamsViewArray(layerParamsView);

-//Initialize the layer parameters. For example,
-// Note that the entries in paramTable (2 entries here: a weight array of shape [nIn,nOut] and biases of shape [1,nOut]
+// Initialize the layer parameters. For example,
+// Note that the entries in paramTable (2 entries here: a weight array of shape [nIn,nOut] and
+// biases of shape [1,nOut]
 // are in turn a view of the 'layerParamsView' array.
 Map<String, INDArray> paramTable = initializer().init(this, layerParamsView, initializeParams);
 myCustomLayer.setParamTable(paramTable);
 myCustomLayer.setLayerConfiguration(lconf);
 return myCustomLayer;
 }

 @Override
 public ParamInitializer initializer() {
-//This method returns the parameter initializer for this type of layer
-//In this case, we can use the DefaultParamInitializer, which is the same one used for DenseLayerConfiguration
-//For more complex layers, you may need to implement a custom parameter initializer
-//See the various parameter initializers here:
-//https://github.com/deeplearning4j/deeplearning4j/tree/master/deeplearning4j-core/src/main/java/org/deeplearning4j/nn/params
+// This method returns the parameter initializer for this type of layer
+// In this case, we can use the DefaultParamInitializer, which is the same one used for
+// DenseLayerConfiguration
+// For more complex layers, you may need to implement a custom parameter initializer
+// See the various parameter initializers here:
+// https://github.com/deeplearning4j/deeplearning4j/tree/master/deeplearning4j-core/src/main/java/org/deeplearning4j/nn/params

 return DefaultParamInitializer.getInstance();
 }

 @Override
 public LayerMemoryReport getMemoryReport(InputType inputType) {
-//Memory report is used to estimate how much memory is required for the layer, for different configurations
-//If you don't need this functionality for your custom layer, you can return a LayerMemoryReport
+// Memory report is used to estimate how much memory is required for the layer, for different
+// configurations
+// If you don't need this functionality for your custom layer, you can return a
+// LayerMemoryReport
 // with all 0s, or

-//This implementation: based on DenseLayerConfiguration implementation
+// This implementation: based on DenseLayerConfiguration implementation
 InputType outputType = getOutputType(-1, inputType);

 val numParams = initializer().numParams(this);
 int updaterStateSize = (int) getIUpdater().stateSize(numParams);

 int trainSizeFixed = 0;
 int trainSizeVariable = 0;
 if (getDropOut() != null) {
-//Assume we dup the input for dropout
+// Assume we dup the input for dropout
 trainSizeVariable += inputType.arrayElementsPerExample();
 }

-//Also, during backprop: we do a preOut call -> gives us activations size equal to the output size
+// Also, during backprop: we do a preOut call -> gives us activations size equal to the output
+// size
 // which is modified in-place by activation function backprop
 // then we have 'epsilonNext' which is equivalent to input size
 trainSizeVariable += outputType.arrayElementsPerExample();

 return new LayerMemoryReport.Builder(name, CustomLayer.class, inputType, outputType)
 .standardMemory(numParams, updaterStateSize)
-.workingMemory(0, 0, trainSizeFixed,
-trainSizeVariable) //No additional memory (beyond activations) for inference
-.cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS,
-MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching in DenseLayerConfiguration
+.workingMemory(
+    0,
+    0,
+    trainSizeFixed,
+    trainSizeVariable) // No additional memory (beyond activations) for inference
+.cacheMemory(
+    MemoryReport.CACHE_MODE_ALL_ZEROS,
+    MemoryReport.CACHE_MODE_ALL_ZEROS) // No caching in DenseLayerConfiguration
 .build();
 }
 }
@@ -20,6 +20,7 @@

 package org.deeplearning4j.nn.modelimport.keras.layers.convolutional;

+import lombok.val;
 import org.deeplearning4j.nn.api.layers.LayerConstraint;
 import org.deeplearning4j.nn.conf.CNN2DFormat;
 import org.deeplearning4j.nn.conf.inputs.InputType;
@@ -84,7 +85,7 @@ public class KerasAtrousConvolution2D extends KerasConvolution {
 IWeightInit init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(),
 enforceTrainingConfig, conf, kerasMajorVersion);

-ConvolutionLayer.ConvolutionLayerBuilder builder = ConvolutionLayer.builder().name(this.name)
+val builder = ConvolutionLayer.builder().name(this.name)
 .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout)
 .activation(getIActivationFromConfig(layerConfig, conf))
 .weightInit(init)
@@ -92,7 +93,7 @@ public class KerasAtrousConvolution2D extends KerasConvolution {
 .l1(this.weightL1Regularization).l2(this.weightL2Regularization)
 .convolutionMode(getConvolutionModeFromConfig(layerConfig, conf))
 .kernelSize(getKernelSizeFromConfig(layerConfig, 2, conf, kerasMajorVersion))
-.dataFormat(dimOrder == DimOrder.TENSORFLOW ? CNN2DFormat.NHWC : CNN2DFormat.NCHW)
+.convFormat(dimOrder == DimOrder.TENSORFLOW ? CNN2DFormat.NHWC : CNN2DFormat.NCHW)
 .hasBias(hasBias)
 .stride(getStrideFromConfig(layerConfig, 2, conf));
 int[] padding = getPaddingFromBorderModeConfig(layerConfig, 2, conf, kerasMajorVersion);
@@ -40,7 +40,6 @@ import static org.deeplearning4j.nn.modelimport.keras.utils.KerasLayerUtils.remo
 @Slf4j
 @Data
 @EqualsAndHashCode(callSuper = false)
-@NoArgsConstructor
 abstract public class KerasConvolution extends KerasLayer {

 protected int numTrainableParams;
@@ -56,6 +55,10 @@ abstract public class KerasConvolution extends KerasLayer {
 super(kerasVersion);
 }

+public KerasConvolution() throws UnsupportedKerasConfigurationException {
+super();
+}
+
 /**
 * Constructor from parsed Keras layer configuration dictionary.
 *
@@ -23,6 +23,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolutional;
 import lombok.Data;
 import lombok.EqualsAndHashCode;
 import lombok.extern.slf4j.Slf4j;
+import lombok.val;
 import org.deeplearning4j.nn.api.layers.LayerConstraint;
 import org.deeplearning4j.nn.conf.CNN2DFormat;
 import org.deeplearning4j.nn.conf.InputPreProcessor;
@@ -94,11 +95,11 @@ public class KerasConvolution2D extends KerasConvolution {
 LayerConstraint weightConstraint = KerasConstraintUtils.getConstraintsFromConfig(
 layerConfig, conf.getLAYER_FIELD_W_CONSTRAINT(), conf, kerasMajorVersion);

-ConvolutionLayer.ConvolutionLayerBuilder builder = ConvolutionLayer.builder().name(this.name)
+final var builder = ConvolutionLayer.builder().name(this.name)
 .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout)
 .activation(getIActivationFromConfig(layerConfig, conf))
 .weightInit(init)
-.dataFormat(dimOrder == DimOrder.TENSORFLOW ? CNN2DFormat.NHWC : CNN2DFormat.NCHW)
+.convFormat(dimOrder == DimOrder.TENSORFLOW ? CNN2DFormat.NHWC : CNN2DFormat.NCHW)
 .l1(this.weightL1Regularization).l2(this.weightL2Regularization)
 .convolutionMode(getConvolutionModeFromConfig(layerConfig, conf))
 .kernelSize(getKernelSizeFromConfig(layerConfig, 2, conf, kerasMajorVersion))
@@ -91,11 +91,11 @@ public class KerasDeconvolution2D extends KerasConvolution {
 LayerConstraint weightConstraint = KerasConstraintUtils.getConstraintsFromConfig(
 layerConfig, conf.getLAYER_FIELD_W_CONSTRAINT(), conf, kerasMajorVersion);

-var builder = Deconvolution2D.builder().name(this.name)
+final var builder = Deconvolution2D.builder().name(this.name)
 .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout)
 .activation(getIActivationFromConfig(layerConfig, conf))
 .weightInit(init)
-.dataFormat(KerasConvolutionUtils.getDataFormatFromConfig(layerConfig,conf))
+.convFormat(KerasConvolutionUtils.getDataFormatFromConfig(layerConfig,conf))
 .l1(this.weightL1Regularization).l2(this.weightL2Regularization)
 .convolutionMode(getConvolutionModeFromConfig(layerConfig, conf))
 .kernelSize(getKernelSizeFromConfig(layerConfig, 2, conf, kerasMajorVersion))
@@ -72,7 +72,7 @@ public class KerasMasking extends KerasLayer {
 maskingValue = KerasLayerUtils.getMaskingValueFromConfig(layerConfig, conf);
 this.layer = MaskZeroLayer.builder()
 .maskingValue(maskingValue)
-.underlying(new IdentityLayer(this.name))
+.underlying(IdentityLayer.builder(this.name).build())
 .name(this.name)
 .build();
 }
@@ -37,6 +37,7 @@ import org.deeplearning4j.nn.weights.WeightInit;
 import org.nd4j.linalg.api.ndarray.INDArray;

 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;

 import static org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolutionUtils.*;
@@ -113,7 +114,7 @@ public class KerasLocallyConnected1D extends KerasConvolution {
 if (biasConstraint != null)
 builder.constrainBias(biasConstraint);
 if (weightConstraint != null)
-builder.constrainWeights(weightConstraint);
+builder.weightConstraints(List.of(weightConstraint));
 this.layer = builder.build();

 }
@@ -36,6 +36,7 @@ import org.deeplearning4j.nn.weights.IWeightInit;
 import org.nd4j.linalg.api.ndarray.INDArray;

 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;

 import static org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolutionUtils.*;
@@ -113,7 +114,7 @@ public class KerasLocallyConnected2D extends KerasConvolution {
 if (biasConstraint != null)
 builder.constrainBias(biasConstraint);
 if (weightConstraint != null)
-builder.constrainWeights(weightConstraint);
+builder.weightConstraints(List.of(weightConstraint));
 this.layer = builder.build();
 }

@@ -179,7 +179,7 @@ public class KerasLSTM extends KerasLayer {

 Pair<Boolean, Double> maskingConfig = KerasLayerUtils.getMaskingConfiguration(inboundLayerNames, previousLayers);

-LSTM.LSTMBuilder builder = LSTM.builder()
+final var builder = LSTM.builder()
 .gateActivationFunction(getGateActivationFromConfig(layerConfig))
 .forgetGateBiasInit(getForgetBiasInitFromConfig(layerConfig, enforceTrainingConfig))
 .name(this.name)
@@ -203,10 +203,10 @@

 this.layer = builder.build();
 if (!returnSequences) {
-this.layer = LastTimeStep.builder(this.layer);
+this.layer = LastTimeStep.builder().underlying(this.layer).build();
 }
 if (maskingConfig.getFirst()) {
-this.layer = new MaskZeroLayer(this.layer, maskingConfig.getSecond());
+this.layer = MaskZeroLayer.builder().underlying(this.layer).maskingValue(maskingConfig.getSecond()).build();
 }
 }

@@ -174,10 +174,10 @@ public class KerasSimpleRnn extends KerasLayer {

 this.layer = builder.build();
 if (!returnSequences) {
-this.layer = LastTimeStep.builder(this.layer);
+this.layer = LastTimeStep.builder(this.layer).build();
 }
 if (maskingConfig.getFirst()) {
-this.layer = new MaskZeroLayer(this.layer, maskingConfig.getSecond());
+this.layer = MaskZeroLayer.builder().underlying(this.layer).maskingValue(maskingConfig.getSecond()).build();
 }
 }

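The two Keras import hunks above replace constructor calls for the wrapper layers with explicit builder chains, using the underlying(...) and maskingValue(...) setters that appear in the diff. Below is a small, hedged sketch of the same wrapping pattern with illustrative stand-in classes (not the real DL4J types):

    import lombok.Builder;
    import lombok.Getter;

    // Stand-in for a recurrent layer configuration.
    @Getter
    @Builder
    class RnnLayerConfig {
        private int nIn;
        private int nOut;
    }

    // Stand-in for a wrapper that keeps only the last time step of its underlying layer.
    @Getter
    @Builder
    class LastStepWrapperConfig {
        private Object underlying;
    }

    // Stand-in for a wrapper that masks time steps whose value equals maskingValue.
    @Getter
    @Builder
    class MaskWrapperConfig {
        private Object underlying;
        private double maskingValue;
    }

    class WrapperDemo {
        public static void main(String[] args) {
            Object layer = RnnLayerConfig.builder().nIn(16).nOut(8).build();
            // Old style: layer = new MaskZeroLayer(layer, 0.0);
            // New style, mirroring the KerasLSTM/KerasSimpleRnn hunks above:
            layer = LastStepWrapperConfig.builder().underlying(layer).build();
            layer = MaskWrapperConfig.builder().underlying(layer).maskingValue(0.0).build();
            System.out.println(layer);
        }
    }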
@@ -20,11 +20,18 @@

 package org.deeplearning4j.nn.modelimport.keras.layers.local;

+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.deeplearning4j.BaseDL4JTest;
 import org.deeplearning4j.nn.conf.ConvolutionMode;
 import org.deeplearning4j.nn.conf.dropout.Dropout;
 import org.deeplearning4j.nn.conf.inputs.InputType;
 import org.deeplearning4j.nn.conf.layers.LocallyConnected2D;
-import org.deeplearning4j.BaseDL4JTest;
 import org.deeplearning4j.nn.modelimport.keras.KerasTestUtils;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration;
 import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration;
@@ -32,107 +39,99 @@ import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration;
 import org.deeplearning4j.nn.weights.WeightInit;
 import org.junit.jupiter.api.Test;

-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import static org.junit.jupiter.api.Assertions.assertArrayEquals;
-import static org.junit.jupiter.api.Assertions.assertEquals;

 /**
  * @author Max Pumperla
  */
 public class KerasLocallyConnected2DTest extends BaseDL4JTest {

 private final String ACTIVATION_KERAS = "linear";
 private final String ACTIVATION_DL4J = "identity";
 private final String LAYER_NAME = "test_layer";
 private final String INIT_KERAS = "glorot_normal";
 private final WeightInit INIT_DL4J = WeightInit.XAVIER;
 private final double L1_REGULARIZATION = 0.01;
 private final double L2_REGULARIZATION = 0.02;
 private final double DROPOUT_KERAS = 0.3;
 private final double DROPOUT_DL4J = 1 - DROPOUT_KERAS;
-private final int[] KERNEL_SIZE = new int[]{1, 2};
-private final int[] DILATION = new int[]{2, 2};
-private final int[] STRIDE = new int[]{3, 4};
+private final int[] KERNEL_SIZE = new int[] {1, 2};
+private final int[] DILATION = new int[] {2, 2};
+private final int[] STRIDE = new int[] {3, 4};
 private final int N_OUT = 13;
 private final String BORDER_MODE_VALID = "valid";
-private final int[] VALID_PADDING = new int[]{0, 0};
+private final int[] VALID_PADDING = new int[] {0, 0};

 private final Integer keras1 = 1;
 private final Integer keras2 = 2;
 private final Keras1LayerConfiguration conf1 = new Keras1LayerConfiguration();
 private final Keras2LayerConfiguration conf2 = new Keras2LayerConfiguration();

 @Test
 public void testLocallyConnected2DLayer() throws Exception {
 buildLocallyConnected2DLayer(conf1, keras1);
 buildLocallyConnected2DLayer(conf2, keras2);
 }

 private void buildLocallyConnected2DLayer(KerasLayerConfiguration conf, Integer kerasVersion)
 throws Exception {
 Map<String, Object> layerConfig = new HashMap<>();
-layerConfig.put(conf.getLAYER_FIELD_CLASS_NAME(), conf.getLAYER_CLASS_NAME_LOCALLY_CONNECTED_2D());
+layerConfig.put(
+    conf.getLAYER_FIELD_CLASS_NAME(), conf.getLAYER_CLASS_NAME_LOCALLY_CONNECTED_2D());
 Map<String, Object> config = new HashMap<>();
 config.put(conf.getLAYER_FIELD_ACTIVATION(), ACTIVATION_KERAS);
 config.put(conf.getLAYER_FIELD_NAME(), LAYER_NAME);
 if (kerasVersion == 1) {
 config.put(conf.getLAYER_FIELD_INIT(), INIT_KERAS);
 } else {
 Map<String, Object> init = new HashMap<>();
 init.put("class_name", conf.getINIT_GLOROT_NORMAL());
 config.put(conf.getLAYER_FIELD_INIT(), init);
 }
 Map<String, Object> W_reg = new HashMap<>();
 W_reg.put(conf.getREGULARIZATION_TYPE_L1(), L1_REGULARIZATION);
 W_reg.put(conf.getREGULARIZATION_TYPE_L2(), L2_REGULARIZATION);
 config.put(conf.getLAYER_FIELD_W_REGULARIZER(), W_reg);
 config.put(conf.getLAYER_FIELD_DROPOUT(), DROPOUT_KERAS);
 if (kerasVersion == 1) {
 config.put(conf.getLAYER_FIELD_NB_ROW(), KERNEL_SIZE[0]);
 config.put(conf.getLAYER_FIELD_NB_COL(), KERNEL_SIZE[1]);
 } else {
-ArrayList kernel = new ArrayList<Integer>() {{
-for (int i : KERNEL_SIZE) add(i);
-}};
+ArrayList kernel =
+    new ArrayList<Integer>() {
+      {
+        for (int i : KERNEL_SIZE) add(i);
+      }
+    };
 config.put(conf.getLAYER_FIELD_KERNEL_SIZE(), kernel);
 }

 List<Integer> subsampleList = new ArrayList<>();
 subsampleList.add(STRIDE[0]);
 subsampleList.add(STRIDE[1]);
 config.put(conf.getLAYER_FIELD_CONVOLUTION_STRIDES(), subsampleList);
 config.put(conf.getLAYER_FIELD_NB_FILTER(), N_OUT);
 config.put(conf.getLAYER_FIELD_BORDER_MODE(), BORDER_MODE_VALID);
 layerConfig.put(conf.getLAYER_FIELD_CONFIG(), config);
 layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), kerasVersion);

 KerasLocallyConnected2D kerasLocal = new KerasLocallyConnected2D(layerConfig);

 // once get output type is triggered, inputshape, output shape and input depth are updated
-kerasLocal.getOutputType(InputType.convolutional(4,4,3));
+kerasLocal.getOutputType(InputType.convolutional(4, 4, 3));

 LocallyConnected2D layer = kerasLocal.getLocallyConnected2DLayer();
 assertEquals(ACTIVATION_DL4J, layer.getActivation().toString().toLowerCase());
 assertEquals(LAYER_NAME, layer.getName());
 assertEquals(INIT_DL4J, layer.getWeightInit());
 assertEquals(L1_REGULARIZATION, KerasTestUtils.getL1(layer), 0.0);
 assertEquals(L2_REGULARIZATION, KerasTestUtils.getL2(layer), 0.0);
 assertEquals(new Dropout(DROPOUT_DL4J), layer.getDropOut());
 assertArrayEquals(KERNEL_SIZE, layer.getKernel());
 assertArrayEquals(STRIDE, layer.getStride());
 assertEquals(N_OUT, layer.getNOut());
 assertEquals(ConvolutionMode.Truncate, layer.getConvolutionMode());
 assertArrayEquals(VALID_PADDING, layer.getPadding());

 assertArrayEquals(layer.getInputSize(), new int[] {4, 4});
 assertEquals(layer.getNIn(), 3);
 }
 }
@@ -18,6 +18,9 @@
 * *****************************************************************************
 *
 */
+plugins {
+    id("io.freefair.lombok") version "8.0.1"
+}
 apply from: "${project.rootProject.projectDir}/createTestBackends.gradle"

 dependencies {
@@ -134,7 +134,7 @@ public class ActivationLayer extends NoParamLayer {
 C extends ActivationLayer, B extends ActivationLayerBuilder<C, B>>
 extends NoParamLayer.NoParamLayerBuilder<C, B> {
 public C build() {
-C l = this.build();
+C l = this.initBuild();
 l.initializeConstraints();
 return l;
 }
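The one-character fix above (this.build() becomes this.initBuild()) deserves a note: with @SuperBuilder(buildMethodName = "initBuild") the Lombok-generated build method is renamed, so a hand-written build() can call it and then run post-construction steps; calling this.build() from inside build() would recurse forever. A hedged, self-contained sketch of the idiom with an illustrative class (not the actual ActivationLayer):

    import lombok.Getter;
    import lombok.experimental.SuperBuilder;

    // Lombok generates initBuild(); the hand-written build() wraps it.
    @Getter
    @SuperBuilder(buildMethodName = "initBuild")
    class ExampleLayerConfig {
        private String name;
        private boolean constraintsInitialized;

        // Simplified stand-in for initializeConstraints() in the real code.
        void initializeConstraints() {
            this.constraintsInitialized = true;
        }

        public abstract static class ExampleLayerConfigBuilder<
                C extends ExampleLayerConfig, B extends ExampleLayerConfigBuilder<C, B>> {
            public C build() {
                C l = this.initBuild();      // Lombok-generated builder method
                l.initializeConstraints();   // post-construction step
                return l;
            }
        }
    }

    class InitBuildDemo {
        public static void main(String[] args) {
            ExampleLayerConfig cfg = ExampleLayerConfig.builder().name("act").build();
            System.out.println(cfg.getName() + " constraintsInitialized=" + cfg.isConstraintsInitialized());
        }
    }

The same buildMethodName = "initBuild" setting shows up in the Convolution1D, Convolution2D, and ConvolutionLayer hunks further down.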
@@ -46,6 +46,7 @@ import org.nd4j.linalg.learning.regularization.WeightDecay;

 /** A neural network layer. */
 @EqualsAndHashCode(callSuper = true)
+@NoArgsConstructor
 @SuperBuilder
 public abstract class BaseLayerConfiguration extends LayerConfiguration
 implements ITraininableLayerConfiguration, Serializable, Cloneable {
@@ -62,7 +63,7 @@ public abstract class BaseLayerConfiguration extends LayerConfiguration
 *
 * @param constraints Constraints to apply to all bias parameters of all layers
 */
-@lombok.Builder.Default @Getter protected final List<LayerConstraint> biasConstraints = new ArrayList<>();
+//@lombok.Builder.Default @Getter protected final List<LayerConstraint> biasConstraints = new ArrayList<>();
 /**
 * Set constraints to be applied to all layers. Default: no constraints.<br>
 * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm
@@ -392,22 +393,7 @@ public B updater(Updater upd) {
       return weightDecayBias(coefficient, true);
     }
 
-    /**
-     * Set constraints to be applied to all layers. Default: no constraints.<br>
-     * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm
-     * regularization, etc). These constraints are applied at each iteration, after the parameters
-     * have been updated.<br>
-     * Note: values set by this method will be applied to all applicable layers in the network,
-     * unless a different value is explicitly set on a given layer. In other words: values set via
-     * this method are used as the default value, and can be overridden on a per-layer basis.
-     *
-     * @param constraints Constraints to apply to all bias parameters of all layers
-     */
-    public B constrainBias(LayerConstraint... constraints) {
-      biasConstraints$value = Arrays.asList(constraints);
-      biasConstraints$set = true;
-      return self();
-    }
-
     /**
      * Weight decay for the biases only - see {@link #weightDecay(double)} for more details<br>
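The biasConstraints$value and biasConstraints$set names in the removed method are Lombok internals, not typos: for a field marked @Builder.Default, @SuperBuilder stores the caller-supplied value in <field>$value and records whether it was set at all in <field>$set, so a hand-written builder method has to assign both. A minimal sketch of that mechanism, with illustrative names rather than the DL4J types:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import lombok.Builder;
import lombok.Getter;
import lombok.experimental.SuperBuilder;

@Getter
@SuperBuilder
public class ExampleConf {

  @Builder.Default private List<String> constraints = new ArrayList<>();

  public abstract static class ExampleConfBuilder<
      C extends ExampleConf, B extends ExampleConfBuilder<C, B>> {

    public B constrainAll(String... cs) {
      constraints$value = Arrays.asList(cs); // Lombok-generated backing field for the default
      constraints$set = true;                // mark the default as overridden
      return self();
    }
  }
}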
@@ -32,6 +32,7 @@ import org.nd4j.linalg.lossfunctions.impl.LossMCXENT;
 
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
+@NoArgsConstructor
 @SuperBuilder(builderMethodName = "innerBuilder")
 public abstract class BaseOutputLayer extends FeedForwardLayer {
 
@@ -29,6 +29,6 @@ import lombok.experimental.SuperBuilder;
 @Data
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
-@SuperBuilder
+@SuperBuilder(buildMethodName = "initBuild")
 public class Convolution1D extends Convolution1DLayer {
 }
@@ -30,6 +30,6 @@ import lombok.experimental.SuperBuilder;
 
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
-@SuperBuilder
+@SuperBuilder(buildMethodName = "initBuild")
 public class Convolution2D extends ConvolutionLayer {
 }
@@ -46,10 +46,9 @@ import org.nd4j.linalg.api.ndarray.INDArray;
  * to be used in the net or in other words the channels The builder specifies the filter/kernel
  * size, the stride and padding The pooling layer takes the kernel size
  */
 
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
-@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder")
+@SuperBuilder(builderMethodName = "innerBuilder", buildMethodName = "initBuild")
 public class ConvolutionLayer extends FeedForwardLayer {
   /**
    * Size of the convolution rows/columns
@@ -63,7 +62,8 @@ public class ConvolutionLayer extends FeedForwardLayer {
    * Set the convolution mode for the Convolution layer. See {@link ConvolutionMode} for more
    * details Default is {@link ConvolutionMode}.Truncate.
    */
-  @Builder.Default @Getter @Setter private ConvolutionMode convolutionMode = ConvolutionMode.Truncate;
+  @Builder.Default @Getter @Setter
+  private ConvolutionMode convolutionMode = ConvolutionMode.Truncate;
 
   /**
    * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last).
@@ -86,36 +86,35 @@ public class ConvolutionLayer extends FeedForwardLayer {
  * http://deeplearning.net/software/theano/tutorial/conv_arithmetic.html#dilated-convolutions</a>
  * <br>
  */
-  @Getter @Setter
-  private @Builder.Default int[] dilation = new int[] {1, 1};
+  @Getter @Setter private @Builder.Default int[] dilation = new int[] {1, 1};
   /** Default is 2. Down-sample by a factor of 2 */
-  @Getter @Setter
-  private @Builder.Default int[] stride = new int[] {1, 1};
-  @Getter @Setter
-  private @Builder.Default int[] padding = new int[] {0, 0};
+  @Getter @Setter private @Builder.Default int[] stride = new int[] {1, 1};
+  @Getter @Setter private @Builder.Default int[] padding = new int[] {0, 0};
   /**
    * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementatation
    * be allowed? If set to false, an exception in CuDNN will be propagated back to the user. If
    * false, the built-in (non-CuDNN) implementation for ConvolutionLayer will be used
    */
-  @Getter
-  @Builder.Default private boolean cudnnAllowFallback = true;
+  @Getter @Setter @Builder.Default private boolean cudnnAllowFallback = true;
 
   /** Defaults to "PREFER_FASTEST", but "NO_WORKSPACE" uses less memory. */
-  @Getter
-  @Builder.Default private AlgoMode cudnnAlgoMode = AlgoMode.PREFER_FASTEST;
+  @Getter @Setter @Builder.Default private AlgoMode cudnnAlgoMode = AlgoMode.PREFER_FASTEST;
 
-  private FwdAlgo cudnnFwdAlgo;
-  private BwdFilterAlgo cudnnBwdFilterAlgo;
-  private BwdDataAlgo cudnnBwdDataAlgo;
-  @Getter @Setter
-  @Builder.Default private int convolutionDim = 2; // 2D convolution by default
+  @Getter @Setter private FwdAlgo cudnnFwdAlgo;
+  @Getter @Setter private BwdFilterAlgo cudnnBwdFilterAlgo;
+  @Getter @Setter private BwdDataAlgo cudnnBwdDataAlgo;
+  @Getter @Setter @Builder.Default private int convolutionDim = 2; // 2D convolution by default
   /** Causal convolution - allowed for 1D only */
   @Builder.Default private boolean allowCausal = false;
 
-  @Builder.Default @JsonIgnore @EqualsAndHashCode.Exclude
+  @Builder.Default @JsonIgnore @EqualsAndHashCode.Exclude @Getter @Setter
   private boolean defaultValueOverriden = false;
 
+  public static ConvolutionLayerBuilder<?, ?> builder() {
+    return innerBuilder();
+  }
+
   public static ConvolutionLayerBuilder<?, ?> builder(int... kernelSize) {
     return innerBuilder().kernelSize(kernelSize);
   }
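The builderMethodName = "innerBuilder" setting used by this class is what makes the hand-written static builder() factories above possible: Lombok still generates the builder, but its entry point is renamed so the class can expose its own builder() overloads, including one that pre-seeds the kernel size. A minimal sketch of the idiom, with illustrative names rather than the actual ConvolutionLayer:

import lombok.Builder;
import lombok.Getter;
import lombok.experimental.SuperBuilder;

@Getter
@SuperBuilder(builderMethodName = "innerBuilder")
public class ExampleConvConf {

  @Builder.Default private int[] kernelSize = new int[] {5, 5};

  /** Replacement for the generated factory, keeping the no-argument entry point. */
  public static ExampleConvConfBuilder<?, ?> builder() {
    return innerBuilder();
  }

  /** Convenience overload that pre-seeds the kernel size, mirroring builder(int...). */
  public static ExampleConvConfBuilder<?, ?> builder(int... kernelSize) {
    return innerBuilder().kernelSize(kernelSize);
  }
}

Call sites then read ExampleConvConf.builder(3, 3).build() instead of reaching for innerBuilder() directly.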
@@ -435,7 +434,7 @@ public class ConvolutionLayer extends FeedForwardLayer {
               + Arrays.toString(dilation$value));
       }
 
-      C l = this.initBuild();
+      C l = initBuild();
       l.setType(LayerType.CONV);
       l.initializeConstraints();
       return l;
@@ -23,7 +23,6 @@ package org.deeplearning4j.nn.conf.layers;
 import java.util.Collection;
 import java.util.Map;
 import lombok.*;
-import lombok.experimental.Accessors;
 import lombok.experimental.SuperBuilder;
 import org.deeplearning4j.nn.api.Layer;
 import org.deeplearning4j.nn.api.ParamInitializer;
@@ -41,15 +40,13 @@ import org.nd4j.linalg.api.ndarray.INDArray;
 @Data
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
-@SuperBuilder(
-    buildMethodName =
-        "initBuild") // we do this to define build() ourselves and run custom init code
+@SuperBuilder
 public class DenseLayer extends FeedForwardLayer {
 
   /** If true (default = false): enable layer normalization on this layer */
   @lombok.Builder.Default private boolean hasLayerNorm = false;
 
   @lombok.Builder.Default private boolean hasBias = true;
 
   @Override
   public Layer instantiate(
@@ -121,13 +118,17 @@ public class DenseLayer extends FeedForwardLayer {
         .build();
   }
 
-  public static abstract class DenseLayerBuilder<C extends DenseLayer,
-     B extends DenseLayerBuilder<C, B>> extends FeedForwardLayerBuilder<C, B>{
+  public abstract static class DenseLayerBuilder<
+      C extends DenseLayer, B extends DenseLayerBuilder<C, B>>
+      extends FeedForwardLayerBuilder<C, B> {
 
-    public C build() {
-      C l = this.initBuild();
+  }
+  private static final class DenseLayerBuilderImpl extends DenseLayerBuilder<DenseLayer, DenseLayerBuilderImpl> {
+    public DenseLayer build() {
+      DenseLayer l = new DenseLayer(this);
       l.initializeConstraints();
       return l;
     }
   }
 }
@@ -20,10 +20,7 @@
 
 package org.deeplearning4j.nn.conf.layers;
 
-import lombok.EqualsAndHashCode;
-import lombok.Getter;
-import lombok.Setter;
-import lombok.ToString;
+import lombok.*;
 import lombok.experimental.SuperBuilder;
 import org.deeplearning4j.nn.conf.DataFormat;
 import org.deeplearning4j.nn.conf.InputPreProcessor;
@@ -34,12 +31,12 @@ import org.deeplearning4j.nn.conf.preprocessor.RnnToFeedForwardPreProcessor;
 
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
+@NoArgsConstructor
 @SuperBuilder
 public abstract class FeedForwardLayer extends BaseLayerConfiguration {
   public static abstract class FeedForwardLayerBuilder<C extends FeedForwardLayer, B extends FeedForwardLayerBuilder<C, B>>
       extends BaseLayerConfigurationBuilder<C, B> {
-
   }
   /**
    * Number of inputs for the layer (usually the size of the last layer). <br> Note that for Convolutional layers,
    * this is the input channels, otherwise is the previous layer size.
@@ -55,8 +55,7 @@ public class GlobalPoolingLayer extends NoParamLayer
      * @param poolingDimensions Pooling dimensions to use
      */
     public B poolingDimensions(int... poolingDimensions) {
-      poolingDimensions$value = poolingDimensions;
-      poolingDimensions$set = true;
+      this.poolingDimensions = poolingDimensions;
       return self();
     }
 
@@ -85,7 +84,7 @@ public class GlobalPoolingLayer extends NoParamLayer
    * width) Default for CNN3D data: pooling dimensions 2,3,4 (depth, height and width)
    *
    */
-  @Builder.Default private int[] poolingDimensions;
+  private int[] poolingDimensions;
   /**
    * P-norm constant. Only used if using {@link PoolingType#PNORM} for the pooling type
    *
@@ -49,8 +49,8 @@ public class LSTM extends AbstractLSTM {
   private double forgetGateBiasInit;
 
   public static abstract class LSTMBuilder<C extends LSTM, B extends LSTMBuilder<C, B>> extends AbstractLSTMBuilder<C, B> {
-    public C build() {
-      C l = this.build();
+    @Override public C build() {
+      C l = this.initBuild();
       l.initializeConstraints();
       return l;
     }
@@ -48,8 +48,9 @@ import org.nd4j.linalg.learning.config.IUpdater;
 import org.nd4j.linalg.learning.regularization.Regularization;
 
 /** A neural network layer. */
-//@JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@class")
+@JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@class")
 @EqualsAndHashCode
+@NoArgsConstructor
 // @JsonIdentityInfo(generator= ObjectIdGenerators.IntSequenceGenerator.class, property="@id")
 @Slf4j
 @SuperBuilder
@@ -327,13 +328,41 @@ public abstract class LayerConfiguration
 
   public abstract static class LayerConfigurationBuilder<
       C extends LayerConfiguration, B extends LayerConfigurationBuilder<C, B>> {
 
     public B dropOut(double d) {
       this.dropOut(new Dropout(d));
       return self();
     }
 
     public B dropOut(IDropout d) {
       this.dropOut = d;
       return self();
     }
 
+    /**
+     * Set constraints to be applied to all layers. Default: no constraints.<br>
+     * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm
+     * regularization, etc). These constraints are applied at each iteration, after the parameters
+     * have been updated.<br>
+     * Note: values set by this method will be applied to all applicable layers in the network,
+     * unless a different value is explicitly set on a given layer. In other words: values set via
+     * this method are used as the default value, and can be overridden on a per-layer basis.
+     *
+     * @param constraints Constraints to apply to all bias parameters of all layers
+     */
+    public B constrainBias(LayerConstraint... constraints) {
+      biasConstraints = Arrays.asList(constraints);
+      return self();
+    }
+
+    /**
+     * we are doing this to avoid BUG https://github.com/projectlombok/lombok/issues/3419 as some
+     * child classes may specify their own buildMethodName in @SuperBuilder, but we use only
+     * "initBuild" here consequently
+     * @return
+     */
+    public C initBuild() {
+      return build();
+    }
   }
 }
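With constrainBias and the dropOut overloads now on the root LayerConfigurationBuilder, every layer builder inherits them. A hypothetical usage sketch; the nIn/nOut setters are assumed to be generated from FeedForwardLayer's fields, and NonNegativeConstraint is DL4J's existing constraint class:

import org.deeplearning4j.nn.conf.constraint.NonNegativeConstraint;
import org.deeplearning4j.nn.conf.layers.DenseLayer;

public class ConstrainBiasUsageSketch {
  public static void main(String[] args) {
    DenseLayer conf = DenseLayer.builder()
        .nIn(128)
        .nOut(64)
        .constrainBias(new NonNegativeConstraint()) // stored into biasConstraints
        .dropOut(0.5)                               // convenience overload from the root builder
        .build();
    System.out.println(conf);
  }
}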
@@ -24,6 +24,7 @@ import lombok.Data;
 import lombok.EqualsAndHashCode;
 import lombok.NoArgsConstructor;
 import lombok.ToString;
+import lombok.experimental.SuperBuilder;
 
 /**
  * 1D Pooling (subsampling) layer. Equivalent to {@link Subsampling1DLayer}
@@ -31,8 +32,9 @@ import lombok.ToString;
  * Supports the following pooling types: MAX, AVG, SUM, PNORM, NONE
  * @author Max Pumperla
  */
-@Data
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
+@SuperBuilder(buildMethodName = "initBuild")
 public class Pooling1D extends Subsampling1DLayer {
 }
@@ -24,6 +24,7 @@ import lombok.Data;
 import lombok.EqualsAndHashCode;
 import lombok.NoArgsConstructor;
 import lombok.ToString;
+import lombok.experimental.SuperBuilder;
 
 /**
  * 2D Pooling (subsampling) layer. Equivalent to {@link SubsamplingLayer}
@@ -31,8 +32,9 @@ import lombok.ToString;
  * Supports the following pooling types: MAX, AVG, SUM, PNORM, NONE
  * @author Max Pumperla
 */
-@Data
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
+@SuperBuilder(buildMethodName = "initBuild")
 public class Pooling2D extends SubsamplingLayer {
 }
@@ -47,7 +47,7 @@ import org.nd4j.linalg.api.ndarray.INDArray;
  * channels deep). The kernel should be H<L pixels high and W=1 pixels
  * wide.
  */
-@Data
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
 @SuperBuilder(buildMethodName = "initBuild")
@@ -53,7 +53,7 @@ public class SubsamplingLayer extends NoParamLayer {
    * Only conv1d/subsampling1d can use causal mode
    */
   @Builder.Default
-  @Accessors protected boolean allowCausal = false;
+  protected boolean allowCausal = false;
   /**
    * Set the convolution mode for the Convolution layer. See {@link ConvolutionMode} for more
    * details
@@ -78,7 +78,7 @@ public class Yolo2OutputLayer extends LayerConfiguration {
    */
   @JsonSerialize(using = NDArrayTextSerializer.class)
   @JsonDeserialize(using = BoundingBoxesDeserializer.class)
-  @Builder.Default @Getter
+  @Getter
   private INDArray boundingBoxes;
 
   @Builder.Default @Getter
@@ -165,14 +165,14 @@ public class Yolo2OutputLayer extends LayerConfiguration {
       C extends Yolo2OutputLayer, B extends Yolo2OutputLayerBuilder<C, B>>
       extends LayerConfigurationBuilder<C, B> {
     public C build() {
-      if (boundingBoxes$value == null) {
+      if (boundingBoxes == null) {
         throw new IllegalStateException("Bounding boxes have not been set");
       }
 
-      if (boundingBoxes$value.rank() != 2 || boundingBoxes$value.size(1) != 2) {
+      if (boundingBoxes.rank() != 2 || boundingBoxes.size(1) != 2) {
         throw new IllegalStateException(
             "Bounding box priors must have shape [nBoxes, 2]. Has shape: "
-                + Arrays.toString(boundingBoxes$value.shape()));
+                + Arrays.toString(boundingBoxes.shape()));
       }
       return initBuild();
     }
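Dropping @Builder.Default on boundingBoxes (previous hunk) is what lets this hand-written build() read the plain builder field instead of the generated boundingBoxes$value/$set pair, keeping the required-field validation simple. A minimal sketch of that validation pattern, with illustrative names rather than the real Yolo2OutputLayer:

import lombok.Getter;
import lombok.experimental.SuperBuilder;

@Getter
@SuperBuilder(buildMethodName = "initBuild")
public class ExampleAnchorLayer {

  private double[][] boxPriors; // required field, deliberately no @Builder.Default

  public abstract static class ExampleAnchorLayerBuilder<
      C extends ExampleAnchorLayer, B extends ExampleAnchorLayerBuilder<C, B>> {

    public C build() {
      // Without @Builder.Default the builder keeps the value in a plain field,
      // so it can be validated directly before delegating to Lombok's initBuild().
      if (boxPriors == null) {
        throw new IllegalStateException("Box priors have not been set");
      }
      return initBuild();
    }
  }
}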
@@ -21,6 +21,7 @@
 package org.deeplearning4j.nn.conf.layers.recurrent;
 
 import lombok.experimental.SuperBuilder;
+import org.deeplearning4j.nn.api.Layer;
 import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
 import org.deeplearning4j.nn.conf.inputs.InputType;
 import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
@@ -45,6 +46,12 @@ public class LastTimeStep extends BaseWrapperLayerConfiguration {
             .name(underlying.getName());
   }
 
+  public static LastTimeStepBuilder<?,?> builder(Layer underlying) {
+    return innerBuilder()
+        .underlying(underlying.getLayerConfiguration())
+        .name(underlying.getLayerConfiguration().getName());
+  }
+
   public LayerConfiguration getUnderlying() {
     return underlying;
   }
@@ -49,6 +49,7 @@ import org.nd4j.linalg.learning.regularization.WeightDecay;
 
 @Slf4j
 @EqualsAndHashCode(callSuper = true, doNotUseGetters = true)
+@NoArgsConstructor
 @SuperBuilder
 public abstract class AbstractSameDiffLayer extends LayerConfiguration {
 
@@ -28,6 +28,7 @@ import org.nd4j.autodiff.samediff.SameDiff;
 import org.nd4j.linalg.api.ndarray.INDArray;
 
 import java.util.Map;
+@NoArgsConstructor
 @SuperBuilder
 public abstract class SameDiffLambdaLayer extends SameDiffLayer {
 
@@ -21,7 +21,9 @@
 package org.deeplearning4j.nn.conf.layers.samediff;
 
 import lombok.Builder;
+import lombok.Data;
 import lombok.EqualsAndHashCode;
+import lombok.NoArgsConstructor;
 import lombok.experimental.SuperBuilder;
 import org.deeplearning4j.nn.api.Layer;
 import org.deeplearning4j.nn.api.MaskState;
@@ -42,6 +44,8 @@ import java.util.Map;
 
 
 @EqualsAndHashCode(callSuper = true)
+@Data
+@NoArgsConstructor
 @SuperBuilder
 public abstract class SameDiffLayer extends AbstractSameDiffLayer {
 
@@ -97,8 +101,4 @@ public abstract class SameDiffLayer extends AbstractSameDiffLayer {
     ret.setLayerConfiguration(lconf);
     return ret;
   }
-
-  public static abstract class SameDiffLayerBuilder<C extends SameDiffLayer, B extends SameDiffLayerBuilder<C, B>> extends AbstractSameDiffLayerBuilder<C,B> {
-
-  }
 }
@@ -20,6 +20,7 @@
 
 package org.deeplearning4j.nn.conf.layers.samediff;
 
+import lombok.NoArgsConstructor;
 import lombok.experimental.SuperBuilder;
 import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
 import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
@@ -31,6 +32,7 @@ import org.nd4j.linalg.api.ndarray.INDArray;
 
 import java.util.Collection;
 import java.util.Map;
+@NoArgsConstructor
 @SuperBuilder
 public abstract class SameDiffOutputLayer extends AbstractSameDiffLayer {
 
@@ -323,12 +323,12 @@ public class ManualTests {
         .l2(0.0005)
         .weightInit(WeightInit.XAVIER)
         .updater(new Nesterovs(0.01, 0.9))
-        .layer(0, new FrozenLayer(ConvolutionLayer.builder(5, 5)
+        .layer(0, FrozenLayer.builder(ConvolutionLayer.builder(5, 5)
             //nIn and nOut specify depth. nIn here is the nChannels and nOut is the number of filters to be applied
             .nIn(nChannels).stride(1, 1).nOut(20).activation(Activation.IDENTITY).build()))
-        .layer(1, new FrozenLayer(SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2)
+        .layer(1, FrozenLayer.builder(SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2)
             .stride(2, 2).build()))
-        .layer(2, new FrozenLayer(DenseLayer.builder().activation(Activation.RELU).nOut(500).build()))
+        .layer(2, FrozenLayer.builder(DenseLayer.builder().activation(Activation.RELU).nOut(500).build()))
         .layer(3, OutputLayer.builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
             .nOut(outputNum).activation(Activation.SOFTMAX).build())
         .inputType(InputType.convolutionalFlat(28, 28, nChannels));