{
-
- /**
- * Number of inputs to the layer (input size)
- */
- private int nIn;
-
- /**
- * Number of outputs (output size)
- */
- private int nOut;
-
- /**
- * Number of Attention Heads
- */
- private int nHeads;
-
- /**
- * Size of attention heads
- */
- private int headSize;
-
- /**
- * Project input before applying attention or not.
- */
- private boolean projectInput;
-
-
- /**
- * Number of queries to learn
- */
- private int nQueries;
-
- /**
- * @param nIn Number of inputs to the layer (input size)
- */
- public Builder nIn(int nIn) {
- this.nIn = nIn;
- return this;
- }
-
- /**
- * @param nOut Number of outputs (output size)
- */
- public Builder nOut(int nOut) {
- this.nOut = nOut;
- return this;
- }
-
- /**
- * Number of Attention Heads
- */
- public Builder nHeads(int nHeads){
- this.nHeads = nHeads;
- return this;
- }
-
- /**
- * Size of attention heads
- */
- public Builder headSize(int headSize){
- this.headSize = headSize;
- return this;
- }
-
- /**
- * Project input before applying attention or not.
- */
- public Builder projectInput(boolean projectInput){
- this.projectInput = projectInput;
- return this;
- }
-
- /**
- * Number of queries to learn
- */
- public Builder nQueries(int nQueries){
- this.nQueries = nQueries;
- return this;
- }
-
- @Override
- @SuppressWarnings("unchecked")
- public LearnedSelfAttentionLayer build() {
- Preconditions.checkArgument(this.projectInput || this.nHeads == 1, "projectInput must be true when nHeads != 1");
- Preconditions.checkArgument(this.projectInput || nIn == nOut, "nIn must be equal to nOut when projectInput is false");
- Preconditions.checkArgument(!this.projectInput || nOut != 0, "nOut must be specified when projectInput is true");
- Preconditions.checkArgument(this.nOut % nHeads == 0 || headSize > 0, "nOut isn't divided by nHeads cleanly. Specify the headSize manually.");
- Preconditions.checkArgument(this.nQueries > 0, "You must set numQueries.");
-
- return new LearnedSelfAttentionLayer(this);
- }
+ return initBuild();
}
+ }
}
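
Note: LearnedSelfAttentionLayer now uses Lombok's @SuperBuilder; the hand-written Builder above is
replaced by a generated one whose custom build() keeps the old preconditions and delegates to
initBuild(). A minimal usage sketch of the migrated API (hypothetical sizes; assumes a builder()
entry point analogous to the other layers in this patch):

    LearnedSelfAttentionLayer attention = LearnedSelfAttentionLayer.builder()
            .nIn(128)                // input size (hypothetical)
            .nOut(128)               // output size (hypothetical)
            .nHeads(4)
            .headSize(32)
            .projectInput(true)      // required whenever nHeads != 1
            .nQueries(16)            // number of queries to learn
            .build();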
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected2D.java
index 138ecfd73..2d5b448a5 100644
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected2D.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected2D.java
@@ -41,6 +41,7 @@ import org.nd4j.autodiff.samediff.SDVariable;
import org.nd4j.autodiff.samediff.SameDiff;
import org.nd4j.enums.PadMode;
import org.nd4j.linalg.activations.Activation;
+import org.nd4j.linalg.activations.IActivation;
import org.nd4j.linalg.api.memory.MemoryWorkspace;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
@@ -333,6 +334,11 @@ public class LocallyConnected2D extends SameDiffLayer {
return self();
}
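+ /**
+ * Set the spatial input size (height, width) of the 2d input for this locally connected layer.
+ */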
+ public B inputSize(int... size) {
+ this.inputSize = size;
+ return self();
+ }
+
public B stride(int ... stride) {
this.stride$value = ValidationUtils.validate2NonNegative(stride, false, "stride");
this.stride$set = true;
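
A short sketch of how the new inputSize(...) setter might be used when the 2d input dimensions are
known up front (hypothetical values; nIn/nOut/kernelSize are assumed from LocallyConnected2D's
existing builder API):

    LocallyConnected2D locallyConnected = LocallyConnected2D.builder()
            .nIn(3)
            .nOut(16)
            .kernelSize(3, 3)
            .inputSize(28, 28)   // new setter: spatial size of the input
            .build();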
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PrimaryCapsules.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PrimaryCapsules.java
index 8617adf7d..9edb50322 100644
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PrimaryCapsules.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PrimaryCapsules.java
@@ -20,7 +20,9 @@
package org.deeplearning4j.nn.conf.layers;
+import java.util.Map;
import lombok.*;
+import lombok.experimental.SuperBuilder;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.inputs.InputType.InputTypeConvolutional;
@@ -37,388 +39,387 @@ import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.impl.layers.convolution.config.Conv2DConfig;
import org.nd4j.linalg.factory.Nd4j;
-import java.util.Map;
-
@Data
@NoArgsConstructor
@EqualsAndHashCode(callSuper = true)
+@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder")
public class PrimaryCapsules extends SameDiffLayer {
- private int[] kernelSize;
- private int[] stride;
- private int[] padding;
- private int[] dilation;
- private int inputChannels;
- private int channels;
+ private static final String WEIGHT_PARAM = "weight";
+ private static final String BIAS_PARAM = "bias";
+ /**
+ * Sets the kernel size of the 2d convolution
+ *
+ * @param kernelSize
+ * @return
+ */
+ @Builder.Default private int[] kernelSize = new int[] {9, 9};
+ /**
+ * Sets the stride of the 2d convolution
+ *
+ * @param stride
+ * @return
+ */
+ @Builder.Default private int[] stride = new int[] {2, 2};
+ /**
+ * Sets the padding of the 2d convolution
+ *
+ * @param padding
+ * @return
+ */
+ @Builder.Default private int[] padding = new int[] {0, 0};
+ /**
+ * Sets the dilation of the 2d convolution
+ *
+ * @param dilation
+ * @return
+ */
+ @Builder.Default private int[] dilation = new int[] {1, 1};
- private boolean hasBias;
+ private int inputChannels;
+ /**
+ * Sets the number of channels to use in the 2d convolution.
+ *
+ * Note that the actual number of channels is channels * capsuleDimensions
+ *
+ *
+ * <p>Does the same thing as nOut()
+ *
+ * @param channels
+ * @return
+ */
+ @Builder.Default private int channels = 32;
- private int capsules;
- private int capsuleDimensions;
+ @Builder.Default private boolean hasBias = true;
+ /**
+ * Usually inferred automatically.
+ *
+ * @param capsules
+ * @return
+ */
+ private int capsules;
+ /**
+ * Sets the number of dimensions to use in the capsules.
+ *
+ * @param capsuleDimensions
+ * @return
+ */
+ private int capsuleDimensions;
+ /**
+ * The convolution mode to use in the 2d convolution
+ *
+ * @param convolutionMode
+ * @return
+ */
+ @Builder.Default private ConvolutionMode convolutionMode = ConvolutionMode.Truncate;
+ /**
+ * Whether to use a ReLU activation on the 2d convolution
+ *
+ * @param useRelu
+ * @return
+ */
+ @Builder.Default private boolean useRelU = false;
+ /**
+ * Use a LeakyReLU activation on the 2d convolution
+ *
+ * @param leak the alpha value for the LeakyReLU activation.
+ * @return
+ */
+ @Builder.Default private double useLeakyReLU = 0;
- private ConvolutionMode convolutionMode = ConvolutionMode.Truncate;
+ public static PrimaryCapsulesBuilder<?, ?> builder() {
+ return innerBuilder();
+ }
- private boolean useRelu = false;
- private double leak = 0;
+ public static PrimaryCapsulesBuilder<?, ?> builder(
+ int capsuleDimensions,
+ int channels,
+ int[] kernelSize,
+ int[] stride,
+ int[] padding,
+ int[] dilation,
+ ConvolutionMode convolutionMode) {
+ return innerBuilder()
+ .capsuleDimensions(capsuleDimensions)
+ .channels(channels)
+ .kernelSize(kernelSize)
+ .stride(stride)
+ .padding(padding)
+ .dilation(dilation)
+ .convolutionMode(convolutionMode);
+ }
- private static final String WEIGHT_PARAM = "weight";
- private static final String BIAS_PARAM = "bias";
+ public static PrimaryCapsulesBuilder<?, ?> builder(
+ int capsuleDimensions,
+ int channels,
+ int[] kernelSize,
+ int[] stride,
+ int[] padding,
+ int[] dilation) {
+ return innerBuilder()
+ .capsuleDimensions(capsuleDimensions)
+ .channels(channels)
+ .kernelSize(kernelSize)
+ .stride(stride)
+ .padding(padding)
+ .dilation(dilation);
+ }
- public PrimaryCapsules(Builder builder){
- super(builder);
+ public static PrimaryCapsulesBuilder<?, ?> builder(
+ int capsuleDimensions, int channels, int[] kernelSize, int[] stride, int[] padding) {
+ return innerBuilder()
+ .capsuleDimensions(capsuleDimensions)
+ .channels(channels)
+ .kernelSize(kernelSize)
+ .stride(stride)
+ .padding(padding);
+ }
- this.kernelSize = builder.kernelSize;
- this.stride = builder.stride;
- this.padding = builder.padding;
- this.dilation = builder.dilation;
- this.channels = builder.channels;
- this.hasBias = builder.hasBias;
- this.capsules = builder.capsules;
- this.capsuleDimensions = builder.capsuleDimensions;
- this.convolutionMode = builder.convolutionMode;
- this.useRelu = builder.useRelu;
- this.leak = builder.leak;
+ public static PrimaryCapsulesBuilder<?, ?> builder(
+ int capsuleDimensions, int channels, int[] kernelSize, int[] stride) {
+ return innerBuilder()
+ .capsuleDimensions(capsuleDimensions)
+ .channels(channels)
+ .kernelSize(kernelSize)
+ .stride(stride);
+ }
- if(capsuleDimensions <= 0 || channels <= 0){
- throw new IllegalArgumentException("Invalid configuration for Primary Capsules (layer name = \""
- + name + "\"):"
- + " capsuleDimensions and channels must be > 0. Got: "
- + capsuleDimensions + ", " + channels);
- }
+ public static PrimaryCapsulesBuilder<?, ?> builder(
+ int capsuleDimensions, int channels, int[] kernelSize) {
+ return innerBuilder()
+ .capsuleDimensions(capsuleDimensions)
+ .channels(channels)
+ .kernelSize(kernelSize);
+ }
- if(capsules < 0){
- throw new IllegalArgumentException("Invalid configuration for Capsule ILayer (layer name = \""
- + name + "\"):"
- + " capsules must be >= 0 if set. Got: "
- + capsules);
- }
+ public static PrimaryCapsulesBuilder<?, ?> builder(int capsuleDimensions, int channels) {
+ return innerBuilder().capsuleDimensions(capsuleDimensions).channels(channels);
+ }
+ @Override
+ public SDVariable defineLayer(
+ SameDiff SD, SDVariable input, Map<String, SDVariable> paramTable, SDVariable mask) {
+ Conv2DConfig conf =
+ Conv2DConfig.builder()
+ .kH(kernelSize[0])
+ .kW(kernelSize[1])
+ .sH(stride[0])
+ .sW(stride[1])
+ .pH(padding[0])
+ .pW(padding[1])
+ .dH(dilation[0])
+ .dW(dilation[1])
+ .isSameMode(convolutionMode == ConvolutionMode.Same)
+ .build();
+
+ SDVariable conved;
+
+ if (hasBias) {
+ conved = SD.cnn.conv2d(input, paramTable.get(WEIGHT_PARAM), paramTable.get(BIAS_PARAM), conf);
+ } else {
+ conved = SD.cnn.conv2d(input, paramTable.get(WEIGHT_PARAM), conf);
}
- @Override
- public SDVariable defineLayer(SameDiff SD, SDVariable input, Map<String, SDVariable> paramTable, SDVariable mask) {
- Conv2DConfig conf = Conv2DConfig.builder()
- .kH(kernelSize[0]).kW(kernelSize[1])
- .sH(stride[0]).sW(stride[1])
- .pH(padding[0]).pW(padding[1])
- .dH(dilation[0]).dW(dilation[1])
- .isSameMode(convolutionMode == ConvolutionMode.Same)
- .build();
-
- SDVariable conved;
-
- if(hasBias){
- conved = SD.cnn.conv2d(input, paramTable.get(WEIGHT_PARAM), paramTable.get(BIAS_PARAM), conf);
- } else {
- conved = SD.cnn.conv2d(input, paramTable.get(WEIGHT_PARAM), conf);
- }
-
- if(useRelu){
- if(leak == 0) {
- conved = SD.nn.relu(conved, 0);
- } else {
- conved = SD.nn.leakyRelu(conved, leak);
- }
- }
-
- SDVariable reshaped = conved.reshape(-1, capsules, capsuleDimensions);
- return CapsuleUtils.squash(SD, reshaped, 2);
+ if (useRelU) {
+ if (useLeakyReLU == 0) {
+ conved = SD.nn.relu(conved, 0);
+ } else {
+ conved = SD.nn.leakyRelu(conved, useLeakyReLU);
+ }
}
- @Override
- public void defineParameters(SDLayerParams params) {
- params.clear();
- params.addWeightParam(WEIGHT_PARAM,
- kernelSize[0], kernelSize[1], inputChannels, (long) capsuleDimensions * channels);
+ SDVariable reshaped = conved.reshape(-1, capsules, capsuleDimensions);
+ return CapsuleUtils.squash(SD, reshaped, 2);
+ }
- if(hasBias){
- params.addBiasParam(BIAS_PARAM, (long) capsuleDimensions * channels);
+ @Override
+ public void defineParameters(SDLayerParams params) {
+ params.clear();
+ params.addWeightParam(
+ WEIGHT_PARAM,
+ kernelSize[0],
+ kernelSize[1],
+ inputChannels,
+ (long) capsuleDimensions * channels);
+
+ if (hasBias) {
+ params.addBiasParam(BIAS_PARAM, (long) capsuleDimensions * channels);
+ }
+ }
+
+ @Override
+ public void initializeParameters(Map<String, INDArray> params) {
+ try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) {
+ for (Map.Entry<String, INDArray> e : params.entrySet()) {
+ if (BIAS_PARAM.equals(e.getKey())) {
+ e.getValue().assign(0);
+ } else if (WEIGHT_PARAM.equals(e.getKey())) {
+ double fanIn = inputChannels * kernelSize[0] * kernelSize[1];
+ double fanOut =
+ capsuleDimensions
+ * channels
+ * kernelSize[0]
+ * kernelSize[1]
+ / ((double) stride[0] * stride[1]);
+ WeightInitUtil.initWeights(
+ fanIn, fanOut, e.getValue().shape(), weightInit, null, 'c', e.getValue());
}
+ }
+ }
+ }
+
+ @Override
+ public InputType getOutputType(int layerIndex, InputType inputType) {
+ if (inputType == null || inputType.getType() != Type.CNN) {
+ throw new IllegalStateException(
+ "Invalid input for Primary Capsules layer (layer name = \""
+ + name
+ + "\"): expect CNN input. Got: "
+ + inputType);
}
- @Override
- public void initializeParameters(Map<String, INDArray> params) {
- try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) {
- for (Map.Entry<String, INDArray> e : params.entrySet()) {
- if (BIAS_PARAM.equals(e.getKey())) {
- e.getValue().assign(0);
- } else if(WEIGHT_PARAM.equals(e.getKey())){
- double fanIn = inputChannels * kernelSize[0] * kernelSize[1];
- double fanOut = capsuleDimensions * channels * kernelSize[0] * kernelSize[1] / ((double) stride[0] * stride[1]);
- WeightInitUtil.initWeights(fanIn, fanOut, e.getValue().shape(), weightInit, null, 'c',
- e.getValue());
- }
- }
- }
+ if (capsules > 0) {
+ return InputType.recurrent(capsules, capsuleDimensions);
+ } else {
+
+ InputTypeConvolutional out =
+ (InputTypeConvolutional)
+ InputTypeUtil.getOutputTypeCnnLayers(
+ inputType,
+ kernelSize,
+ stride,
+ padding,
+ dilation,
+ convolutionMode,
+ (long) capsuleDimensions * channels,
+ -1,
+ getName(),
+ PrimaryCapsules.class);
+
+ return InputType.recurrent(
+ (int) (out.getChannels() * out.getHeight() * out.getWidth() / capsuleDimensions),
+ capsuleDimensions);
+ }
+ }
+
+ @Override
+ public void setNIn(InputType inputType, boolean override) {
+ if (inputType == null || inputType.getType() != Type.CNN) {
+ throw new IllegalStateException(
+ "Invalid input for Primary Capsules layer (layer name = \""
+ + name
+ + "\"): expect CNN input. Got: "
+ + inputType);
}
- @Override
- public InputType getOutputType(int layerIndex, InputType inputType) {
- if (inputType == null || inputType.getType() != Type.CNN) {
- throw new IllegalStateException("Invalid input for Primary Capsules layer (layer name = \""
- + name + "\"): expect CNN input. Got: " + inputType);
- }
+ InputTypeConvolutional ci = (InputTypeConvolutional) inputType;
- if(capsules > 0){
- return InputType.recurrent(capsules, capsuleDimensions);
- } else {
+ this.inputChannels = (int) ci.getChannels();
- InputTypeConvolutional out = (InputTypeConvolutional) InputTypeUtil
- .getOutputTypeCnnLayers(inputType, kernelSize, stride, padding, dilation, convolutionMode,
- (long) capsuleDimensions * channels, -1, getName(), PrimaryCapsules.class);
+ if (capsules <= 0 || override) {
- return InputType.recurrent((int) (out.getChannels() * out.getHeight() * out.getWidth() / capsuleDimensions),
- capsuleDimensions);
- }
+ InputTypeConvolutional out =
+ (InputTypeConvolutional)
+ InputTypeUtil.getOutputTypeCnnLayers(
+ inputType,
+ kernelSize,
+ stride,
+ padding,
+ dilation,
+ convolutionMode,
+ (long) capsuleDimensions * channels,
+ -1,
+ getName(),
+ PrimaryCapsules.class);
+
+ this.capsules =
+ (int) (out.getChannels() * out.getHeight() * out.getWidth() / capsuleDimensions);
+ }
+ }
+
+ public abstract static class PrimaryCapsulesBuilder<
+ C extends PrimaryCapsules, B extends PrimaryCapsulesBuilder<C, B>>
+ extends SameDiffLayerBuilder<C, B> {
+
+ public B kernelSize(int... kernelSize) {
+ this.kernelSize$value = ValidationUtils.validate2NonNegative(kernelSize, true, "kernelSize");
+ this.kernelSize$set = true;
+ return self();
}
- @Override
- public void setNIn(InputType inputType, boolean override) {
- if (inputType == null || inputType.getType() != Type.CNN) {
- throw new IllegalStateException("Invalid input for Primary Capsules layer (layer name = \""
- + name + "\"): expect CNN input. Got: " + inputType);
- }
-
- InputTypeConvolutional ci = (InputTypeConvolutional) inputType;
-
- this.inputChannels = (int) ci.getChannels();
-
- if(capsules <= 0 || override) {
-
- InputTypeConvolutional out = (InputTypeConvolutional) InputTypeUtil
- .getOutputTypeCnnLayers(inputType, kernelSize, stride, padding, dilation, convolutionMode,
- (long) capsuleDimensions * channels, -1, getName(), PrimaryCapsules.class);
-
- this.capsules = (int) (out.getChannels() * out.getHeight() * out.getWidth() / capsuleDimensions);
- }
+ public B stride(int... stride) {
+ this.stride$value = ValidationUtils.validate2NonNegative(stride, true, "stride");
+ this.stride$set = true;
+ return self();
}
- @Getter
- @Setter
- public static class Builder extends SameDiffLayer.Builder<Builder>{
-
- @Setter(AccessLevel.NONE)
- private int[] kernelSize = new int[]{9, 9};
-
- @Setter(AccessLevel.NONE)
- private int[] stride = new int[]{2, 2};
-
- @Setter(AccessLevel.NONE)
- private int[] padding = new int[]{0, 0};
-
- @Setter(AccessLevel.NONE)
- private int[] dilation = new int[]{1, 1};
-
- private int channels = 32;
-
- private boolean hasBias = true;
-
- private int capsules;
- private int capsuleDimensions;
-
- private ConvolutionMode convolutionMode = ConvolutionMode.Truncate;
-
- private boolean useRelu = false;
- private double leak = 0;
-
-
- public void setKernelSize(int... kernelSize){
- this.kernelSize = ValidationUtils.validate2NonNegative(kernelSize, true, "kernelSize");
- }
-
- public void setStride(int... stride){
- this.stride = ValidationUtils.validate2NonNegative(stride, true, "stride");
- }
-
- public void setPadding(int... padding){
- this.padding = ValidationUtils.validate2NonNegative(padding, true, "padding");
- }
-
- public void setDilation(int... dilation){
- this.dilation = ValidationUtils.validate2NonNegative(dilation, true, "dilation");
- }
-
-
- public Builder(int capsuleDimensions, int channels,
- int[] kernelSize, int[] stride, int[] padding, int[] dilation,
- ConvolutionMode convolutionMode){
- this.capsuleDimensions = capsuleDimensions;
- this.channels = channels;
- this.setKernelSize(kernelSize);
- this.setStride(stride);
- this.setPadding(padding);
- this.setDilation(dilation);
- this.convolutionMode = convolutionMode;
- }
-
- public Builder(int capsuleDimensions, int channels,
- int[] kernelSize, int[] stride, int[] padding, int[] dilation){
- this(capsuleDimensions, channels, kernelSize, stride, padding, dilation, ConvolutionMode.Truncate);
- }
-
- public Builder(int capsuleDimensions, int channels,
- int[] kernelSize, int[] stride, int[] padding){
- this(capsuleDimensions, channels, kernelSize, stride, padding, new int[]{1, 1}, ConvolutionMode.Truncate);
- }
-
- public Builder(int capsuleDimensions, int channels,
- int[] kernelSize, int[] stride){
- this(capsuleDimensions, channels, kernelSize, stride, new int[]{0, 0}, new int[]{1, 1}, ConvolutionMode.Truncate);
- }
-
- public Builder(int capsuleDimensions, int channels,
- int[] kernelSize){
- this(capsuleDimensions, channels, kernelSize, new int[]{2, 2}, new int[]{0, 0}, new int[]{1, 1}, ConvolutionMode.Truncate);
- }
-
- public Builder(int capsuleDimensions, int channels){
- this(capsuleDimensions, channels, new int[]{9, 9}, new int[]{2, 2}, new int[]{0, 0}, new int[]{1, 1}, ConvolutionMode.Truncate);
- }
-
- /**
- * Sets the kernel size of the 2d convolution
- *
- * @see ConvolutionLayer.Builder#kernelSize(int...)
- * @param kernelSize
- * @return
- */
- public Builder kernelSize(int... kernelSize){
- this.setKernelSize(kernelSize);
- return this;
- }
-
- /**
- * Sets the stride of the 2d convolution
- *
- * @see ConvolutionLayer.Builder#stride(int...)
- * @param stride
- * @return
- */
- public Builder stride(int... stride){
- this.setStride(stride);
- return this;
- }
-
- /**
- * Sets the padding of the 2d convolution
- *
- * @see ConvolutionLayer.Builder#padding(int...)
- * @param padding
- * @return
- */
- public Builder padding(int... padding){
- this.setPadding(padding);
- return this;
- }
-
- /**
- * Sets the dilation of the 2d convolution
- *
- * @see ConvolutionLayer.Builder#dilation(int...)
- * @param dilation
- * @return
- */
- public Builder dilation(int... dilation){
- this.setDilation(dilation);
- return this;
- }
-
- /**
- * Sets the number of channels to use in the 2d convolution.
- *
- * Note that the actual number of channels is channels * capsuleDimensions
- *
- * Does the same thing as nOut()
- *
- * @param channels
- * @return
- */
- public Builder channels(int channels){
- this.channels = channels;
- return this;
- }
-
- /**
- * Sets the number of channels to use in the 2d convolution.
- *
- * Note that the actual number of channels is channels * capsuleDimensions
- *
- * Does the same thing as channels()
- *
- * @param nOut
- * @return
- */
- public Builder nOut(int nOut){
- return channels(nOut);
- }
-
- /**
- * Sets the number of dimensions to use in the capsules.
- * @param capsuleDimensions
- * @return
- */
- public Builder capsuleDimensions(int capsuleDimensions){
- this.capsuleDimensions = capsuleDimensions;
- return this;
- }
-
- /**
- * Usually inferred automatically.
- * @param capsules
- * @return
- */
- public Builder capsules(int capsules){
- this.capsules = capsules;
- return this;
- }
-
- public Builder hasBias(boolean hasBias){
- this.hasBias = hasBias;
- return this;
- }
-
- /**
- * The convolution mode to use in the 2d convolution
- * @param convolutionMode
- * @return
- */
- public Builder convolutionMode(ConvolutionMode convolutionMode){
- this.convolutionMode = convolutionMode;
- return this;
- }
-
- /**
- * Whether to use a ReLU activation on the 2d convolution
- * @param useRelu
- * @return
- */
- public Builder useReLU(boolean useRelu){
- this.useRelu = useRelu;
- return this;
- }
-
- /**
- * Use a ReLU activation on the 2d convolution
- * @return
- */
- public Builder useReLU(){
- return useReLU(true);
- }
-
- /**
- * Use a LeakyReLU activation on the 2d convolution
- * @param leak the alpha value for the LeakyReLU activation.
- * @return
- */
- public Builder useLeakyReLU(double leak){
- this.useRelu = true;
- this.leak = leak;
- return this;
- }
-
- @Override
- public <E extends LayerConfiguration> E build() {
- return (E) new PrimaryCapsules(this);
- }
+ public B padding(int... padding) {
+ this.padding$value = ValidationUtils.validate2NonNegative(padding, true, "padding");
+ this.padding$set = true;
+ return self();
}
+
+ public B dilation(int... dilation) {
+ this.dilation$value = ValidationUtils.validate2NonNegative(dilation, true, "dilation");
+ this.dilation$set = true;
+ return self();
+ }
+ /**
+ * Sets the number of channels to use in the 2d convolution.
+ *
+ * Note that the actual number of channels is channels * capsuleDimensions
+ *
+ *
+ * <p>Does the same thing as channels()
+ *
+ * @param nOut
+ * @return
+ */
+ public B nOut(int nOut) {
+ return channels(nOut);
+ }
+ /**
+ * Use a ReLU activation on the 2d convolution
+ *
+ * @return
+ */
+ public B useReLU() {
+ return useRelU(true);
+ }
+
+ /**
+ * Use a LeakyReLU activation on the 2d convolution. Implies that ReLU is enabled, as if {@link #useReLU()} had been called.
+ *
+ * @param leak the alpha value for the LeakyReLU activation.
+ * @return
+ */
+ public B useLeakyReLU(double leak) {
+ this.useRelU(true);
+ this.useLeakyReLU$value = leak;
+ this.useLeakyReLU$set = true;
+ return self();
+ }
+
+ public C build() {
+ C l = initBuild();
+ // Validate against the built instance so that @Builder.Default values
+ // (e.g. channels = 32) are applied before the checks run; the raw
+ // channels$value builder field is still 0 when channels(...) was never called.
+ if (l.getCapsuleDimensions() <= 0 || l.getChannels() <= 0) {
+ throw new IllegalArgumentException(
+ "Invalid configuration for Primary Capsules (layer name = \""
+ + l.getName()
+ + "\"):"
+ + " capsuleDimensions and channels must be > 0. Got: "
+ + l.getCapsuleDimensions()
+ + ", "
+ + l.getChannels());
+ }
+
+ if (l.getCapsules() < 0) {
+ throw new IllegalArgumentException(
+ "Invalid configuration for Capsule ILayer (layer name = \""
+ + l.getName()
+ + "\"):"
+ + " capsules must be >= 0 if set. Got: "
+ + l.getCapsules());
+ }
+ return l;
+ }
+ }
}
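
The static builder(...) overloads above mirror the removed Builder constructors one-for-one. A
minimal sketch of the migrated PrimaryCapsules API (hypothetical values):

    // Fluent form, relying on the @Builder.Default values for kernelSize/stride/padding/dilation:
    PrimaryCapsules caps = PrimaryCapsules.builder()
            .capsuleDimensions(8)
            .channels(32)
            .useLeakyReLU(0.01)   // also enables the ReLU path
            .build();

    // Shorthand equivalent to the old Builder(int capsuleDimensions, int channels) constructor:
    PrimaryCapsules caps2 = PrimaryCapsules.builder(8, 32).build();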
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RecurrentAttentionLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RecurrentAttentionLayer.java
index 67a821b3c..ec2ed14bc 100644
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RecurrentAttentionLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RecurrentAttentionLayer.java
@@ -21,6 +21,7 @@
package org.deeplearning4j.nn.conf.layers;
import lombok.*;
+import lombok.experimental.SuperBuilder;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.RNNFormat;
@@ -41,15 +42,63 @@ import org.nd4j.linalg.factory.Nd4j;
import java.util.Map;
@Data
+@NoArgsConstructor
@EqualsAndHashCode(callSuper = true)
+@SuperBuilder(buildMethodName = "initBuild")
public class RecurrentAttentionLayer extends SameDiffLayer {
- private long nIn;
- private long nOut;
+
+ public static abstract class RecurrentAttentionLayerBuilder<
+ C extends RecurrentAttentionLayer, B extends RecurrentAttentionLayerBuilder<C, B>>
+ extends SameDiffLayerBuilder<C, B> {
+
+ public C build() {
+ // projectInput defaults to true via @Builder.Default, but Lombok only applies
+ // that default inside initBuild(); resolve the effective value before validating.
+ boolean projectInput = !this.projectInput$set || this.projectInput$value;
+ Preconditions.checkArgument(projectInput || this.nHeads == 1, "projectInput must be true when nHeads != 1");
+ Preconditions.checkArgument(projectInput || nIn == nOut, "nIn must be equal to nOut when projectInput is false");
+ Preconditions.checkArgument(!projectInput || nOut != 0, "nOut must be specified when projectInput is true");
+ Preconditions.checkArgument(this.nOut % nHeads == 0 || headSize > 0, "nOut isn't divided by nHeads cleanly. Specify the headSize manually.");
+
+ // Preserve the removed constructor's default: headSize = nOut / nHeads when unset.
+ if (this.headSize == 0 && this.nHeads != 0) {
+ this.headSize = this.nOut / this.nHeads;
+ }
+
+ C l = initBuild();
+ return l;
+ }
+ }
+
+ /**
+ * Number of inputs to the layer (input size)
+ */
+ private int nIn;
+
+ /**
+ * Number of outputs (output size)
+ */
+ private int nOut;
+
+ /**
+ * Number of Attention Heads
+ */
private int nHeads;
- private long headSize;
- private boolean projectInput;
- private Activation activation;
- private boolean hasBias;
+
+ /**
+ * Size of attention heads
+ */
+ private int headSize;
+
+ /**
+ * Project input before applying attention or not.
+ */
+ @Builder.Default
+ private boolean projectInput = true;
+
+ /**
+ * If true (default is true) the layer will have a bias
+ */
+ @Builder.Default
+ private boolean hasBias = true;
+
+ /**
+ * Activation function for the layer
+ */
+ @Builder.Default
+ private Activation activation = Activation.TANH;
+
private static final String WEIGHT_KEY_QUERY_PROJECTION = "Wq";
private static final String WEIGHT_KEY_KEY_PROJECTION = "Wk";
@@ -60,18 +109,7 @@ public class RecurrentAttentionLayer extends SameDiffLayer {
private static final String RECURRENT_WEIGHT_KEY = SimpleRnnParamInitializer.RECURRENT_WEIGHT_KEY;
private int timeSteps;
- private RecurrentAttentionLayer(){/*No arg constructor for serialization*/}
- protected RecurrentAttentionLayer(Builder builder){
- super(builder);
- nIn = builder.nIn;
- nOut = builder.nOut;
- nHeads = builder.nHeads;
- headSize = builder.headSize == 0 ? nOut / nHeads : builder.headSize;
- projectInput = builder.projectInput;
- activation = builder.activation;
- hasBias = builder.hasBias;
- }
@Override
public InputPreProcessor getPreProcessorForInputType(InputType inputType) {
@@ -87,7 +125,7 @@ public class RecurrentAttentionLayer extends SameDiffLayer {
if (nIn <= 0 || override) {
InputType.InputTypeRecurrent r = (InputType.InputTypeRecurrent) inputType;
- this.nIn = r.getSize();
+ this.nIn = (int) r.getSize();
}
}
@@ -206,109 +244,5 @@ public class RecurrentAttentionLayer extends SameDiffLayer {
return sameDiff.concat(2, outputSlices);
}
- @Getter
- @Setter
- public static class Builder extends SameDiffLayer.Builder<Builder> {
- /**
- * Number of inputs to the layer (input size)
- */
- private int nIn;
-
- /**
- * Number of outputs (output size)
- */
- private int nOut;
-
- /**
- * Number of Attention Heads
- */
- private int nHeads;
-
- /**
- * Size of attention heads
- */
- private int headSize;
-
- /**
- * Project input before applying attention or not.
- */
- private boolean projectInput = true;
-
- /**
- * If true (default is true) the layer will have a bias
- */
- private boolean hasBias = true;
-
- /**
- * Activation function for the layer
- */
- private Activation activation = Activation.TANH;
-
- /**
- * @param nIn Number of inputs to the layer (input size)
- */
- public Builder nIn(int nIn) {
- this.nIn = nIn;
- return this;
- }
-
- /**
- * @param nOut Number of outputs (output size)
- */
- public Builder nOut(int nOut) {
- this.nOut = nOut;
- return this;
- }
-
- /**
- * Number of Attention Heads
- */
- public Builder nHeads(int nHeads){
- this.nHeads = nHeads;
- return this;
- }
-
- /**
- * Size of attention heads
- */
- public Builder headSize(int headSize){
- this.headSize = headSize;
- return this;
- }
-
- /**
- * Project input before applying attention or not.
- */
- public Builder projectInput(boolean projectInput){
- this.projectInput = projectInput;
- return this;
- }
-
- /**
- * @param hasBias If true (default is true) the layer will have a bias
- */
- public Builder hasBias(boolean hasBias) {
- this.hasBias = hasBias;
- return this;
- }
-
- /**
- * @param activation Activation function for the layer
- */
- public Builder activation(Activation activation) {
- this.activation = activation;
- return this;
- }
-
- @Override
- @SuppressWarnings("unchecked")
- public RecurrentAttentionLayer build() {
- Preconditions.checkArgument(this.projectInput || this.nHeads == 1, "projectInput must be true when nHeads != 1");
- Preconditions.checkArgument(this.projectInput || nIn == nOut, "nIn must be equal to nOut when projectInput is false");
- Preconditions.checkArgument(!this.projectInput || nOut != 0, "nOut must be specified when projectInput is true");
- Preconditions.checkArgument(this.nOut % nHeads == 0 || headSize > 0, "nOut isn't divided by nHeads cleanly. Specify the headSize manually.");
- return new RecurrentAttentionLayer(this);
- }
- }
}
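
RecurrentAttentionLayer follows the same migration; projectInput, hasBias and activation keep
their old defaults via @Builder.Default, and build() re-applies the old preconditions. A usage
sketch (hypothetical sizes):

    RecurrentAttentionLayer recurrentAttention = RecurrentAttentionLayer.builder()
            .nIn(128)
            .nOut(128)
            .nHeads(4)                      // headSize defaults to nOut / nHeads
            .activation(Activation.TANH)    // explicit, though TANH is already the default
            .build();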
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SelfAttentionLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SelfAttentionLayer.java
index ab7947201..2f399d5e1 100644
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SelfAttentionLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SelfAttentionLayer.java
@@ -20,7 +20,9 @@
package org.deeplearning4j.nn.conf.layers;
+import java.util.Map;
import lombok.*;
+import lombok.experimental.SuperBuilder;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.RNNFormat;
import org.deeplearning4j.nn.conf.inputs.InputType;
@@ -34,186 +36,130 @@ import org.nd4j.linalg.api.memory.MemoryWorkspace;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
-import java.util.Map;
-
@Data
@EqualsAndHashCode(callSuper = true)
+@NoArgsConstructor()
+@SuperBuilder(buildMethodName = "initBuild")
public class SelfAttentionLayer extends SameDiffLayer {
- private long nIn;
- private long nOut;
- private int nHeads;
- private long headSize;
- private boolean projectInput;
+ private static final String WEIGHT_KEY_QUERY_PROJECTION = "Wq";
+ private static final String WEIGHT_KEY_KEY_PROJECTION = "Wk";
+ private static final String WEIGHT_KEY_VALUE_PROJECTION = "Wv";
+ private static final String WEIGHT_KEY_OUT_PROJECTION = "Wo";
+ /** Number of inputs to the layer (input size) */
+ private int nIn;
+ /** Number of outputs (output size) */
+ private int nOut;
+ /** Number of Attention Heads */
+ private int nHeads;
+ /** Size of attention heads */
+ private int headSize;
+ /** Project input before applying attention or not. */
+ private boolean projectInput;
- private static final String WEIGHT_KEY_QUERY_PROJECTION = "Wq";
- private static final String WEIGHT_KEY_KEY_PROJECTION = "Wk";
- private static final String WEIGHT_KEY_VALUE_PROJECTION = "Wv";
- private static final String WEIGHT_KEY_OUT_PROJECTION = "Wo";
+ @Override
+ public InputPreProcessor getPreProcessorForInputType(InputType inputType) {
+ return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, RNNFormat.NCW, getName());
+ }
- private SelfAttentionLayer(){/*No arg constructor for serialization*/}
-
- protected SelfAttentionLayer(Builder builder){
- super(builder);
- nIn = builder.nIn;
- nOut = builder.nOut;
- nHeads = builder.nHeads;
- headSize = builder.headSize == 0 ? nOut / nHeads : builder.headSize;
- projectInput = builder.projectInput;
+ @Override
+ public void setNIn(InputType inputType, boolean override) {
+ if (inputType == null || inputType.getType() != InputType.Type.RNN) {
+ throw new IllegalStateException(
+ "Invalid input for Self Attention layer (layer name = \""
+ + getName()
+ + "\"): expect RNN input type with size > 0. Got: "
+ + inputType);
}
- @Override
- public InputPreProcessor getPreProcessorForInputType(InputType inputType) {
- return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, RNNFormat.NCW, getName());
+ if (nIn <= 0 || override) {
+ InputType.InputTypeRecurrent r = (InputType.InputTypeRecurrent) inputType;
+ this.nIn = (int) r.getSize();
+ }
+ }
+
+ @Override
+ public InputType getOutputType(int layerIndex, InputType inputType) {
+ if (inputType == null || inputType.getType() != InputType.Type.RNN) {
+ throw new IllegalStateException(
+ "Invalid input for Self Attention layer (layer index = "
+ + layerIndex
+ + ", layer name = \""
+ + getName()
+ + "\"): expect RNN input type with size > 0. Got: "
+ + inputType);
}
- @Override
- public void setNIn(InputType inputType, boolean override) {
- if (inputType == null || inputType.getType() != InputType.Type.RNN) {
- throw new IllegalStateException("Invalid input for Self Attention layer (layer name = \"" + getName()
- + "\"): expect RNN input type with size > 0. Got: " + inputType);
- }
+ InputType.InputTypeRecurrent itr = (InputType.InputTypeRecurrent) inputType;
- if (nIn <= 0 || override) {
- InputType.InputTypeRecurrent r = (InputType.InputTypeRecurrent) inputType;
- this.nIn = r.getSize();
- }
+ if (projectInput) {
+ return InputType.recurrent(nOut, itr.getTimeSeriesLength());
+ } else {
+ return InputType.recurrent(nIn, itr.getTimeSeriesLength());
}
+ }
- @Override
- public InputType getOutputType(int layerIndex, InputType inputType) {
- if (inputType == null || inputType.getType() != InputType.Type.RNN) {
- throw new IllegalStateException("Invalid input for Self Attention layer (layer index = " + layerIndex
- + ", layer name = \"" + getName() + "\"): expect RNN input type with size > 0. Got: "
- + inputType);
- }
+ @Override
+ public void defineParameters(SDLayerParams params) {
+ params.clear();
- InputType.InputTypeRecurrent itr = (InputType.InputTypeRecurrent) inputType;
-
- if(projectInput){
- return InputType.recurrent(nOut, itr.getTimeSeriesLength());
- }else{
- return InputType.recurrent(nIn, itr.getTimeSeriesLength());
- }
+ if (projectInput) {
+ params.addWeightParam(WEIGHT_KEY_QUERY_PROJECTION, nHeads, headSize, nIn);
+ params.addWeightParam(WEIGHT_KEY_KEY_PROJECTION, nHeads, headSize, nIn);
+ params.addWeightParam(WEIGHT_KEY_VALUE_PROJECTION, nHeads, headSize, nIn);
+ params.addWeightParam(WEIGHT_KEY_OUT_PROJECTION, nHeads * headSize, nOut);
}
+ }
- @Override
- public void defineParameters(SDLayerParams params) {
- params.clear();
-
- if(projectInput){
- params.addWeightParam(WEIGHT_KEY_QUERY_PROJECTION, nHeads, headSize, nIn);
- params.addWeightParam(WEIGHT_KEY_KEY_PROJECTION, nHeads, headSize, nIn);
- params.addWeightParam(WEIGHT_KEY_VALUE_PROJECTION, nHeads, headSize, nIn);
- params.addWeightParam(WEIGHT_KEY_OUT_PROJECTION, nHeads * headSize, nOut);
+ @Override
+ public void initializeParameters(Map<String, INDArray> params) {
+ try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) {
+ for (Map.Entry<String, INDArray> e : params.entrySet()) {
+ if (e.getKey().equals(WEIGHT_KEY_OUT_PROJECTION)) {
+ WeightInitUtil.initWeights(
+ nIn, headSize, e.getValue().shape(), weightInit, null, 'c', e.getValue());
+ } else {
+ WeightInitUtil.initWeights(
+ nHeads * headSize, nOut, e.getValue().shape(), weightInit, null, 'c', e.getValue());
}
+ }
}
+ }
- @Override
- public void initializeParameters(Map<String, INDArray> params) {
- try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) {
- for (Map.Entry<String, INDArray> e : params.entrySet()) {
- if(e.getKey().equals(WEIGHT_KEY_OUT_PROJECTION)){
- WeightInitUtil.initWeights(nIn, headSize, e.getValue().shape(), weightInit, null, 'c', e.getValue());
- }else{
- WeightInitUtil.initWeights(nHeads * headSize, nOut, e.getValue().shape(), weightInit, null, 'c', e.getValue());
- }
- }
- }
+ @Override
+ public SDVariable defineLayer(
+ SameDiff sameDiff,
+ SDVariable layerInput,
+ Map<String, SDVariable> paramTable,
+ SDVariable mask) {
+ if (projectInput) {
+ val Wq = paramTable.get(WEIGHT_KEY_QUERY_PROJECTION);
+ val Wk = paramTable.get(WEIGHT_KEY_KEY_PROJECTION);
+ val Wv = paramTable.get(WEIGHT_KEY_VALUE_PROJECTION);
+ val Wo = paramTable.get(WEIGHT_KEY_OUT_PROJECTION);
+
+ return sameDiff.nn.multiHeadDotProductAttention(
+ getName(), layerInput, layerInput, layerInput, Wq, Wk, Wv, Wo, mask, true);
+ } else {
+ return sameDiff.nn.dotProductAttention(
+ getName(), layerInput, layerInput, layerInput, mask, true);
}
+ }
+ public abstract static class SelfAttentionLayerBuilder<
+ C extends SelfAttentionLayer, B extends SelfAttentionLayerBuilder<C, B>>
+ extends SameDiffLayerBuilder<C, B> {
+ public C build() {
+ Preconditions.checkArgument(
+ this.projectInput || this.nHeads == 1, "projectInput must be true when nHeads != 1");
+ Preconditions.checkArgument(
+ this.projectInput || nIn == nOut, "nIn must be equal to nOut when projectInput is false");
+ Preconditions.checkArgument(
+ !this.projectInput || nOut != 0, "nOut must be specified when projectInput is true");
+ Preconditions.checkArgument(
+ this.nOut % nHeads == 0 || headSize > 0,
+ "nOut isn't divided by nHeads cleanly. Specify the headSize manually.");
- @Override
- public SDVariable defineLayer(SameDiff sameDiff, SDVariable layerInput, Map<String, SDVariable> paramTable, SDVariable mask) {
- if(projectInput){
- val Wq = paramTable.get(WEIGHT_KEY_QUERY_PROJECTION);
- val Wk = paramTable.get(WEIGHT_KEY_KEY_PROJECTION);
- val Wv = paramTable.get(WEIGHT_KEY_VALUE_PROJECTION);
- val Wo = paramTable.get(WEIGHT_KEY_OUT_PROJECTION);
-
- return sameDiff.nn.multiHeadDotProductAttention(getName(), layerInput, layerInput, layerInput, Wq, Wk, Wv, Wo, mask, true);
- }else{
- return sameDiff.nn.dotProductAttention(getName(), layerInput, layerInput, layerInput, mask, true);
- }
- }
-
-
- @Getter
- @Setter
- public static class Builder extends SameDiffLayer.Builder<Builder> {
-
- /**
- * Number of inputs to the layer (input size)
- */
- private int nIn;
-
- /**
- * Number of outputs (output size)
- */
- private int nOut;
-
- /**
- * Number of Attention Heads
- */
- private int nHeads;
-
- /**
- * Size of attention heads
- */
- private int headSize;
-
- /**
- * Project input before applying attention or not.
- */
- private boolean projectInput;
-
- /**
- * @param nIn Number of inputs to the layer (input size)
- */
- public Builder nIn(int nIn) {
- this.nIn = nIn;
- return this;
- }
-
- /**
- * @param nOut Number of outputs (output size)
- */
- public Builder nOut(int nOut) {
- this.nOut = nOut;
- return this;
- }
-
- /**
- * Number of Attention Heads
- */
- public Builder nHeads(int nHeads){
- this.nHeads = nHeads;
- return this;
- }
-
- /**
- * Size of attention heads
- */
- public Builder headSize(int headSize){
- this.headSize = headSize;
- return this;
- }
-
- /**
- * Project input before applying attention or not.
- */
- public Builder projectInput(boolean projectInput){
- this.projectInput = projectInput;
- return this;
- }
-
- @Override
- @SuppressWarnings("unchecked")
- public SelfAttentionLayer build() {
- Preconditions.checkArgument(this.projectInput || this.nHeads == 1, "projectInput must be true when nHeads != 1");
- Preconditions.checkArgument(this.projectInput || nIn == nOut, "nIn must be equal to nOut when projectInput is false");
- Preconditions.checkArgument(!this.projectInput || nOut != 0, "nOut must be specified when projectInput is true");
- Preconditions.checkArgument(this.nOut % nHeads == 0 || headSize > 0, "nOut isn't divided by nHeads cleanly. Specify the headSize manually.");
- return new SelfAttentionLayer(this);
- }
+ // Preserve the removed constructor's default: headSize = nOut / nHeads when unset.
+ if (this.headSize == 0 && this.nHeads != 0) {
+ this.headSize = this.nOut / this.nHeads;
+ }
+ return initBuild();
}
+ }
}
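
A matching sketch for the migrated SelfAttentionLayer (hypothetical sizes; build() enforces the
same preconditions as the removed Builder):

    SelfAttentionLayer selfAttention = SelfAttentionLayer.builder()
            .nIn(128)
            .nOut(128)
            .nHeads(4)
            .headSize(32)
            .projectInput(true)
            .build();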
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SeparableConvolution2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SeparableConvolution2D.java
index bc036193c..c6ed235ab 100644
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SeparableConvolution2D.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SeparableConvolution2D.java
@@ -63,7 +63,16 @@ public class SeparableConvolution2D extends ConvolutionLayer {
* @return Builder
*/
@Builder.Default private int depthMultiplier = 1;
-
+ /**
+ * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last).
+ * See {@link CNN2DFormat} for more details.
+ * Default: NCHW
+ *
+ * @param format Format for activations (in and out)
+ */
+ @Builder.Default
+ protected CNN2DFormat dataFormat =
+ CNN2DFormat.NCHW; // default value for legacy serialization reasons
public static SeparableConvolution2DBuilder<?, ?> builder() {
return innerBuilder();
}
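
The new dataFormat field gives SeparableConvolution2D an explicit NCHW default for legacy
serialization. A brief sketch of overriding it (hypothetical values; nIn/nOut/kernelSize are
assumed from the inherited ConvolutionLayer builder):

    SeparableConvolution2D separable = SeparableConvolution2D.builder()
            .nIn(3)
            .nOut(16)
            .kernelSize(3, 3)
            .dataFormat(CNN2DFormat.NHWC)   // channels-last activations
            .build();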
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToBatchLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToBatchLayer.java
index 13bbe63c1..88a7b2444 100644
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToBatchLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToBatchLayer.java
@@ -20,7 +20,10 @@
package org.deeplearning4j.nn.conf.layers;
+import java.util.Collection;
+import java.util.Map;
import lombok.*;
+import lombok.experimental.SuperBuilder;
import org.deeplearning4j.nn.api.ParamInitializer;
import org.deeplearning4j.nn.conf.CNN2DFormat;
import org.deeplearning4j.nn.conf.InputPreProcessor;
@@ -35,195 +38,160 @@ import org.nd4j.common.base.Preconditions;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
-import java.util.Collection;
-import java.util.Map;
-
@Data
@NoArgsConstructor
@ToString(callSuper = true)
@EqualsAndHashCode(callSuper = true)
+@SuperBuilder(builderMethodName = "innerBuilder")
public class SpaceToBatchLayer extends NoParamLayer {
- // TODO: throw error when block and padding dims don't match
+ /**
+ * Block size for SpaceToBatch layer. Should be a length 2 array for the height and width
+ * dimensions
+ */
+ protected int[] blockSize;
+ /** A 2d array, with format [[padTop, padBottom], [padLeft, padRight]] */
+ @Builder.Default protected int[][] padding = new int[][] {{0, 0}, {0, 0}};
+ /**
+ * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last).
+ * See {@link CNN2DFormat} for more details.
+ * Default: NCHW
+ *
+ * @param format Format for activations (in and out)
+ */
+ @Builder.Default protected CNN2DFormat dataFormat = CNN2DFormat.NCHW;
- protected int[] blocks;
- protected int[][] padding;
- protected CNN2DFormat format = CNN2DFormat.NCHW;
+ public static SpaceToBatchLayerBuilder<?, ?> builder() {
+ return innerBuilder();
+ }
+ // TODO: throw error when block and padding dims don't match
+ /**
+ * @param blocks Block size for SpaceToBatch layer. Should be a length 2 array for the height and
+ * width dimensions
+ */
+ public static SpaceToBatchLayerBuilder<?, ?> builder(int[] blocks) {
+ return innerBuilder().blockSize(blocks);
+ }
- protected SpaceToBatchLayer(Builder builder) {
- super(builder);
- this.blocks = builder.blocks;
- this.padding = builder.padding;
- this.format = builder.format;
+ /**
+ * @param blocks Block size for SpaceToBatch layer. Should be a length 2 array for the height and
+ * width dimensions
+ * @param padding Padding - should be a 2d array, with format [[padTop, padBottom], [padLeft,
+ * padRight]]
+ */
+ public static SpaceToBatchLayerBuilder<?, ?> builder(int[] blocks, int[][] padding) {
+ return innerBuilder().blockSize(blocks).padding(padding);
+ }
+
+ @Override
+ public SpaceToBatchLayer clone() {
+ return (SpaceToBatchLayer) super.clone();
+ }
+
+ @Override
+ public org.deeplearning4j.nn.api.Layer instantiate(
+ NeuralNetConfiguration conf,
+ Collection<TrainingListener> trainingListeners,
+ int layerIndex,
+ INDArray layerParamsView,
+ boolean initializeParams,
+ DataType networkDataType) {
+ LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
+
+ org.deeplearning4j.nn.layers.convolution.SpaceToBatch ret =
+ new org.deeplearning4j.nn.layers.convolution.SpaceToBatch(lconf, networkDataType);
+ ret.addTrainingListeners(trainingListeners);
+ ret.setIndex(layerIndex);
+ ret.setParamsViewArray(layerParamsView);
+ Map<String, INDArray> paramTable = initializer().init(this, layerParamsView, initializeParams);
+ ret.setParamTable(paramTable);
+ ret.setLayerConfiguration(lconf);
+ return ret;
+ }
+
+ @Override
+ public LayerMemoryReport getMemoryReport(InputType inputType) {
+ InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType;
+ InputType.InputTypeConvolutional outputType =
+ (InputType.InputTypeConvolutional) getOutputType(-1, inputType);
+
+ return new LayerMemoryReport.Builder(name, SpaceToBatchLayer.class, inputType, outputType)
+ .standardMemory(0, 0) // No params
+ .cacheMemory(
+ MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) // No caching
+ .build();
+ }
+
+ @Override
+ public InputType getOutputType(int layerIndex, InputType inputType) {
+ if (inputType == null || inputType.getType() != InputType.Type.CNN) {
+ throw new IllegalStateException(
+ "Invalid input for Subsampling layer (layer name=\""
+ + getName()
+ + "\"): Expected CNN input, got "
+ + inputType);
+ }
+ InputType.InputTypeConvolutional i = (InputType.InputTypeConvolutional) inputType;
+ return InputType.convolutional(
+ (i.getHeight() + padding[0][0] + padding[0][1]) / blockSize[0],
+ (i.getWidth() + padding[1][0] + padding[1][1]) / blockSize[1],
+ i.getChannels(),
+ i.getFormat());
+ }
+
+ @Override
+ public ParamInitializer initializer() {
+ return EmptyParamInitializer.getInstance();
+ }
+
+ @Override
+ public void setNIn(InputType inputType, boolean override) {
+ Preconditions.checkState(
+ inputType.getType() == InputType.Type.CNN,
+ "Only CNN input types can be used with SpaceToBatchLayer, got %s",
+ inputType);
+ this.dataFormat = ((InputType.InputTypeConvolutional) inputType).getFormat();
+ }
+
+ @Override
+ public InputPreProcessor getPreProcessorForInputType(InputType inputType) {
+ if (inputType == null) {
+ throw new IllegalStateException(
+ "Invalid input for space to batch layer (layer name=\""
+ + getName()
+ + "\"): input is null");
+ }
+ return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getName());
+ }
+
+ @Override
+ public boolean isPretrainParam(String paramName) {
+ throw new UnsupportedOperationException("SpaceToBatchLayer does not contain parameters");
+ }
+
+ public abstract static class SpaceToBatchLayerBuilder<
+ C extends SpaceToBatchLayer, B extends SpaceToBatchLayerBuilder<C, B>>
+ extends NoParamLayerBuilder<C, B> {
+ /**
+ * @param blocks Block size for SpaceToBatch layer. Should be a length 2 array for the height
+ * and width dimensions
+ * @return
+ */
+ public B blockSize(int... blocks) {
+ this.blockSize = ValidationUtils.validate2NonNegative(blocks, false, "blocks");
+ return self();
}
- @Override
- public SpaceToBatchLayer clone() {
- return (SpaceToBatchLayer) super.clone();
+ /**
+ * @param padding Padding - should be a 2d array, with format [[padTop, padBottom], [padLeft,
+ * padRight]]
+ * @return
+ */
+ public B padding(int[][] padding) {
+ this.padding$value = ValidationUtils.validate2x2NonNegative(padding, "padding");
+ this.padding$set = true;
+ return self();
}
-
- @Override
- public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf,
- Collection trainingListeners, int layerIndex, INDArray layerParamsView,
- boolean initializeParams, DataType networkDataType) {
- LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
-
- org.deeplearning4j.nn.layers.convolution.SpaceToBatch ret =
- new org.deeplearning4j.nn.layers.convolution.SpaceToBatch(lconf, networkDataType);
- ret.addTrainingListeners(trainingListeners);
- ret.setIndex(layerIndex);
- ret.setParamsViewArray(layerParamsView);
- Map<String, INDArray> paramTable = initializer().init(this, layerParamsView, initializeParams);
- ret.setParamTable(paramTable);
- ret.setLayerConfiguration(lconf);
- return ret;
- }
-
- @Override
- public LayerMemoryReport getMemoryReport(InputType inputType) {
- InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType;
- InputType.InputTypeConvolutional outputType = (InputType.InputTypeConvolutional) getOutputType(-1, inputType);
-
- return new LayerMemoryReport.Builder(name, SpaceToBatchLayer.class, inputType, outputType)
- .standardMemory(0, 0) //No params
- .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching
- .build();
- }
-
- @Override
- public InputType getOutputType(int layerIndex, InputType inputType) {
- if (inputType == null || inputType.getType() != InputType.Type.CNN) {
- throw new IllegalStateException("Invalid input for Subsampling layer (layer name=\"" + getName()
- + "\"): Expected CNN input, got " + inputType);
- }
- InputType.InputTypeConvolutional i = (InputType.InputTypeConvolutional) inputType;
- return InputType.convolutional((i.getHeight() + padding[0][0] + padding[0][1]) / blocks[0],
- (i.getWidth() + padding[1][0] + padding[1][1]) / blocks[1], i.getChannels(), i.getFormat());
- }
-
- @Override
- public ParamInitializer initializer() {
- return EmptyParamInitializer.getInstance();
- }
-
-
- @Override
- public void setNIn(InputType inputType, boolean override) {
- Preconditions.checkState(inputType.getType() == InputType.Type.CNN, "Only CNN input types can be used with SpaceToBatchLayer, got %s", inputType);
- this.format = ((InputType.InputTypeConvolutional)inputType).getFormat();
- }
-
- @Override
- public InputPreProcessor getPreProcessorForInputType(InputType inputType) {
- if (inputType == null) {
- throw new IllegalStateException("Invalid input for space to batch layer (layer name=\"" + getName()
- + "\"): input is null");
- }
- return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getName());
- }
-
- @Override
- public boolean isPretrainParam(String paramName) {
- throw new UnsupportedOperationException("SpaceToBatchLayer does not contain parameters");
- }
-
-
- @NoArgsConstructor
- @Getter
- @Setter
- public static class Builder<T extends Builder<T>> extends LayerConfiguration.Builder<T> {
-
- /**
- * Block size for SpaceToBatch layer. Should be a length 2 array for the height and width
- * dimensions
- */
- @Setter(AccessLevel.NONE)
- protected int[] blocks;
-
- /**
- * A 2d array, with format [[padTop, padBottom], [padLeft, padRight]]
- */
- protected int[][] padding;
-
- protected CNN2DFormat format = CNN2DFormat.NCHW;
-
- /**
- * @param blocks Block size for SpaceToBatch layer. Should be a length 2 array for the height and width
- * dimensions
- */
- public void setBlocks(int... blocks) {
- this.blocks = ValidationUtils.validate2NonNegative(blocks, false, "blocks");
- }
-
- /**
- * @param padding Padding - should be a 2d array, with format [[padTop, padBottom], [padLeft, padRight]]
- */
- public void setPadding(int[][] padding) {
- this.padding = ValidationUtils.validate2x2NonNegative(padding, "padding");
- }
-
-
- /**
- * @param blocks Block size for SpaceToBatch layer. Should be a length 2 array for the height and width
- * dimensions
- */
- public Builder(int[] blocks) {
- this.setBlocks(blocks);
- this.setPadding(new int[][] {{0, 0}, {0, 0}});
- }
-
- /**
- * @param blocks Block size for SpaceToBatch layer. Should be a length 2 array for the height and width
- * dimensions
- * @param padding Padding - should be a 2d array, with format [[padTop, padBottom], [padLeft, padRight]]
- */
- public Builder(int[] blocks, int[][] padding) {
- this.setBlocks(blocks);
- this.setPadding(padding);
- }
-
- /**
- * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last).
- * See {@link CNN2DFormat} for more details.
- * Default: NCHW
- * @param format Format for activations (in and out)
- */
- public T dataFormat(CNN2DFormat format){
- this.format = format;
- return (T)this;
- }
-
- /**
- * @param blocks Block size for SpaceToBatch layer. Should be a length 2 array for the height and width
- * dimensions
- */
- public T blocks(int... blocks) {
- this.setBlocks(blocks);
- return (T) this;
- }
-
- /**
- * @param padding Padding - should be a 2d array, with format [[padTop, padBottom], [padLeft, padRight]]
- */
- public T padding(int[][] padding) {
- this.setPadding(padding);
- return (T) this;
- }
-
- @Override
- public T name(String layerName) {
- this.setLayerName(layerName);
- return (T) this;
- }
-
- @Override
- @SuppressWarnings("unchecked")
- public SpaceToBatchLayer build() {
- if(padding == null)
- setPadding(new int[][] {{0, 0}, {0, 0}});
- return new SpaceToBatchLayer(this);
- }
- }
-
+ }
}
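
The two static builder(...) overloads reproduce the removed Builder(int[]) and
Builder(int[], int[][]) constructors. A short sketch (hypothetical values):

    // Equivalent to the old new SpaceToBatchLayer.Builder(blocks, padding):
    SpaceToBatchLayer s2b = SpaceToBatchLayer.builder(
                    new int[] {2, 2},
                    new int[][] {{0, 0}, {0, 0}})
            .build();

    // Or fluently, relying on the default zero padding:
    SpaceToBatchLayer s2b2 = SpaceToBatchLayer.builder()
            .blockSize(2, 2)
            .build();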
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToDepthLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToDepthLayer.java
index 114fc2753..6b884a5d0 100644
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToDepthLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToDepthLayer.java
@@ -21,6 +21,7 @@
package org.deeplearning4j.nn.conf.layers;
import lombok.*;
+import lombok.experimental.SuperBuilder;
import org.deeplearning4j.nn.api.ParamInitializer;
import org.deeplearning4j.nn.conf.CNN2DFormat;
import org.deeplearning4j.nn.conf.InputPreProcessor;
@@ -40,6 +41,7 @@ import java.util.Map;
@NoArgsConstructor
@ToString(callSuper = true)
@EqualsAndHashCode(callSuper = true)
+@SuperBuilder
public class SpaceToDepthLayer extends NoParamLayer {
/**
@@ -53,16 +55,20 @@ public class SpaceToDepthLayer extends NoParamLayer {
return this == NCHW ? CNN2DFormat.NCHW : CNN2DFormat.NHWC;
}
}
-
+ /**
+ * @param blockSize Block size
+ */
protected int blockSize;
- protected CNN2DFormat dataFormat;
+ /**
+ * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last).
+ * See {@link CNN2DFormat} for more details.
+ * Default: NCHW
+ * @param dataFormat Format for activations (in and out)
+ */
+ @Builder.Default
+ protected CNN2DFormat dataFormat = CNN2DFormat.NCHW;
- protected SpaceToDepthLayer(Builder builder) {
- super(builder);
- this.setBlockSize(builder.blockSize);
- this.setDataFormat(builder.dataFormat);
- }
@Override
public SpaceToDepthLayer clone() {
@@ -74,7 +80,7 @@ public class SpaceToDepthLayer extends NoParamLayer {
Collection<TrainingListener> trainingListeners, int layerIndex, INDArray layerParamsView,
boolean initializeParams, DataType networkDataType) {
LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
-
+ runInheritance();
org.deeplearning4j.nn.layers.convolution.SpaceToDepth ret =
new org.deeplearning4j.nn.layers.convolution.SpaceToDepth(lconf, networkDataType);
ret.addTrainingListeners(trainingListeners);
@@ -133,78 +139,5 @@ public class SpaceToDepthLayer extends NoParamLayer {
}
- @NoArgsConstructor
- @Getter
- @Setter
- public static class Builder<T extends Builder<T>> extends LayerConfiguration.Builder<T> {
-
- protected int blockSize;
-
- /**
- * Data format for input activations. Note DL4J uses NCHW in most cases
- */
- protected CNN2DFormat dataFormat = CNN2DFormat.NCHW;
-
- /**
- * @param blockSize Block size
- */
- public Builder(int blockSize) {
- this.setBlockSize(blockSize);
- }
-
- /**
- * @param blockSize Block size
- * @param dataFormat Data format for input activations. Note DL4J uses NCHW in most cases
- */
- @Deprecated
- public Builder(int blockSize, DataFormat dataFormat) {
- this(blockSize, dataFormat.toFormat());
- }
-
- public Builder(int blockSize, CNN2DFormat dataFormat) {
- this.setBlockSize(blockSize);
- this.setDataFormat(dataFormat);
- }
-
- /**
- * @param blockSize Block size
- */
- public T blocks(int blockSize) {
- this.setBlockSize(blockSize);
- return (T) this;
- }
-
- /**
- * @param dataFormat Data format for input activations. Note DL4J uses NCHW in most cases
- * @deprecated Use {@link #dataFormat(CNN2DFormat)}
- */
- @Deprecated
- public T dataFormat(DataFormat dataFormat) {
- return dataFormat(dataFormat.toFormat());
- }
-
- /**
- * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last).
- * See {@link CNN2DFormat} for more details.
- * Default: NCHW
- * @param dataFormat Format for activations (in and out)
- */
- public T dataFormat(CNN2DFormat dataFormat) {
- this.setDataFormat(dataFormat);
- return (T) this;
- }
-
- @Override
- public T name(String layerName) {
- this.setLayerName(layerName);
- return (T) this;
- }
-
- @Override
- @SuppressWarnings("unchecked")
- public SpaceToDepthLayer build() {
- return new SpaceToDepthLayer(this);
- }
- }
}
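Note on migration (not part of the patch): with the handwritten `Builder` gone, construction goes through the Lombok-generated `SpaceToDepthLayer.builder()`. A minimal sketch of the expected call pattern, assuming the standard @SuperBuilder-generated factory and the defaults shown above:

```java
import org.deeplearning4j.nn.conf.CNN2DFormat;
import org.deeplearning4j.nn.conf.layers.SpaceToDepthLayer;

// Sketch only: replaces the removed `new SpaceToDepthLayer.Builder(2, format).build()` style.
class SpaceToDepthUsage {
    static SpaceToDepthLayer example() {
        return SpaceToDepthLayer.builder()
                .blockSize(2)                  // the one-arg constructor is gone, so blockSize is set fluently
                .dataFormat(CNN2DFormat.NHWC)  // optional; @Builder.Default is NCHW
                .build();
    }
}
```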
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java
index b7e88d8be..b67ac6a8b 100644
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java
@@ -20,10 +20,14 @@
package org.deeplearning4j.nn.conf.layers.objdetect;
-import lombok.Data;
-import lombok.EqualsAndHashCode;
-import lombok.Getter;
-import lombok.Setter;
+import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
+import com.fasterxml.jackson.databind.annotation.JsonSerialize;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import lombok.*;
+import lombok.experimental.SuperBuilder;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.ParamInitializer;
import org.deeplearning4j.nn.conf.CNN2DFormat;
@@ -41,218 +45,139 @@ import org.nd4j.linalg.learning.regularization.Regularization;
import org.nd4j.linalg.lossfunctions.ILossFunction;
import org.nd4j.linalg.lossfunctions.impl.LossL2;
import org.nd4j.serde.jackson.shaded.NDArrayTextSerializer;
-import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
-import com.fasterxml.jackson.databind.annotation.JsonSerialize;
-
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
@Data
@EqualsAndHashCode(callSuper = false)
+@SuperBuilder(buildMethodName = "initBuild")
public class Yolo2OutputLayer extends LayerConfiguration {
- private double lambdaCoord;
- private double lambdaNoObj;
- private ILossFunction lossPositionScale;
- private ILossFunction lossClassPredictions;
- @JsonSerialize(using = NDArrayTextSerializer.class)
- @JsonDeserialize(using = BoundingBoxesDeserializer.class)
- private INDArray boundingBoxes;
+ /**
+ * Loss function coefficient for position and size/scale components of the loss function. Default
+ * (as per paper): 5
+ */
+ @Builder.Default private double lambdaCoord = 5;
+ /**
+ * Loss function coefficient for the "no object confidence" components of the loss function.
+ * Default (as per paper): 0.5
+ */
+ @Builder.Default private double lambdaNoObj = 0.5;
+ /** Loss function for position/scale component of the loss function */
+ @Builder.Default private ILossFunction lossPositionScale = new LossL2();
+ /**
+ * Loss function for the class predictions - defaults to L2 loss (i.e., sum of squared errors, as
+ * per the paper), however Loss MCXENT could also be used (which is more common for
+ * classification).
+ */
+ @Builder.Default private ILossFunction lossClassPredictions = new LossL2();
+  /**
+   * Bounding box priors dimensions [width, height]. For N bounding boxes, input has shape [rows,
+   * columns] = [N, 2]. Note that dimensions should be specified as a fraction of the grid size. For
+   * example, for a network with 13x13 output, a value of 1.0 would correspond to one grid cell; a
+   * value of 13 would correspond to the entire image.
+   */
+  @JsonSerialize(using = NDArrayTextSerializer.class)
+  @JsonDeserialize(using = BoundingBoxesDeserializer.class)
+  @Builder.Default
+  private INDArray boundingBoxes = null; // @Builder.Default requires an initializer; null keeps "unset" semantics
- private CNN2DFormat format = CNN2DFormat.NCHW; //Default for serialization of old formats
+ @Builder.Default
+ private CNN2DFormat format = CNN2DFormat.NCHW; // Default for serialization of old formats
- private Yolo2OutputLayer() {
- //No-arg constructor for Jackson JSON
- }
+ private Yolo2OutputLayer() {
+ // No-arg constructor for Jackson JSON
+ }
- private Yolo2OutputLayer(Builder builder) {
- super(builder);
- this.lambdaCoord = builder.lambdaCoord;
- this.lambdaNoObj = builder.lambdaNoObj;
- this.lossPositionScale = builder.lossPositionScale;
- this.lossClassPredictions = builder.lossClassPredictions;
- this.boundingBoxes = builder.boundingBoxes;
- }
+ @Override
+ public Layer instantiate(
+ NeuralNetConfiguration conf,
+      Collection<TrainingListener> trainingListeners,
+ int layerIndex,
+ INDArray layerParamsView,
+ boolean initializeParams,
+ DataType networkDataType) {
+ LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
- @Override
-    public Layer instantiate(NeuralNetConfiguration conf, Collection<TrainingListener> trainingListeners,
- int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) {
- LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
+ org.deeplearning4j.nn.layers.objdetect.Yolo2OutputLayer ret =
+ new org.deeplearning4j.nn.layers.objdetect.Yolo2OutputLayer(lconf, networkDataType);
+ ret.addTrainingListeners(trainingListeners);
+ ret.setIndex(layerIndex);
+ ret.setParamsViewArray(layerParamsView);
+    Map<String, INDArray> paramTable = initializer().init(this, layerParamsView, initializeParams);
+ ret.setParamTable(paramTable);
+ ret.setLayerConfiguration(lconf);
+ return ret;
+ }
- org.deeplearning4j.nn.layers.objdetect.Yolo2OutputLayer ret =
- new org.deeplearning4j.nn.layers.objdetect.Yolo2OutputLayer(lconf, networkDataType);
- ret.addTrainingListeners(trainingListeners);
- ret.setIndex(layerIndex);
- ret.setParamsViewArray(layerParamsView);
-        Map<String, INDArray> paramTable = initializer().init(this, layerParamsView, initializeParams);
- ret.setParamTable(paramTable);
- ret.setLayerConfiguration(lconf);
- return ret;
- }
+ @Override
+ public ParamInitializer initializer() {
+ return EmptyParamInitializer.getInstance();
+ }
- @Override
- public ParamInitializer initializer() {
- return EmptyParamInitializer.getInstance();
- }
+ @Override
+ public InputType getOutputType(int layerIndex, InputType inputType) {
+ return inputType; // Same shape output as input
+ }
- @Override
- public InputType getOutputType(int layerIndex, InputType inputType) {
- return inputType; //Same shape output as input
- }
+ @Override
+ public void setNIn(InputType inputType, boolean override) {
+ InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType;
+ this.format = c.getFormat();
+ }
- @Override
- public void setNIn(InputType inputType, boolean override) {
- InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType;
- this.format = c.getFormat();
- }
-
- @Override
- public InputPreProcessor getPreProcessorForInputType(InputType inputType) {
- switch (inputType.getType()) {
- case FF:
- case RNN:
- throw new UnsupportedOperationException("Cannot use FF or RNN input types");
- case CNN:
- return null;
- case CNNFlat:
- InputType.InputTypeConvolutionalFlat cf = (InputType.InputTypeConvolutionalFlat) inputType;
- return new FeedForwardToCnnPreProcessor(cf.getHeight(), cf.getWidth(), cf.getDepth());
- default:
- return null;
- }
- }
-
- @Override
-    public List<Regularization> getRegularizationByParam(String paramName) {
- //Not applicable
+ @Override
+ public InputPreProcessor getPreProcessorForInputType(InputType inputType) {
+ switch (inputType.getType()) {
+ case FF:
+ case RNN:
+ throw new UnsupportedOperationException("Cannot use FF or RNN input types");
+ case CNN:
+ return null;
+ case CNNFlat:
+ InputType.InputTypeConvolutionalFlat cf = (InputType.InputTypeConvolutionalFlat) inputType;
+ return new FeedForwardToCnnPreProcessor(cf.getHeight(), cf.getWidth(), cf.getDepth());
+ default:
return null;
}
+ }
- @Override
- public boolean isPretrainParam(String paramName) {
- return false; //No params
- }
- @Override
- public LayerMemoryReport getMemoryReport(InputType inputType) {
- long numValues = inputType.arrayElementsPerExample();
-
- //This is a VERY rough estimate...
- return new LayerMemoryReport.Builder(name, Yolo2OutputLayer.class, inputType, inputType)
- .standardMemory(0, 0) //No params
- .workingMemory(0, numValues, 0, 6 * numValues).cacheMemory(0, 0) //No cache
- .build();
- }
-
- @Getter
- @Setter
-    public static class Builder extends LayerConfiguration.Builder<Builder> {
-
- /**
- * Loss function coefficient for position and size/scale components of the loss function. Default (as per
- * paper): 5
- *
- */
- private double lambdaCoord = 5;
-
- /**
- * Loss function coefficient for the "no object confidence" components of the loss function. Default (as per
- * paper): 0.5
- *
- */
- private double lambdaNoObj = 0.5;
-
- /**
- * Loss function for position/scale component of the loss function
- *
- */
- private ILossFunction lossPositionScale = new LossL2();
-
- /**
- * Loss function for the class predictions - defaults to L2 loss (i.e., sum of squared errors, as per the
- * paper), however Loss MCXENT could also be used (which is more common for classification).
- *
- */
- private ILossFunction lossClassPredictions = new LossL2();
-
- /**
- * Bounding box priors dimensions [width, height]. For N bounding boxes, input has shape [rows, columns] = [N,
- * 2] Note that dimensions should be specified as fraction of grid size. For example, a network with 13x13
- * output, a value of 1.0 would correspond to one grid cell; a value of 13 would correspond to the entire
- * image.
- *
- */
- private INDArray boundingBoxes;
-
- /**
- * Loss function coefficient for position and size/scale components of the loss function. Default (as per
- * paper): 5
- *
- * @param lambdaCoord Lambda value for size/scale component of loss function
- */
- public Builder lambdaCoord(double lambdaCoord) {
- this.setLambdaCoord(lambdaCoord);
- return this;
- }
-
- /**
- * Loss function coefficient for the "no object confidence" components of the loss function. Default (as per
- * paper): 0.5
- *
- * @param lambdaNoObj Lambda value for no-object (confidence) component of the loss function
- */
- public Builder lambdaNoObj(double lambdaNoObj) {
- this.setLambdaNoObj(lambdaNoObj);
- return this;
- }
-
- /**
- * Loss function for position/scale component of the loss function
- *
- * @param lossPositionScale Loss function for position/scale
- */
- public Builder lossPositionScale(ILossFunction lossPositionScale) {
- this.setLossPositionScale(lossPositionScale);
- return this;
- }
-
- /**
- * Loss function for the class predictions - defaults to L2 loss (i.e., sum of squared errors, as per the
- * paper), however Loss MCXENT could also be used (which is more common for classification).
- *
- * @param lossClassPredictions Loss function for the class prediction error component of the YOLO loss function
- */
- public Builder lossClassPredictions(ILossFunction lossClassPredictions) {
- this.setLossClassPredictions(lossClassPredictions);
- return this;
- }
-
- /**
- * Bounding box priors dimensions [width, height]. For N bounding boxes, input has shape [rows, columns] = [N,
- * 2] Note that dimensions should be specified as fraction of grid size. For example, a network with 13x13
- * output, a value of 1.0 would correspond to one grid cell; a value of 13 would correspond to the entire
- * image.
- *
- * @param boundingBoxes Bounding box prior dimensions (width, height)
- */
- public Builder boundingBoxPriors(INDArray boundingBoxes) {
- this.setBoundingBoxes(boundingBoxes);
- return this;
- }
-
- @Override
- public Yolo2OutputLayer build() {
- if (boundingBoxes == null) {
- throw new IllegalStateException("Bounding boxes have not been set");
- }
-
- if (boundingBoxes.rank() != 2 || boundingBoxes.size(1) != 2) {
- throw new IllegalStateException("Bounding box priors must have shape [nBoxes, 2]. Has shape: "
- + Arrays.toString(boundingBoxes.shape()));
- }
-
- return new Yolo2OutputLayer(this);
- }
+ @Override
+  public List<Regularization> getRegularizationByParam(String paramName) {
+ // Not applicable
+ return null;
+ }
+
+ @Override
+ public boolean isPretrainParam(String paramName) {
+ return false; // No params
+ }
+
+ @Override
+ public LayerMemoryReport getMemoryReport(InputType inputType) {
+ long numValues = inputType.arrayElementsPerExample();
+
+ // This is a VERY rough estimate...
+ return new LayerMemoryReport.Builder(name, Yolo2OutputLayer.class, inputType, inputType)
+ .standardMemory(0, 0) // No params
+ .workingMemory(0, numValues, 0, 6 * numValues)
+ .cacheMemory(0, 0) // No cache
+ .build();
+ }
+
+  public static abstract class Yolo2OutputLayerBuilder<
+      C extends Yolo2OutputLayer, B extends Yolo2OutputLayerBuilder<C, B>>
+      extends LayerConfigurationBuilder<C, B> {
+ public C build() {
+ if (boundingBoxes$value == null) {
+ throw new IllegalStateException("Bounding boxes have not been set");
+ }
+
+ if (boundingBoxes$value.rank() != 2 || boundingBoxes$value.size(1) != 2) {
+ throw new IllegalStateException(
+ "Bounding box priors must have shape [nBoxes, 2]. Has shape: "
+ + Arrays.toString(boundingBoxes$value.shape()));
+ }
+ return initBuild();
}
+ }
}
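Usage sketch (not part of the patch): because `@SuperBuilder(buildMethodName = "initBuild")` renames the generated build method, the hand-written `build()` above still enforces the prior-shape check before delegating to `initBuild()`. Assuming the generated `builder()` factory; the prior values below are illustrative only, and note the old `boundingBoxPriors(...)` alias is gone, so callers use `boundingBoxes(...)`:

```java
import org.deeplearning4j.nn.conf.layers.objdetect.Yolo2OutputLayer;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

class YoloOutputUsage {
    static Yolo2OutputLayer example() {
        // [nBoxes, 2] priors, expressed as fractions of the output grid size (illustrative values)
        INDArray priors = Nd4j.create(new double[][] {{1.0, 1.5}, {2.5, 2.0}});
        return Yolo2OutputLayer.builder()
                .boundingBoxes(priors)
                .build(); // throws IllegalStateException if priors are missing or not [nBoxes, 2]
    }
}
```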
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/SimpleRnn.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/SimpleRnn.java
index c89782a92..65866d427 100644
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/SimpleRnn.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/SimpleRnn.java
@@ -48,7 +48,7 @@ public class SimpleRnn extends BaseRecurrentLayer {
* If true (default = false): enable layer normalization on this layer
*
*/
- @lombok.Builder.Default @Accessors
+ @lombok.Builder.Default @Accessors @Getter
private boolean hasLayerNorm = false;
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java
index 8dd744c9a..86a8038dc 100644
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java
@@ -20,6 +20,9 @@
package org.deeplearning4j.nn.conf.layers.samediff;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
import lombok.*;
import lombok.experimental.SuperBuilder;
import lombok.extern.slf4j.Slf4j;
@@ -44,11 +47,6 @@ import org.nd4j.linalg.learning.regularization.L2Regularization;
import org.nd4j.linalg.learning.regularization.Regularization;
import org.nd4j.linalg.learning.regularization.WeightDecay;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
-
@Slf4j
@Data
@EqualsAndHashCode(callSuper = true, doNotUseGetters = true)
@@ -56,291 +54,324 @@ import java.util.Map;
@NoArgsConstructor
public abstract class AbstractSameDiffLayer extends LayerConfiguration {
+ /**
+ * The regularization for the parameters (excluding biases) - for example {@link WeightDecay}
+ *
+ * -- SETTER -- Set the regularization for the parameters (excluding biases) - for example
+ * {@link WeightDecay}
+ *
+ * @param regularization Regularization to apply for the network parameters/weights (excluding
+ * biases)
+ */
+  protected List<Regularization> regularization;
+ /**
+ * The regularization for the biases only - for example {@link WeightDecay} -- SETTER -- Set the
+ * regularization for the biases only - for example {@link WeightDecay}
+ *
+ * @param regularizationBias Regularization to apply for the network biases only
+ */
+  protected List<Regularization> regularizationBias;
+ /**
+ * Gradient updater. For example, {@link org.nd4j.linalg.learning.config.Adam} or {@link
+ * org.nd4j.linalg.learning.config.Nesterovs}
+ *
+ * @param updater Updater to use
+ */
+ protected @Getter @Setter IUpdater updater;
+ /**
+ * Gradient updater configuration, for the biases only. If not set, biases will use the updater as
+ * set by {@link #setUpdater(IUpdater)}
+ *
+ * @param biasUpdater Updater to use for bias parameters
+ */
+ protected @Getter @Setter IUpdater biasUpdater;
+
+ protected GradientNormalization gradientNormalization;
+ protected double gradientNormalizationThreshold = Double.NaN;
+
+ private SDLayerParams layerParams;
+
+ @Override
+  public List<Regularization> getRegularizationByParam(String paramName) {
+ if (layerParams.isWeightParam(paramName)) {
+ return regularization;
+ } else if (layerParams.isBiasParam(paramName)) {
+ return regularizationBias;
+ }
+ return null;
+ }
+
+ public SDLayerParams getLayerParams() {
+ if (layerParams == null) {
+ layerParams = new SDLayerParams();
+ defineParameters(layerParams);
+ }
+ return layerParams;
+ }
+
+ @Override
+ public void setNIn(InputType inputType, boolean override) {
+ // Default implementation: no-op
+ }
+
+ @Override
+ public InputPreProcessor getPreProcessorForInputType(InputType inputType) {
+ // Default implementation: no-op
+ return null;
+ }
+
+ public void applyGlobalConfigToLayer(
+      NeuralNetConfiguration.NeuralNetConfigurationBuilder<?, ?> globalConfig) {
+ // Default implementation: no op
+ }
+
+ /**
+ * Define the parameters for the network. Use {@link SDLayerParams#addWeightParam(String,
+ * long...)} and {@link SDLayerParams#addBiasParam(String, long...)}
+ *
+ * @param params Object used to set parameters for this layer
+ */
+ public abstract void defineParameters(SDLayerParams params);
+
+ /**
+ * Set the initial parameter values for this layer, if required
+ *
+ * @param params Parameter arrays that may be initialized
+ */
+  public abstract void initializeParameters(Map<String, INDArray> params);
+
+ @Override
+ public abstract org.deeplearning4j.nn.api.Layer instantiate(
+ NeuralNetConfiguration conf,
+      Collection<TrainingListener> trainingListeners,
+ int layerIndex,
+ INDArray layerParamsView,
+ boolean initializeParams,
+ DataType networkDataType);
+
+ // ==================================================================================================================
+
+ @Override
+ public ParamInitializer initializer() {
+ return SameDiffParamInitializer.getInstance();
+ }
+
+ @Override
+ public IUpdater getUpdaterByParam(String paramName) {
+ if (biasUpdater != null && initializer().isBiasParam(this, paramName)) {
+ return biasUpdater;
+ } else if (initializer().isBiasParam(this, paramName)
+ || initializer().isWeightParam(this, paramName)) {
+ return updater;
+ }
+ throw new IllegalStateException("Unknown parameter key: " + paramName);
+ }
+
+ @Override
+ public boolean isPretrainParam(String paramName) {
+ return false;
+ }
+
+ @Override
+ public LayerMemoryReport getMemoryReport(InputType inputType) {
+ return new LayerMemoryReport(); // TODO
+ }
+
+ /**
+   * Returns the memory layout ('c' or 'f' order - i.e., row/column major) of the parameters. In
+   * most cases, this can/should be left at the default 'c' order.
+ *
+ * @param param Name of the parameter
+ * @return Memory layout ('c' or 'f') of the parameter
+ */
+ public char paramReshapeOrder(String param) {
+ return 'c';
+ }
+
+ protected void initWeights(int fanIn, int fanOut, WeightInit weightInit, INDArray array) {
+ WeightInitUtil.initWeights(
+ fanIn, fanOut, array.shape(), weightInit, null, paramReshapeOrder(null), array);
+ }
+
+  public void applyGlobalConfig(NeuralNetConfiguration.NeuralNetConfigurationBuilder<?, ?> b) {
+ NeuralNetConfiguration bConf = b.build();
+ if (regularization == null || regularization.isEmpty()) {
+ regularization = bConf.getRegularization();
+ }
+ if (regularizationBias == null || regularizationBias.isEmpty()) {
+ regularizationBias = bConf.getRegularizationBias();
+ }
+ if (updater == null) {
+ updater = bConf.getUpdater();
+ }
+ if (biasUpdater == null) {
+ biasUpdater = bConf.getBiasUpdater();
+ }
+ if (gradientNormalization == null) {
+ gradientNormalization = bConf.getGradientNormalization();
+ }
+ if (Double.isNaN(gradientNormalizationThreshold)) {
+ gradientNormalizationThreshold = bConf.getGradientNormalizationThreshold();
+ }
+
+ applyGlobalConfigToLayer(b);
+ }
+
+ /**
+ * This method generates an "all ones" mask array for use in the SameDiff model when none is
+ * provided.
+ *
+ * @param input Input to the layer
+ * @return A mask array - should be same datatype as the input (usually)
+ */
+ public INDArray onesMaskForInput(INDArray input) {
+ if (input.rank() == 2) {
+ return Nd4j.ones(input.dataType(), input.size(0), 1);
+ } else if (input.rank() == 3) {
+ return Nd4j.ones(
+ input.dataType(),
+ input.size(0),
+ input.size(2)); // mask: [mb, length] vs. input [mb, nIn, length]
+ } else if (input.rank() == 4) {
+ // CNN style - return [mb, 1, 1, 1] for broadcast...
+ return Nd4j.ones(input.dataType(), input.size(0), 1, 1, 1);
+ } else if (input.rank() == 5) {
+ // CNN3D style - return [mb, 1, 1, 1, 1] for broadcast...
+ return Nd4j.ones(input.dataType(), input.size(0), 1, 1, 1, 1);
+ } else {
+ throw new IllegalStateException(
+ "When using masking with rank 1 or 6+ inputs, the onesMaskForInput method must be implemented, "
+ + "in order to determine the correct mask shape for this layer");
+ }
+ }
+
+  public abstract static class AbstractSameDiffLayerBuilder<
+      C extends AbstractSameDiffLayer, B extends AbstractSameDiffLayerBuilder<C, B>>
+      extends LayerConfigurationBuilder<C, B> {
/**
- * The regularization for the parameters (excluding biases) - for example {@link WeightDecay}
+ * L1 regularization coefficient (weights only). Use {@link #l1Bias(double)} to configure the l1
+ * regularization coefficient for the bias.
+ */
+ public B l1(double l1) {
+      // Remove any existing L1 regularization; it is replaced below if l1 > 0.0
+ NetworkUtils.removeInstances(this.regularization, L1Regularization.class);
+ if (l1 > 0.0) {
+ this.regularization.add(new L1Regularization(l1));
+ }
+ return self();
+ }
+
+ /**
+ * L2 regularization coefficient (weights only). Use {@link #l2Bias(double)} to configure the l2
+ * regularization coefficient for the bias.
+     * Note: Generally, {@link WeightDecay} (set via {@link #weightDecay(double,boolean)})
+ * should be preferred to L2 regularization. See {@link WeightDecay} javadoc for further
+ * details.
+ */
+ public B l2(double l2) {
+      // Remove any existing L2 regularization; it is replaced below. Also remove weight decay -
+      // it doesn't make sense to use both
+ NetworkUtils.removeInstances(this.regularization, L2Regularization.class);
+ if (l2 > 0.0) {
+ NetworkUtils.removeInstancesWithWarning(
+ this.regularization,
+ WeightDecay.class,
+ "WeightDecay regularization removed: incompatible with added L2 regularization");
+ this.regularization.add(new L2Regularization(l2));
+ }
+ return self();
+ }
+
+ /** L1 regularization coefficient for the bias. Default: 0. See also {@link #l1(double)} */
+ public B l1Bias(double l1Bias) {
+ NetworkUtils.removeInstances(this.regularizationBias, L1Regularization.class);
+ if (l1Bias > 0.0) {
+ this.regularizationBias.add(new L1Regularization(l1Bias));
+ }
+ return self();
+ }
+
+ /**
+ * L2 regularization coefficient for the bias. Default: 0. See also {@link #l2(double)}
+     * Note: Generally, {@link WeightDecay} (set via {@link #weightDecayBias(double,boolean)})
+ * should be preferred to L2 regularization. See {@link WeightDecay} javadoc for further
+ * details.
+ */
+ public B l2Bias(double l2Bias) {
+ NetworkUtils.removeInstances(this.regularizationBias, L2Regularization.class);
+ if (l2Bias > 0.0) {
+ NetworkUtils.removeInstancesWithWarning(
+ this.regularizationBias,
+ WeightDecay.class,
+ "WeightDecay bias regularization removed: incompatible with added L2 regularization");
+ this.regularizationBias.add(new L2Regularization(l2Bias));
+ }
+ return self();
+ }
+
+ /**
+ * Add weight decay regularization for the network parameters (excluding biases).
+     * This applies weight decay with the learning rate multiplied in - see {@link WeightDecay}
+ * for more details.
*
- * -- SETTER --
- * Set the regularization for the parameters (excluding biases) - for example {@link WeightDecay}
- * @param regularization Regularization to apply for the network parameters/weights (excluding biases)
+ * @param coefficient Weight decay regularization coefficient
+ * @see #weightDecay(double, boolean)
*/
-    protected List<Regularization> regularization;
+ public B weightDecay(double coefficient) {
+ return weightDecay(coefficient, true);
+ }
+
/**
- * The regularization for the biases only - for example {@link WeightDecay}
- * -- SETTER --
- * Set the regularization for the biases only - for example {@link WeightDecay}
- * @param regularizationBias Regularization to apply for the network biases only
- */
-    protected List<Regularization> regularizationBias;
- /**
- * Gradient updater. For example, {@link org.nd4j.linalg.learning.config.Adam} or {@link
- * org.nd4j.linalg.learning.config.Nesterovs}
+ * Add weight decay regularization for the network parameters (excluding biases). See {@link
+ * WeightDecay} for more details.
*
- * @param updater Updater to use
+ * @param coefficient Weight decay regularization coefficient
+ * @param applyLR Whether the learning rate should be multiplied in when performing weight decay
+ * updates. See {@link WeightDecay} for more details.
+ * @see #weightDecay(double, boolean)
*/
- protected @Getter @Setter IUpdater updater;
+ public B weightDecay(double coefficient, boolean applyLR) {
+      // Remove any existing weight decay; it is replaced below. Also remove L2 - it doesn't
+      // make sense to use both
+ NetworkUtils.removeInstances(this.regularization, WeightDecay.class);
+ if (coefficient > 0.0) {
+ NetworkUtils.removeInstancesWithWarning(
+ this.regularization,
+ L2Regularization.class,
+ "L2 regularization removed: incompatible with added WeightDecay regularization");
+ this.regularization.add(new WeightDecay(coefficient, applyLR));
+ }
+ return self();
+ }
+
/**
- * Gradient updater configuration, for the biases only. If not set, biases will use the updater as set by {@link
- * #setUpdater(IUpdater)}
+     * Weight decay for the biases only - see {@link #weightDecay(double)} for more details. This
+     * applies weight decay with the learning rate multiplied in.
*
- * @param biasUpdater Updater to use for bias parameters
+ * @param coefficient Weight decay regularization coefficient
+ * @see #weightDecayBias(double, boolean)
*/
- protected @Getter @Setter IUpdater biasUpdater;
-
-
- protected GradientNormalization gradientNormalization;
- protected double gradientNormalizationThreshold = Double.NaN;
-
- private SDLayerParams layerParams;
-
- @Override
-    public List<Regularization> getRegularizationByParam(String paramName) {
- if(layerParams.isWeightParam(paramName)){
- return regularization;
- } else if(layerParams.isBiasParam(paramName)){
- return regularizationBias;
- }
- return null;
- }
-
- public SDLayerParams getLayerParams() {
- if (layerParams == null) {
- layerParams = new SDLayerParams();
- defineParameters(layerParams);
- }
- return layerParams;
- }
-
- @Override
- public void setNIn(InputType inputType, boolean override) {
- //Default implementation: no-op
- }
-
- @Override
- public InputPreProcessor getPreProcessorForInputType(InputType inputType) {
- //Default implementation: no-op
- return null;
- }
-
-
-    public void applyGlobalConfigToLayer(NeuralNetConfiguration.NeuralNetConfigurationBuilder<?, ?> globalConfig) {
- //Default implementation: no op
+ public B weightDecayBias(double coefficient) {
+ return weightDecayBias(coefficient, true);
}
/**
- * Define the parameters for the network. Use {@link SDLayerParams#addWeightParam(String, long...)} and {@link
- * SDLayerParams#addBiasParam(String, long...)}
+ * Weight decay for the biases only - see {@link #weightDecay(double)} for more details
*
- * @param params Object used to set parameters for this layer
+     * @param coefficient Weight decay regularization coefficient
+     * @param applyLR Whether the learning rate should be multiplied in when performing weight decay updates
*/
- public abstract void defineParameters(SDLayerParams params);
-
- /**
- * Set the initial parameter values for this layer, if required
- *
- * @param params Parameter arrays that may be initialized
- */
-    public abstract void initializeParameters(Map<String, INDArray> params);
-
- @Override
- public abstract org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf,
-                    Collection<TrainingListener> trainingListeners, int layerIndex, INDArray layerParamsView,
- boolean initializeParams, DataType networkDataType);
-
- //==================================================================================================================
-
- @Override
- public ParamInitializer initializer() {
- return SameDiffParamInitializer.getInstance();
- }
-
- @Override
- public IUpdater getUpdaterByParam(String paramName) {
- if (biasUpdater != null && initializer().isBiasParam(this, paramName)) {
- return biasUpdater;
- } else if (initializer().isBiasParam(this, paramName) || initializer().isWeightParam(this, paramName)) {
- return updater;
- }
- throw new IllegalStateException("Unknown parameter key: " + paramName);
- }
-
- @Override
- public boolean isPretrainParam(String paramName) {
- return false;
- }
-
- @Override
- public LayerMemoryReport getMemoryReport(InputType inputType) {
- return new LayerMemoryReport(); //TODO
- }
-
- /**
- * Returns the memory layout ('c' or 'f' order - i.e., row/column major) of the parameters. In most cases, this
- * can/should be left
- *
- * @param param Name of the parameter
- * @return Memory layout ('c' or 'f') of the parameter
- */
- public char paramReshapeOrder(String param) {
- return 'c';
- }
-
- protected void initWeights(int fanIn, int fanOut, WeightInit weightInit, INDArray array) {
- WeightInitUtil.initWeights(fanIn, fanOut, array.shape(), weightInit, null, paramReshapeOrder(null), array);
- }
-
-    public void applyGlobalConfig(NeuralNetConfiguration.NeuralNetConfigurationBuilder<?, ?> b) {
- NeuralNetConfiguration bConf = b.build();
- if (regularization == null || regularization.isEmpty()) {
- regularization = bConf.getRegularization();
- }
- if (regularizationBias == null || regularizationBias.isEmpty()) {
- regularizationBias = bConf.getRegularizationBias();
- }
- if (updater == null) {
- updater = bConf.getUpdater();
- }
- if (biasUpdater == null) {
- biasUpdater = bConf.getBiasUpdater();
- }
- if (gradientNormalization == null) {
- gradientNormalization = bConf.getGradientNormalization();
- }
- if (Double.isNaN(gradientNormalizationThreshold)) {
- gradientNormalizationThreshold = bConf.getGradientNormalizationThreshold();
- }
-
- applyGlobalConfigToLayer(b);
- }
-
- /**
- * This method generates an "all ones" mask array for use in the SameDiff model when none is provided.
- * @param input Input to the layer
- * @return A mask array - should be same datatype as the input (usually)
- */
- public INDArray onesMaskForInput(INDArray input){
- if(input.rank() == 2){
- return Nd4j.ones(input.dataType(), input.size(0), 1);
- } else if(input.rank() == 3){
- return Nd4j.ones(input.dataType(), input.size(0), input.size(2)); //mask: [mb, length] vs. input [mb, nIn, length]
- } else if(input.rank() == 4){
- //CNN style - return [mb, 1, 1, 1] for broadcast...
- return Nd4j.ones(input.dataType(), input.size(0), 1, 1, 1);
- } else if(input.rank() == 5){
- //CNN3D style - return [mb, 1, 1, 1, 1] for broadcast...
- return Nd4j.ones(input.dataType(), input.size(0), 1, 1, 1, 1);
- } else {
- throw new IllegalStateException("When using masking with rank 1 or 6+ inputs, the onesMaskForInput method must be implemented, " +
- "in order to determine the correct mask shape for this layer");
- }
- }
-
-    public static abstract class AbstractSameDiffLayerBuilder<C extends AbstractSameDiffLayer, B extends AbstractSameDiffLayerBuilder<C, B>> {
- /**
- * L1 regularization coefficient (weights only). Use {@link #l1Bias(double)} to configure the l1 regularization
- * coefficient for the bias.
- */
- public B l1(double l1) {
- //Check if existing L1 exists; if so, replace it
- NetworkUtils.removeInstances(this.regularization, L1Regularization.class);
- if(l1 > 0.0) {
- this.regularization.add(new L1Regularization(l1));
- }
- return self();
- }
-
- /**
- * L2 regularization coefficient (weights only). Use {@link #l2Bias(double)} to configure the l2 regularization
- * coefficient for the bias.
- * Note: Generally, {@link WeightDecay} (set via {@link #weightDecay(double,boolean)} should be preferred to
- * L2 regularization. See {@link WeightDecay} javadoc for further details.
- */
- public B l2(double l2) {
- //Check if existing L2 exists; if so, replace it. Also remove weight decay - it doesn't make sense to use both
- NetworkUtils.removeInstances(this.regularization, L2Regularization.class);
- if(l2 > 0.0) {
- NetworkUtils.removeInstancesWithWarning(this.regularization, WeightDecay.class, "WeightDecay regularization removed: incompatible with added L2 regularization");
- this.regularization.add(new L2Regularization(l2));
- }
- return self();
- }
-
- /**
- * L1 regularization coefficient for the bias. Default: 0. See also {@link #l1(double)}
- */
- public B l1Bias(double l1Bias) {
- NetworkUtils.removeInstances(this.regularizationBias, L1Regularization.class);
- if(l1Bias > 0.0) {
- this.regularizationBias.add(new L1Regularization(l1Bias));
- }
- return self();
- }
-
- /**
- * L2 regularization coefficient for the bias. Default: 0. See also {@link #l2(double)}
- * Note: Generally, {@link WeightDecay} (set via {@link #weightDecayBias(double,boolean)} should be preferred to
- * L2 regularization. See {@link WeightDecay} javadoc for further details.
- */
- public B l2Bias(double l2Bias) {
- NetworkUtils.removeInstances(this.regularizationBias, L2Regularization.class);
- if(l2Bias > 0.0) {
- NetworkUtils.removeInstancesWithWarning(this.regularizationBias, WeightDecay.class, "WeightDecay bias regularization removed: incompatible with added L2 regularization");
- this.regularizationBias.add(new L2Regularization(l2Bias));
- }
- return self();
- }
-
- /**
- * Add weight decay regularization for the network parameters (excluding biases).
- * This applies weight decay with multiplying the learning rate - see {@link WeightDecay} for more details.
- *
- * @param coefficient Weight decay regularization coefficient
- * @see #weightDecay(double, boolean)
- */
- public B weightDecay(double coefficient) {
- return weightDecay(coefficient, true);
- }
-
- /**
- * Add weight decay regularization for the network parameters (excluding biases). See {@link WeightDecay} for more details.
- *
- * @param coefficient Weight decay regularization coefficient
- * @param applyLR Whether the learning rate should be multiplied in when performing weight decay updates. See {@link WeightDecay} for more details.
- * @see #weightDecay(double, boolean)
- */
- public B weightDecay(double coefficient, boolean applyLR) {
- //Check if existing weight decay if it exists; if so, replace it. Also remove L2 - it doesn't make sense to use both
- NetworkUtils.removeInstances(this.regularization, WeightDecay.class);
- if(coefficient > 0.0) {
- NetworkUtils.removeInstancesWithWarning(this.regularization, L2Regularization.class, "L2 regularization removed: incompatible with added WeightDecay regularization");
- this.regularization.add(new WeightDecay(coefficient, applyLR));
- }
- return self();
- }
-
- /**
- * Weight decay for the biases only - see {@link #weightDecay(double)} for more details.
- * This applies weight decay with multiplying the learning rate.
- *
- * @param coefficient Weight decay regularization coefficient
- * @see #weightDecayBias(double, boolean)
- */
- public B weightDecayBias(double coefficient) {
- return weightDecayBias(coefficient, true);
- }
-
- /**
- * Weight decay for the biases only - see {@link #weightDecay(double)} for more details
- *
- * @param coefficient Weight decay regularization coefficient
- */
- public B weightDecayBias(double coefficient, boolean applyLR) {
- //Check if existing weight decay if it exists; if so, replace it. Also remove L2 - it doesn't make sense to use both
- NetworkUtils.removeInstances(this.regularizationBias, WeightDecay.class);
- if(coefficient > 0.0) {
- NetworkUtils.removeInstancesWithWarning(this.regularizationBias, L2Regularization.class, "L2 bias regularization removed: incompatible with added WeightDecay regularization");
- this.regularizationBias.add(new WeightDecay(coefficient, applyLR));
- }
- return self();
- }
+ public B weightDecayBias(double coefficient, boolean applyLR) {
+ // Check if existing weight decay if it exists; if so, replace it. Also remove L2 - it doesn't
+ // make sense to use both
+ NetworkUtils.removeInstances(this.regularizationBias, WeightDecay.class);
+ if (coefficient > 0.0) {
+ NetworkUtils.removeInstancesWithWarning(
+ this.regularizationBias,
+ L2Regularization.class,
+ "L2 bias regularization removed: incompatible with added WeightDecay regularization");
+ this.regularizationBias.add(new WeightDecay(coefficient, applyLR));
+ }
+ return self();
}
+ }
}
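Aside on the `<C, B>` signatures used throughout (assumption: standard Lombok @SuperBuilder conventions): `C` is the concrete type being built and `B` the concrete builder, so inherited fluent methods such as `l2(...)` keep returning the subclass builder via `self()`. A stripped-down, hand-written equivalent with invented names:

```java
// Hand-rolled equivalent of the <C, B> pattern that @SuperBuilder generates;
// the names (Animal, Dog) are invented purely for illustration.
class Animal {
    protected final String name;
    protected Animal(AnimalBuilder<?, ?> b) { this.name = b.name; }

    public abstract static class AnimalBuilder<C extends Animal, B extends AnimalBuilder<C, B>> {
        String name;
        protected abstract B self();   // each concrete builder returns itself
        public abstract C build();     // and builds its own concrete type
        public B name(String n) { this.name = n; return self(); } // fluent across the hierarchy
    }
}

class Dog extends Animal {
    private Dog(DogBuilder b) { super(b); }

    public static final class DogBuilder extends Animal.AnimalBuilder<Dog, DogBuilder> {
        @Override protected DogBuilder self() { return this; }
        @Override public Dog build() { return new Dog(this); }
    }

    public static DogBuilder builder() { return new DogBuilder(); }
}
```

With this shape, `Dog.builder().name("rex").build()` needs no casts: `name(...)` already returns `DogBuilder`, which is exactly what the generated `self()` achieves.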
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffVertex.java
index accc675d0..9d6144c90 100644
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffVertex.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffVertex.java
@@ -176,7 +176,7 @@ public abstract class SameDiffVertex extends GraphVertex implements ITraininableLayerConfiguration {
}
@Override
- public String getLayerName() {
+ public String getName() {
return name;
}
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java
index be7be75c8..0c4419a76 100644
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java
@@ -285,5 +285,14 @@ public class VariationalAutoencoder extends BasePretrainNetwork {
super.nOut(nOut);
return self();
}
+
+ public B pzxActivationFunction(IActivation activation) {
+ this.pzxActivationFunction$value = activation;
+ this.pzxActivationFunction$set = true;
+ return self();
+ }
+ public B pzxActivationFunction(Activation activation) {
+ return this.pzxActivationFunction(activation.getActivationFunction());
+ }
}
}
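Note on the `$value`/`$set` fields above (assumption: Lombok's documented naming for `@Builder.Default`, with an invented example class): Lombok generates `field$value` plus a boolean `field$set` flag in the builder, and a manual overload has to write both so `build()` does not re-apply the default. The same trick in isolation:

```java
import lombok.Builder;

// Sketch of extending a Lombok builder for a @Builder.Default field by writing to
// the generated backing fields; `Config` and `rate` are invented for this example.
@Builder
class Config {
    @Builder.Default private double rate = 0.1;

    static class ConfigBuilder { // Lombok merges the generated members into this class
        // Convenience overload: accept a percentage instead of a fraction.
        public ConfigBuilder ratePercent(double percent) {
            this.rate$value = percent / 100.0; // Lombok's backing field for `rate`
            this.rate$set = true;              // mark as set so build() skips the default
            return this;
        }
    }
}
```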
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java
index 3bb40171a..be4aea4fe 100644
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java
@@ -107,7 +107,7 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layers.ConvolutionLayer> {
    Pair<INDArray, INDArray> p = preOutput4d(true, true, workspaceMgr);
INDArray z = p.getFirst();
- CNN2DFormat f = getTypedLayerConfiguration().getDataFormat();
+ CNN2DFormat f = getTypedLayerConfiguration().getConvFormat();
if(f != CNN2DFormat.NCHW){
z = z.permute(0,3,1,2); //NHWC to NCHW
}
@@ -159,7 +159,7 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layers.ConvolutionLayer> {
    public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
assertInputSet(true);
- CNN2DFormat format = getTypedLayerConfiguration().getDataFormat();
+ CNN2DFormat format = getTypedLayerConfiguration().getConvFormat();
boolean nchw = format == CNN2DFormat.NCHW;
if (input.rank() != 4) {
throw new DL4JInvalidInputException("Got rank " + input.rank()
+ " array as input to Convolution layer with shape " + Arrays.toString(input.shape())
- + ". Expected rank 4 array with shape " + getTypedLayerConfiguration().getDataFormat().dimensionNames() + ". "
+ + ". Expected rank 4 array with shape " + getTypedLayerConfiguration().getConvFormat().dimensionNames() + ". "
+ layerId());
}
INDArray bias;
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/DepthwiseConvolution2DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/DepthwiseConvolution2DLayer.java
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/DepthwiseConvolution2DLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/DepthwiseConvolution2DLayer.java
@@ -158,7 +158,7 @@ public class DepthwiseConvolution2DLayer extends ConvolutionLayer {
throw new DL4JInvalidInputException("Got rank " + input.rank()
+ " array as input to DepthwiseConvolution2D (layer name = " + layerName + ", layer index = "
+ index + ") with shape " + Arrays.toString(input.shape()) + ". "
- + "Expected rank 4 array with shape " + getTypedLayerConfiguration().getDataFormat().dimensionNames() + "."
+ + "Expected rank 4 array with shape " + getTypedLayerConfiguration().getConvFormat().dimensionNames() + "."
+ (input.rank() == 2
? " (Wrong input type (see InputType.convolutionalFlat()) or wrong data type?)"
: "") + " " + layerId());
@@ -166,7 +166,7 @@ public class DepthwiseConvolution2DLayer extends ConvolutionLayer {
INDArray input = this.input.castTo(dataType); //no-op if correct dtype
- CNN2DFormat format = getTypedLayerConfiguration().getDataFormat();
+ CNN2DFormat format = getTypedLayerConfiguration().getConvFormat();
boolean nchw = format == CNN2DFormat.NCHW;
long inDepth = depthWiseWeights.size(2);
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SeparableConvolution2DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SeparableConvolution2DLayer.java
index c6a9bba63..60533ee2a 100644
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SeparableConvolution2DLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SeparableConvolution2DLayer.java
@@ -63,7 +63,7 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer {
if (input.rank() != 4) {
throw new DL4JInvalidInputException("Got rank " + input.rank()
+ " array as input to SubsamplingLayer with shape " + Arrays.toString(input.shape())
- + ". Expected rank 4 array with shape " + getTypedLayerConfiguration().getDataFormat().dimensionNames() + ". "
+ + ". Expected rank 4 array with shape " + getTypedLayerConfiguration().getConvFormat().dimensionNames() + ". "
+ layerId());
}
INDArray bias;
@@ -74,7 +74,7 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer {
INDArray input = this.input.castTo(dataType);
- CNN2DFormat format = getTypedLayerConfiguration().getDataFormat();
+ CNN2DFormat format = getTypedLayerConfiguration().getConvFormat();
boolean nchw = format == CNN2DFormat.NCHW;
long miniBatch = input.size(0);
@@ -167,7 +167,7 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer {
getParamWithNoise(SeparableConvolutionParamInitializer.POINT_WISE_WEIGHT_KEY, training, workspaceMgr);
INDArray input = this.input.castTo(dataType);
- if(getTypedLayerConfiguration().getDataFormat() == CNN2DFormat.NHWC) {
+ if(getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC) {
input = input.permute(0,3,1,2).dup();
}
@@ -182,7 +182,7 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer {
throw new DL4JInvalidInputException("Got rank " + input.rank()
+ " array as input to SeparableConvolution2D (layer name = " + layerName + ", layer index = "
+ index + ") with shape " + Arrays.toString(input.shape()) + ". "
- + "Expected rank 4 array with shape " + getTypedLayerConfiguration().getDataFormat().dimensionNames() + "."
+ + "Expected rank 4 array with shape " + getTypedLayerConfiguration().getConvFormat().dimensionNames() + "."
+ (input.rank() == 2
? " (Wrong input type (see InputType.convolutionalFlat()) or wrong data type?)"
: "")
@@ -199,7 +199,7 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer {
String s = "Cannot do forward pass in SeparableConvolution2D layer (layer name = " + layerName
+ ", layer index = " + index + "): input array channels does not match CNN layer configuration"
- + " (data format = " + getTypedLayerConfiguration().getDataFormat() + ", data input channels = " + input.size(1) + ", [minibatch,inputDepth,height,width]="
+ + " (data format = " + getTypedLayerConfiguration().getConvFormat() + ", data input channels = " + input.size(1) + ", [minibatch,inputDepth,height,width]="
+ Arrays.toString(input.shape()) + "; expected" + " input channels = " + inDepth + ") "
+ layerId();
@@ -287,7 +287,7 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer {
.build();
Nd4j.getExecutioner().exec(op);
- if(getTypedLayerConfiguration().getDataFormat() == CNN2DFormat.NHWC) {
+ if(getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC) {
output = output.permute(0,2,3,1); //NCHW to NHWC
}
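Aside (illustrative shapes, not from the patch): the `permute(0,3,1,2)` / `permute(0,2,3,1)` pairs guarded by the renamed `getConvFormat()` are the usual NHWC-to-NCHW axis swaps and back. A quick self-contained check of that axis arithmetic:

```java
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

class FormatPermuteCheck {
    public static void main(String[] args) {
        INDArray nhwc = Nd4j.zeros(8, 32, 32, 3);  // [minibatch, height, width, channels]
        INDArray nchw = nhwc.permute(0, 3, 1, 2);  // -> [8, 3, 32, 32] (NHWC to NCHW)
        INDArray back = nchw.permute(0, 2, 3, 1);  // -> [8, 32, 32, 3] (NCHW to NHWC)
        System.out.println(java.util.Arrays.toString(nchw.shape())); // [8, 3, 32, 32]
        System.out.println(java.util.Arrays.toString(back.shape())); // [8, 32, 32, 3]
    }
}
```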
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SpaceToBatch.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SpaceToBatch.java
index 1e5c7b270..371511075 100644
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SpaceToBatch.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SpaceToBatch.java
@@ -47,7 +47,7 @@ public class SpaceToBatch extends AbstractLayer<org.deeplearning4j.nn.conf.layers.SpaceToBatch> {