DL4J/ND4J: Do pass on integer casts (#15)
* Int cast fixes.
* Revert "Int cast fixes." This reverts commit aa36e8ca
* Int casts
* Int cast
* Int casts
* Get rid of int casts. Dropping deprecated aggregate ops.
* java scatterUpdate changes
  Signed-off-by: raver119 <raver119@gmail.com>
* c++ scatterUpdate changes
  Signed-off-by: raver119 <raver119@gmail.com>
* Remove aggregated ops.
* Restored test
* Tests restored.
* Minor fixes
parent 95f7ad7b94
commit 45a40c8a89
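Note on the recurring pattern in the hunks below: values derived from INDArray shapes and lengths are kept as long instead of being blindly cast to int, and where an int is still required by a downstream API the cast is guarded by a range check that throws ND4JArraySizeException. A minimal standalone sketch of that guard idiom (the class and the safeIntCast helper are illustrative only, not part of this commit):

    import org.nd4j.linalg.exception.ND4JArraySizeException;

    public class SafeCastSketch {
        // Hypothetical helper: narrow a long to int only when it is provably in range,
        // otherwise fail fast instead of silently truncating.
        static int safeIntCast(long value) {
            if (value > Integer.MAX_VALUE || value < Integer.MIN_VALUE)
                throw new ND4JArraySizeException();
            return (int) value;
        }

        public static void main(String[] args) {
            System.out.println(safeIntCast(1024L));   // fine
            // safeIntCast(5_000_000_000L);           // would throw ND4JArraySizeException
        }
    }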
@@ -124,7 +124,6 @@ public class TestUtils {
     public static INDArray randomOneHot(long examples, long nOut, Random rng){
         INDArray arr = Nd4j.create(examples, nOut);
         for( int i=0; i<examples; i++ ){
-            // FIXME: int cast
             arr.putScalar(i, rng.nextInt((int) nOut), 1.0);
         }
         return arr;
@@ -132,7 +132,6 @@ public class TestUtils {
     public static INDArray randomOneHot(long examples, long nOut, Random rng){
         INDArray arr = Nd4j.create(examples, nOut);
         for( int i=0; i<examples; i++ ){
-            // FIXME: int cast
             arr.putScalar(i, rng.nextInt((int) nOut), 1.0);
         }
         return arr;
@@ -187,7 +187,6 @@ public class AsyncDataSetIteratorTest extends BaseDL4JTest {
             }
         }

-
    @Test
    public void testVariableTimeSeries2() throws Exception {
        AsyncDataSetIterator adsi =
@@ -464,13 +464,11 @@ public class LossFunctionGradientCheck extends BaseDL4JTest {
                 ret[1] = Nd4j.zeros(labelsShape);
                 if (labelsShape.length == 2) {
                     for (int i = 0; i < labelsShape[0]; i++) {
-                        // FIXME: int cast
                         ret[1].putScalar(i, r.nextInt((int) labelsShape[1]), 1.0);
                     }
                 } else if (labelsShape.length == 3) {
                     for (int i = 0; i < labelsShape[0]; i++) {
                         for (int j = 0; j < labelsShape[2]; j++) {
-                            // FIXME: int cast
                             ret[1].putScalar(i, r.nextInt((int) labelsShape[1]), j, 1.0);
                         }
                     }
@@ -484,13 +482,11 @@ public class LossFunctionGradientCheck extends BaseDL4JTest {
                 ret[1] = Nd4j.ones(labelsShape);
                 if (labelsShape.length == 2) {
                     for (int i = 0; i < labelsShape[0]; i++) {
-                        // FIXME: int cast
                         ret[1].putScalar(i, r.nextInt((int) labelsShape[1]), -1.0);
                     }
                 } else if (labelsShape.length == 3) {
                     for (int i = 0; i < labelsShape[0]; i++) {
                         for (int j = 0; j < labelsShape[2]; j++) {
-                            // FIXME: int cast
                             ret[1].putScalar(i, r.nextInt((int) labelsShape[1]), j, -1.0);
                         }
                     }
@@ -176,8 +176,7 @@ public class ShiftVertexTest extends BaseDL4JTest {
         manual_weights.put("output_b", c);

         // First things first, let's calculate the score.
-        // FIXME: int cast
-        int batchsz = (int) input.shape()[0];
+        long batchsz = input.shape()[0];
         INDArray z = input.castTo(W.dataType()).mmul(W).add(b.repmat(batchsz, 1));
         INDArray a = a1.getActivation(z.dup(), true).add(sf); // activation modifies it's input!!
         INDArray q = a.mmul(V).add(c.repmat(batchsz, 1));
@@ -36,6 +36,7 @@ import org.nd4j.linalg.api.ops.impl.transforms.strict.SigmoidDerivative;
 import org.nd4j.linalg.api.ops.impl.transforms.strict.TanhDerivative;
 import org.nd4j.linalg.dataset.DataSet;
 import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
+import org.nd4j.linalg.exception.ND4JArraySizeException;
 import org.nd4j.linalg.factory.Nd4j;
 import org.nd4j.linalg.learning.config.Sgd;
 import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction;
@@ -340,7 +341,8 @@ public class BackPropMLPTest extends BaseDL4JTest {

     public static float[] asFloat(INDArray arr) {
         long len = arr.length();
-        // FIXME: int cast
+        if (len > Integer.MAX_VALUE)
+            throw new ND4JArraySizeException();
         float[] f = new float[(int) len];
         NdIndexIterator iterator = new NdIndexIterator('c', arr.shape());
         for (int i = 0; i < len; i++) {
@@ -320,7 +320,6 @@ public class MultiLayerTest extends BaseDL4JTest {
     public static float[] asFloat(INDArray arr) {
         long len = arr.length();

-        // FIXME: int cast
         float[] f = new float[(int) len];
         for (int i = 0; i < len; i++)
             f[i] = arr.getFloat(i);
@@ -331,7 +331,6 @@ public class TestUpdaters extends BaseDL4JTest {
         double calculatedByHandMScalar = 0.2;
         double[] expectedM = Nd4j.ones(1, numParams).mul(calculatedByHandMScalar).data().asDouble();

-        // FIXME: int cast
         double[] actualM = Arrays.copyOfRange(nadamUpdater.getM().data().asDouble(), 0, (int) numParams);
         for (int i = 0; i < actualM.length; i++) {
             actualM[i] = Math.round(actualM[i] * 1e2) / 1e2;
@@ -48,6 +48,7 @@ import org.nd4j.linalg.api.rng.DefaultRandom;
 import org.nd4j.linalg.api.rng.Random;
 import org.nd4j.linalg.dataset.DataSet;
 import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
+import org.nd4j.linalg.exception.ND4JArraySizeException;
 import org.nd4j.linalg.factory.Nd4j;
 import org.nd4j.linalg.indexing.conditions.Condition;
 import org.nd4j.linalg.learning.config.AdaGrad;
@@ -664,7 +665,9 @@ public class TestOptimizers extends BaseDL4JTest {
             double xlm1 = parameters.getDouble(nDims - 2);
             double gl = 200 * (xl - xlm1 * xlm1);

-            // FIXME: int cast
+            if (nDims - 1 > Integer.MAX_VALUE) {
+                throw new ND4JArraySizeException();
+            }
             gradient.put(0, (int)nDims - 1, gl);
             Gradient g = new DefaultGradient();
             g.gradientForVariable().put("W", gradient);
@@ -865,8 +868,7 @@ public class TestOptimizers extends BaseDL4JTest {

         @Override
         public long numParams() {
-            // FIXME: int cast
-            return (int) parameters.length();
+            return parameters.length();
         }

         @Override
@@ -286,7 +286,7 @@ public class RecordReaderMultiDataSetIterator implements MultiDataSetIterator, S
                 for (INDArray w : exampleData) {
                     val n = w.size(0);

-                    // FIXME: int cast
+                    if (Math.min(minExamples, n) < Integer.MAX_VALUE)
                         minExamples = (int) Math.min(minExamples, n);
                 }
             }
@@ -366,7 +366,6 @@ public class SequenceRecordReaderDataSetIterator implements DataSetIterator {
         DataSet ds = mdsToDataSet(mds);

         if (totalOutcomes == -1) {
-            // FIXME: int cast
             inputColumns = (int) ds.getFeatures().size(1);
             totalOutcomes = ds.getLabels() == null ? -1 : (int) ds.getLabels().size(1);
         }
@@ -394,7 +393,6 @@ public class SequenceRecordReaderDataSetIterator implements DataSetIterator {
             stored = next();
             useStored = true;

-            // FIXME: int cast
             inputColumns = (int) stored.getFeatures().size(1);
             totalOutcomes = (int) stored.getLabels().size(1);
         }
@@ -172,7 +172,6 @@ public abstract class AbstractDataSetIterator<T> implements DataSetIterator {
         Pair<T, T> pair = iterator.next();
         if (numFeatures < 1) {
             if (pair.getFirst() instanceof INDArray) {
-                // FIXME: int cast
                 numFeatures = (int) ((INDArray) pair.getFirst()).length();
                 numLabels = (int) ((INDArray) pair.getSecond()).length();
             } else if (pair.getFirst() instanceof float[]) {
@@ -95,7 +95,6 @@ public class IteratorDataSetIterator implements DataSetIterator {
             //Set columns etc for later use
             DataSet temp = list.get(0);

-            // FIXME: int cast
             inputColumns = (int) temp.getFeatures().size(1);
             totalOutcomes = temp.getLabels() == null ? 0 : (int) temp.getLabels().size(1); //May be null for layerwise pretraining
         }
@@ -73,8 +73,7 @@ public class IteratorMultiDataSetIterator implements MultiDataSetIterator {
                 next = iterator.next();
             }

-            // FIXME: int cast
-            int nExamples = (int) next.getFeatures(0).size(0);
+            long nExamples = next.getFeatures(0).size(0);
             if (countSoFar + nExamples <= batchSize) {
                 //Add the entire MultiDataSet as-is
                 list.add(next);
@@ -140,7 +139,7 @@ public class IteratorMultiDataSetIterator implements MultiDataSetIterator {
         return out;
     }

-    private static INDArray getRange(INDArray arr, int exampleFrom, int exampleToExclusive) {
+    private static INDArray getRange(INDArray arr, long exampleFrom, long exampleToExclusive) {
         if (arr == null)
             return null;

@@ -134,7 +134,7 @@ public abstract class BaseFileIterator<T, P> implements Iterator<T> {
         List<T> remainder = new ArrayList<>();
         int soFar = 0;
         for (T t : toMerge) {
-            int size = sizeOf(t);
+            long size = sizeOf(t);

             if (soFar + size <= batchSize) {
                 correctNum.add(t);
@@ -190,7 +190,7 @@ public abstract class BaseFileIterator<T, P> implements Iterator<T> {

     protected abstract T load(File f);

-    protected abstract int sizeOf(T of);
+    protected abstract long sizeOf(T of);

     protected abstract List<T> split(T toSplit);

@@ -151,7 +151,7 @@ public class FileDataSetIterator extends BaseFileIterator<DataSet, DataSetPrePro
     }

     @Override
-    protected int sizeOf(DataSet of) {
+    protected long sizeOf(DataSet of) {
         return of.numExamples();
     }

@@ -151,9 +151,8 @@ public class FileMultiDataSetIterator extends BaseFileIterator<MultiDataSet, Mul
     }

     @Override
-    protected int sizeOf(MultiDataSet of) {
-        // FIXME: int cast
-        return (int) of.getFeatures(0).size(0);
+    protected long sizeOf(MultiDataSet of) {
+        return of.getFeatures(0).size(0);
     }

     @Override
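BaseFileIterator.sizeOf now reports sizes as long, so FileMultiDataSetIterator can return getFeatures(0).size(0) directly. An illustrative, self-contained sketch of why the merging loop also reads the size into a long before comparing against the batch size (the Batch type and the numbers are stand-ins, not the real iterator classes):

    import java.util.ArrayList;
    import java.util.List;

    public class SizeOfSketch {
        // Stand-in for a DataSet/MultiDataSet chunk; only the example count matters here.
        static class Batch {
            final long numExamples;
            Batch(long numExamples) { this.numExamples = numExamples; }
        }

        // Mirrors the widened abstract signature: sizes are reported as long, not int.
        static long sizeOf(Batch b) {
            return b.numExamples;
        }

        public static void main(String[] args) {
            List<Batch> toMerge = new ArrayList<>();
            toMerge.add(new Batch(3_000_000_000L));   // larger than Integer.MAX_VALUE
            toMerge.add(new Batch(42));

            long batchSize = 3_000_000_100L;
            long soFar = 0;
            for (Batch b : toMerge) {
                long size = sizeOf(b);                // no (int) truncation on the way in
                if (soFar + size <= batchSize) {
                    soFar += size;
                }
            }
            System.out.println("examples accepted: " + soFar);
        }
    }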
@@ -665,8 +665,7 @@ public class BarnesHutTsne implements Model {

         if (useAdaGrad) {
             if (adaGrad == null) {
-                // FIXME: int cast
-                adaGrad = new AdaGrad(ArrayUtil.toInts(gradient.shape()), learningRate);
+                adaGrad = new AdaGrad(gradient.shape(), learningRate);
                 adaGrad.setStateViewArray(Nd4j.zeros(gradient.shape()).reshape(1, gradChange.length()),
                                 gradChange.shape(), gradient.ordering(), true);
             }
@@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.core;


 import lombok.val;
+import org.apache.commons.lang3.ArrayUtils;
 import org.deeplearning4j.nn.conf.InputPreProcessor;
 import org.deeplearning4j.nn.conf.inputs.InputType;
 import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
@@ -51,6 +52,13 @@ public class KerasReshape extends KerasLayer {
         this(layerConfig, true);
     }

+    private long[] listToLongArray(List<Integer> list) {
+        long[] retVal = new long[list.size()];
+        for (int i = 0; i < list.size(); ++i) {
+            retVal[i] = list.get(i);
+        }
+        return retVal;
+    }
     /**
      * Constructor from parsed Keras layer configuration dictionary.
      *
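The new listToLongArray helper replaces the previous ArrayUtil.toLongArray(ArrayUtil.toArray(...)) round-trip for the Keras target_shape list. A standalone usage sketch (the method body is copied from the hunk above; the surrounding class and the shape values are illustrative):

    import java.util.Arrays;
    import java.util.List;

    public class ListToLongArraySketch {
        // Same logic as the private helper added to KerasReshape: each Integer is
        // widened to long, with no intermediate int[] allocation.
        static long[] listToLongArray(List<Integer> list) {
            long[] retVal = new long[list.size()];
            for (int i = 0; i < list.size(); ++i) {
                retVal[i] = list.get(i);
            }
            return retVal;
        }

        public static void main(String[] args) {
            List<Integer> targetShapeList = Arrays.asList(4, 4, 16);
            long[] targetShape = listToLongArray(targetShapeList);
            System.out.println(Arrays.toString(targetShape));   // [4, 4, 16]
        }
    }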
@@ -67,9 +75,7 @@ public class KerasReshape extends KerasLayer {
         if (innerConfig.containsKey(targetShape)) {
             @SuppressWarnings("unchecked")
             List<Integer> targetShapeList = (List<Integer>) innerConfig.get(targetShape);
-
-            // FIXME: int cast
-            this.targetShape = ArrayUtil.toLongArray(ArrayUtil.toArray(targetShapeList));
+            this.targetShape = listToLongArray(targetShapeList);
         }

     }
@@ -690,13 +690,11 @@ public class KerasModelEndToEndTest extends BaseDL4JTest {
         INDArray testLabels = Nd4j.create(predictionsDl4j.shape());
         if (testLabels.rank() == 2) {
             for (int i = 0; i < testLabels.size(0); i++) {
-                // FIXME: int cast
                 testLabels.putScalar(i, r.nextInt((int) testLabels.size(1)), 1.0);
             }
         } else if (testLabels.rank() == 3) {
             for (int i = 0; i < testLabels.size(0); i++) {
                 for (int j = 0; j < testLabels.size(1); j++) {
-                    // FIXME: int cast
                     testLabels.putScalar(i, j, r.nextInt((int) testLabels.size(1)), 1.0);
                 }
             }
@@ -104,7 +104,7 @@ public class InMemoryLookupTable<T extends SequenceElement> implements WeightLoo
     }

     protected void initAdaGrad() {
-        int[] shape = new int[] {vocab.numWords() + 1, vectorLength};
+        long[] shape = new long[] {vocab.numWords() + 1, vectorLength};
         int length = ArrayUtil.prod(shape);
         adaGrad = new AdaGrad(shape, lr.get());
         adaGrad.setStateViewArray(Nd4j.zeros(shape).reshape(1, length), shape, Nd4j.order(), true);
@@ -124,8 +124,7 @@ public class InMemoryLookupTable<T extends SequenceElement> implements WeightLoo
         if (adaGrad == null)
             initAdaGrad();

-        // FIXME: int cast
-        return adaGrad.getGradient(gradient, column, ArrayUtil.toInts(syn0.shape()));
+        return adaGrad.getGradient(gradient, column, syn0.shape());
     }

     @Override
@@ -370,7 +369,6 @@ public class InMemoryLookupTable<T extends SequenceElement> implements WeightLoo
                 else {
                     nextRandom.set(nextRandom.get() * 25214903917L + 11);

-                    // FIXME: int cast
                     int idx = (int) Math.abs((int) (nextRandom.get() >> 16) % table.length());

                     target = table.getInt(idx);
@@ -33,7 +33,6 @@ import org.deeplearning4j.models.word2vec.wordstore.VocabCache;
 import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.ndarray.INDArray;
 import org.nd4j.linalg.api.ops.aggregates.Aggregate;
-import org.nd4j.linalg.api.ops.aggregates.impl.AggregateCBOW;
 import org.nd4j.linalg.api.ops.impl.nlp.CbowRound;
 import org.nd4j.linalg.factory.Nd4j;
 import org.nd4j.linalg.util.DeviceLocalNDArray;
@@ -104,11 +104,10 @@ public class GloVe<T extends SequenceElement> implements ElementsLearningAlgorit



-        weightAdaGrad = new AdaGrad(new int[] {this.vocabCache.numWords() + 1, vectorLength}, learningRate);
+        weightAdaGrad = new AdaGrad(new long[] {this.vocabCache.numWords() + 1, vectorLength}, learningRate);
         bias = Nd4j.create(syn0.rows());

-        // FIXME: int cast
-        biasAdaGrad = new AdaGrad(ArrayUtil.toInts(bias.shape()), this.learningRate);
+        biasAdaGrad = new AdaGrad(bias.shape(), this.learningRate);

        // maxmemory = Runtime.getRuntime().maxMemory() - (vocabCache.numWords() * vectorLength * 2 * 8);

@@ -237,15 +236,13 @@ public class GloVe<T extends SequenceElement> implements ElementsLearningAlgorit
     private void update(T element1, INDArray wordVector, INDArray contextVector, double gradient) {
         //gradient for word vectors
         INDArray grad1 = contextVector.mul(gradient);
-        // FIXME: int cast
-        INDArray update = weightAdaGrad.getGradient(grad1, element1.getIndex(), ArrayUtil.toInts(syn0.shape()));
+        INDArray update = weightAdaGrad.getGradient(grad1, element1.getIndex(), syn0.shape());

         //update vector
         wordVector.subi(update);

         double w1Bias = bias.getDouble(element1.getIndex());
-        // FIXME: int cast
-        double biasGradient = biasAdaGrad.getGradient(gradient, element1.getIndex(), ArrayUtil.toInts(bias.shape()));
+        double biasGradient = biasAdaGrad.getGradient(gradient, element1.getIndex(), bias.shape());
         double update2 = w1Bias - biasGradient;
         bias.putScalar(element1.getIndex(), update2);
     }
@@ -358,7 +358,6 @@ public class BasicModelUtils<T extends SequenceElement> implements ModelUtils<T>
         INDArray sort = sorted[0];
         List<String> ret = new ArrayList<>();

-        // FIXME: int cast
         if (top > sort.length())
             top = (int) sort.length();
         //there will be a redundant word
@@ -72,7 +72,7 @@ public class GloveWeightLookupTable<T extends SequenceElement> extends InMemoryL
             putVector(Word2Vec.DEFAULT_UNK, randUnk);
         }
         if (weightAdaGrad == null || reset) {
-            weightAdaGrad = new AdaGrad(new int[] {vocab.numWords() + 1, vectorLength}, lr.get());
+            weightAdaGrad = new AdaGrad(new long[]{vocab.numWords() + 1, vectorLength}, lr.get());
         }


@@ -81,7 +81,7 @@ public class GloveWeightLookupTable<T extends SequenceElement> extends InMemoryL
         bias = Nd4j.create(syn0.rows());

         if (biasAdaGrad == null || reset) {
-            biasAdaGrad = new AdaGrad(ArrayUtil.toInts(bias.shape()), lr.get());
+            biasAdaGrad = new AdaGrad(bias.shape(), lr.get());
         }


@@ -140,13 +140,13 @@ public class GloveWeightLookupTable<T extends SequenceElement> extends InMemoryL
     private void update(T w1, INDArray wordVector, INDArray contextVector, double gradient) {
         //gradient for word vectors
         INDArray grad1 = contextVector.mul(gradient);
-        INDArray update = weightAdaGrad.getGradient(grad1, w1.getIndex(), ArrayUtil.toInts(syn0.shape()));
+        INDArray update = weightAdaGrad.getGradient(grad1, w1.getIndex(), syn0.shape());

         //update vector
         wordVector.subi(update);

         double w1Bias = bias.getDouble(w1.getIndex());
-        double biasGradient = biasAdaGrad.getGradient(gradient, w1.getIndex(), ArrayUtil.toInts(bias.shape()));
+        double biasGradient = biasAdaGrad.getGradient(gradient, w1.getIndex(), bias.shape());
         double update2 = w1Bias - biasGradient;
         bias.putScalar(w1.getIndex(), update2);
     }
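In the lookup tables above, shape information is now passed around as long[] straight from INDArray.shape(), instead of being narrowed through ArrayUtil.toInts. A small hedged sketch of that calling convention using only the ND4J calls the hunks themselves rely on (the array sizes and the numElements helper are illustrative):

    import org.nd4j.linalg.api.ndarray.INDArray;
    import org.nd4j.linalg.factory.Nd4j;

    public class LongShapeSketch {
        // Illustrative consumer that accepts shape information as long[], the way the
        // updated lookup tables now pass INDArray.shape() through unchanged.
        static long numElements(long[] shape) {
            long n = 1;
            for (long d : shape) {
                n *= d;
            }
            return n;
        }

        public static void main(String[] args) {
            INDArray syn0 = Nd4j.create(10, 5);
            long[] shape = syn0.shape();             // long[] straight from the array
            System.out.println(numElements(shape));  // 50, no ArrayUtil.toInts round-trip
        }
    }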
@@ -20,6 +20,7 @@ import lombok.extern.slf4j.Slf4j;
 import lombok.val;
 import org.deeplearning4j.nn.api.Model;
 import org.nd4j.linalg.api.buffer.DataType;
+import org.nd4j.linalg.exception.ND4JArraySizeException;
 import org.nd4j.linalg.function.Consumer;
 import org.nd4j.linalg.lossfunctions.impl.LossBinaryXENT;
 import org.nd4j.linalg.primitives.Pair;
@@ -293,7 +294,8 @@ public class GradientCheckUtil {
                     ss = n;
                 }

-                // FIXME: int cast
+                if (ss > Integer.MAX_VALUE)
+                    throw new ND4JArraySizeException();
                 stepSizeForParam.put(paramNames.get(i), (int) ss);
             }
         }
@@ -140,10 +140,9 @@ public class ElementWiseVertex extends GraphVertex {
             //CNN inputs... also check that the channels, width and heights match:
             InputType.InputTypeConvolutional firstConv = (InputType.InputTypeConvolutional) first;

-            // FIXME: int cast
-            val fd = (int) firstConv.getChannels();
-            val fw = (int) firstConv.getWidth();
-            val fh = (int) firstConv.getHeight();
+            val fd = firstConv.getChannels();
+            val fw = firstConv.getWidth();
+            val fh = firstConv.getHeight();

             for (int i = 1; i < vertexInputs.length; i++) {
                 if (vertexInputs[i].getType() != InputType.Type.CNN) {
@@ -155,10 +154,9 @@ public class ElementWiseVertex extends GraphVertex {

                 InputType.InputTypeConvolutional otherConv = (InputType.InputTypeConvolutional) vertexInputs[i];

-                // FIXME: int cast
-                val od = (int) otherConv.getChannels();
-                val ow = (int) otherConv.getWidth();
-                val oh = (int) otherConv.getHeight();
+                val od = otherConv.getChannels();
+                val ow = otherConv.getWidth();
+                val oh = otherConv.getHeight();

                 if (fd != od || fw != ow || fh != oh) {
                     throw new InvalidInputTypeException(
@@ -94,13 +94,12 @@ public class MergeVertex extends GraphVertex {
             // CNN3D inputs: check that the channels, width and height match:
             InputType.InputTypeConvolutional3D firstConv = (InputType.InputTypeConvolutional3D) first;

-            // FIXME: int cast
-            val fd = (int) firstConv.getDepth();
-            val fw = (int) firstConv.getWidth();
-            val fh = (int) firstConv.getHeight();
-            val fc = (int) firstConv.getChannels();
+            val fd = firstConv.getDepth();
+            val fw = firstConv.getWidth();
+            val fh = firstConv.getHeight();
+            val fc = firstConv.getChannels();

-            int depthSum = fc;
+            long depthSum = fc;
             InputType.InputTypeConvolutional3D otherConv = null;
             for (int i = 1; i < vertexInputs.length; i++) {
                 if (vertexInputs[i].getType() != InputType.Type.CNN3D) {
@@ -109,10 +108,10 @@ public class MergeVertex extends GraphVertex {
                 }

                 otherConv = (InputType.InputTypeConvolutional3D) vertexInputs[i];
-                val od = (int) otherConv.getDepth();
-                val ow = (int) otherConv.getWidth();
-                val oh = (int) otherConv.getHeight();
-                val oc = (int) otherConv.getChannels();
+                val od = otherConv.getDepth();
+                val ow = otherConv.getWidth();
+                val oh = otherConv.getHeight();
+                val oc = otherConv.getChannels();

                 if (fd != od || fw != ow || fh != oh) {
                     throw new InvalidInputTypeException("Invalid input: MergeVertex cannot merge CNN3D activations of different width/heights:" + "first [channels,width,height] = [" + fd + "," + fw + "," + fh
@@ -177,12 +176,11 @@ public class MergeVertex extends GraphVertex {
             //CNN inputs... also check that the channels, width and heights match:
             InputType.InputTypeConvolutional firstConv = (InputType.InputTypeConvolutional) first;

-            // FIXME: int cast
-            val fd = (int) firstConv.getChannels();
-            val fw = (int) firstConv.getWidth();
-            val fh = (int) firstConv.getHeight();
+            val fd = firstConv.getChannels();
+            val fw = firstConv.getWidth();
+            val fh = firstConv.getHeight();

-            int depthSum = fd;
+            long depthSum = fd;

             for (int i = 1; i < vertexInputs.length; i++) {
                 if (vertexInputs[i].getType() != InputType.Type.CNN) {
@@ -194,10 +192,9 @@ public class MergeVertex extends GraphVertex {

                 InputType.InputTypeConvolutional otherConv = (InputType.InputTypeConvolutional) vertexInputs[i];

-                // FIXME: int cast
-                val od = (int) otherConv.getChannels();
-                val ow = (int) otherConv.getWidth();
-                val oh = (int) otherConv.getHeight();
+                val od = otherConv.getChannels();
+                val ow = otherConv.getWidth();
+                val oh = otherConv.getHeight();

                 if (fw != ow || fh != oh) {
                     throw new InvalidInputTypeException(
@@ -131,12 +131,11 @@ public class PoolHelperVertex extends GraphVertex {
             //CNN inputs... also check that the channels, width and heights match:
             InputType.InputTypeConvolutional firstConv = (InputType.InputTypeConvolutional) first;

-            // FIXME: int cast
-            val fd = (int) firstConv.getChannels();
-            val fw = (int) firstConv.getWidth();
-            val fh = (int) firstConv.getHeight();
+            val fd = firstConv.getChannels();
+            val fw = firstConv.getWidth();
+            val fh = firstConv.getHeight();

-            int depthSum = fd;
+            long depthSum = fd;

             for (int i = 1; i < vertexInputs.length; i++) {
                 if (vertexInputs[i].getType() != InputType.Type.CNN) {
@@ -148,10 +147,9 @@ public class PoolHelperVertex extends GraphVertex {

                 InputType.InputTypeConvolutional otherConv = (InputType.InputTypeConvolutional) vertexInputs[i];

-                // FIXME: int cast
-                int od = (int) otherConv.getChannels();
-                int ow = (int) otherConv.getWidth();
-                int oh = (int) otherConv.getHeight();
+                long od = otherConv.getChannels();
+                long ow = otherConv.getWidth();
+                long oh = otherConv.getHeight();

                 if (fw != ow || fh != oh) {
                     throw new InvalidInputTypeException(
@@ -150,12 +150,11 @@ public class UnstackVertex extends GraphVertex {
             //CNN inputs... also check that the channels, width and heights match:
             InputType.InputTypeConvolutional firstConv = (InputType.InputTypeConvolutional) first;

-            // FIXME: int cast
-            val fd = (int) firstConv.getChannels();
-            val fw = (int) firstConv.getWidth();
-            val fh = (int) firstConv.getHeight();
+            val fd = firstConv.getChannels();
+            val fw = firstConv.getWidth();
+            val fh = firstConv.getHeight();

-            int depthSum = fd;
+            long depthSum = fd;

             for (int i = 1; i < vertexInputs.length; i++) {
                 if (vertexInputs[i].getType() != InputType.Type.CNN) {
@@ -167,10 +166,9 @@ public class UnstackVertex extends GraphVertex {

                 InputType.InputTypeConvolutional otherConv = (InputType.InputTypeConvolutional) vertexInputs[i];

-                // FIXME: int cast
-                val od = (int) otherConv.getChannels();
-                val ow = (int) otherConv.getWidth();
-                val oh = (int) otherConv.getHeight();
+                val od = otherConv.getChannels();
+                val ow = otherConv.getWidth();
+                val oh = otherConv.getHeight();

                 if (fw != ow || fh != oh) {
                     throw new InvalidInputTypeException(
@@ -402,18 +402,17 @@ public abstract class InputType implements Serializable {
         //Note: ConvolutionalFlat and FeedForward look identical... but either should work OK if using something
         // like FeedForwardToCnnPreProcessor

-        // FIXME: int cast
         switch (inputArray.rank()) {
             case 2:
-                return InputType.feedForward((int) inputArray.size(1));
+                return InputType.feedForward(inputArray.size(1));
             case 3:
-                return InputType.recurrent((int) inputArray.size(1), (int) inputArray.size(2));
+                return InputType.recurrent(inputArray.size(1), (int) inputArray.size(2));
             case 4:
                 //Order: [minibatch, channels, height, width] -> [h, w, c]
-                return InputType.convolutional((int) inputArray.size(2), (int) inputArray.size(3), (int) inputArray.size(1));
+                return InputType.convolutional(inputArray.size(2), (int) inputArray.size(3), (int) inputArray.size(1));
             case 5:
                 //Order: [minibatch, channels, depth, height, width] -> [d, h, w, c]
-                return InputType.convolutional3D((int) inputArray.size(2), (int) inputArray.size(3),
+                return InputType.convolutional3D(inputArray.size(2), (int) inputArray.size(3),
                                 (int) inputArray.size(4), (int) inputArray.size(1));
             default:
                 throw new IllegalArgumentException(
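With the leading casts removed, code that infers an InputType from activations can hand INDArray.size(i), which is a long, straight to the factory methods used in the hunk above. A hedged sketch mirroring exactly those calls (array shapes are arbitrary; the class name is illustrative):

    import org.deeplearning4j.nn.conf.inputs.InputType;
    import org.nd4j.linalg.api.ndarray.INDArray;
    import org.nd4j.linalg.factory.Nd4j;

    public class InferInputTypeSketch {
        public static void main(String[] args) {
            INDArray activations2d = Nd4j.create(32, 784);       // [minibatch, features]
            // size(1) is a long; no (int) cast needed any more
            InputType ff = InputType.feedForward(activations2d.size(1));

            INDArray activations3d = Nd4j.create(32, 128, 50);   // [minibatch, size, tsLength]
            InputType rnn = InputType.recurrent(activations3d.size(1), (int) activations3d.size(2));

            System.out.println(ff);
            System.out.println(rnn);
        }
    }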
@@ -152,17 +152,18 @@ public class Cnn3DLossLayer extends FeedForwardLayer {
         }

         @Override
-        public void setNIn(int nIn){
+        public void setNIn(long nIn){
             throw new UnsupportedOperationException(
                             "Cnn3DLossLayer has no parameters, thus nIn will always equal nOut.");
         }

         @Override
-        public void setNOut(int nOut){
+        public void setNOut(long nOut){
             throw new UnsupportedOperationException(
                             "Cnn3DLossLayer has no parameters, thus nIn will always equal nOut.");
         }

+
         @Override
         @SuppressWarnings("unchecked")
         public Cnn3DLossLayer build() {
@@ -145,13 +145,13 @@ public class CnnLossLayer extends FeedForwardLayer {
         }

         @Override
-        public void setNIn(int nIn){
+        public void setNIn(long nIn){
             throw new UnsupportedOperationException(
                             "This layer has no parameters, thus nIn will always equal nOut.");
         }

         @Override
-        public void setNOut(int nOut){
+        public void setNOut(long nOut){
             throw new UnsupportedOperationException(
                             "This layer has no parameters, thus nIn will always equal nOut.");
         }
@@ -88,7 +88,7 @@ public class Convolution1DLayer extends ConvolutionLayer {
             //Probably: user did InputType.recurrent(x) without specifying sequence length
             outLength = -1;
         } else {
-            outLength = Convolution1DUtils.getOutputSize((int) inputTsLength, kernelSize[0], stride[0], padding[0],
+            outLength = Convolution1DUtils.getOutputSize(inputTsLength, kernelSize[0], stride[0], padding[0],
                             convolutionMode, dilation[0]);
         }
         return InputType.recurrent(nOut, outLength);
@@ -117,14 +117,14 @@ public abstract class FeedForwardLayer extends BaseLayer {
      * this is the input channels, otherwise is the previous layer size.
      *
      */
-    protected int nIn = 0;
+    protected long nIn = 0;

     /**
      * Number of inputs for the layer (usually the size of the last layer). <br> Note that for Convolutional layers,
      * this is the input channels, otherwise is the previous layer size.
      *
      */
-    protected int nOut = 0;
+    protected long nOut = 0;

     /**
      * Number of inputs for the layer (usually the size of the last layer). <br> Note that for Convolutional layers,
@@ -144,8 +144,7 @@ public abstract class FeedForwardLayer extends BaseLayer {
      * @param nIn Number of inputs for the layer
      */
     public T nIn(long nIn) {
-        // FIXME: int cast
-        this.setNIn((int) nIn);
+        this.setNIn(nIn);
         return (T) this;
     }

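Since nIn/nOut are now stored as long and nIn(long) forwards the value without narrowing, layer builders no longer truncate sizes silently. An illustrative configuration snippet (DenseLayer is a standard DL4J layer; the sizes and class name are arbitrary, and this assumes the matching nOut(long) builder method):

    import org.deeplearning4j.nn.conf.layers.DenseLayer;

    public class BuilderLongSketch {
        public static void main(String[] args) {
            long nIn = 784L;
            long nOut = 256L;
            // nIn(long)/nOut(long) now call setNIn/setNOut directly instead of casting to int.
            DenseLayer layer = new DenseLayer.Builder()
                    .nIn(nIn)
                    .nOut(nOut)
                    .build();
            System.out.println(layer.getNIn() + " -> " + layer.getNOut());
        }
    }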
@@ -41,12 +41,9 @@ public class InputTypeUtil {
                     Class<?> layerClass) {
         InputType.InputTypeConvolutional i = (InputType.InputTypeConvolutional) inputType;

-        // FIXME: int cast
-        val hIn = (int) i.getHeight();
-        val wIn = (int) i.getWidth();
+        val hIn = i.getHeight();
+        val wIn = i.getWidth();

-        val inHeight = (int) i.getHeight();
-        val inWidth = (int) i.getWidth();
         int padH = (padding == null ? 0 : padding[0]); //May be null for ConvolutionMode.Same
         int padW = (padding == null ? 0 : padding[1]);
         int kH = kernelSize[0];
@@ -69,13 +66,13 @@ public class InputTypeUtil {
         }

         if (convolutionMode == ConvolutionMode.Same) {
-            int hOut = stride[0] * hIn;
-            int wOut = stride[1] * wIn;
+            long hOut = stride[0] * hIn;
+            long wOut = stride[1] * wIn;
             return InputType.convolutional(hOut, wOut, outputDepth);
         }

-        int hOut = sH * (hIn - 1) + kH - 2 * padH;
-        int wOut = sW * (wIn - 1) + kW - 2 * padW;
+        long hOut = sH * (hIn - 1) + kH - 2 * padH;
+        long wOut = sW * (wIn - 1) + kW - 2 * padW;

         return InputType.convolutional(hOut, wOut, outputDepth);
     }
@@ -91,10 +88,9 @@ public class InputTypeUtil {

         InputType.InputTypeConvolutional3D i = (InputType.InputTypeConvolutional3D) inputType;

-        // FIXME: int cast
-        val inDepth = (int) i.getDepth();
-        val inHeight = (int) i.getHeight();
-        val inWidth = (int) i.getWidth();
+        long inDepth = i.getDepth();
+        long inHeight = i.getHeight();
+        long inWidth = i.getWidth();

         int padD = (padding == null ? 0 : padding[0]);
         int padH = (padding == null ? 0 : padding[1]);
@@ -211,9 +207,9 @@ public class InputTypeUtil {
             return InputType.convolutional3D(outD, outH, outW, outputChannels);
         }

-        int dOut = (inDepth - kD + 2 * padD) / sD + 1;
-        int hOut = (inHeight - kH + 2 * padH) / sH + 1;
-        int wOut = (inWidth - kW + 2 * padW) / sW + 1;
+        long dOut = (inDepth - kD + 2 * padD) / sD + 1;
+        long hOut = (inHeight - kH + 2 * padH) / sH + 1;
+        long wOut = (inWidth - kW + 2 * padW) / sW + 1;
         return InputType.convolutional3D(dOut, hOut, wOut, outputChannels);
     }

@@ -296,9 +292,8 @@ public class InputTypeUtil {

         InputType.InputTypeConvolutional i = (InputType.InputTypeConvolutional) inputType;

-        // FIXME: int cast
-        val inHeight = (int) i.getHeight();
-        val inWidth = (int) i.getWidth();
+        long inHeight = i.getHeight();
+        long inWidth = i.getWidth();
         int padH = (padding == null ? 0 : padding[0]); //May be null for ConvolutionMode.Same
         int padW = (padding == null ? 0 : padding[1]);
         int kH = kernelSize[0];
@@ -379,8 +374,8 @@ public class InputTypeUtil {
             return InputType.convolutional(outH, outW, outputDepth);
         }

-        int hOut = (inHeight - kH + 2 * padH) / sH + 1;
-        int wOut = (inWidth - kW + 2 * padW) / sW + 1;
+        long hOut = (inHeight - kH + 2 * padH) / sH + 1;
+        long wOut = (inWidth - kW + 2 * padW) / sW + 1;
         return InputType.convolutional(hOut, wOut, outputDepth);
     }

@@ -142,13 +142,13 @@ public class RnnLossLayer extends FeedForwardLayer {
         }

         @Override
-        public void setNIn(int nIn){
+        public void setNIn(long nIn){
             throw new UnsupportedOperationException(
                             "This layer has no parameters, thus nIn will always equal nOut.");
         }

         @Override
-        public void setNOut(int nOut){
+        public void setNOut(long nOut){
             throw new UnsupportedOperationException(
                             "This layer has no parameters, thus nIn will always equal nOut.");
         }
@@ -82,12 +82,12 @@ public class Subsampling1DLayer extends SubsamplingLayer {
         }
         InputType.InputTypeRecurrent r = (InputType.InputTypeRecurrent) inputType;
         long inputTsLength = r.getTimeSeriesLength();
-        int outLength;
+        long outLength;
         if (inputTsLength < 0) {
             //Probably: user did InputType.recurrent(x) without specifying sequence length
             outLength = -1;
         } else {
-            outLength = Convolution1DUtils.getOutputSize((int) inputTsLength, kernelSize[0], stride[0], padding[0],
+            outLength = Convolution1DUtils.getOutputSize(inputTsLength, kernelSize[0], stride[0], padding[0],
                             convolutionMode, dilation[0]);
         }
         return InputType.recurrent(r.getSize(), outLength);
@@ -32,6 +32,7 @@ import org.deeplearning4j.util.ValidationUtils;
 import org.nd4j.base.Preconditions;
 import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.exception.ND4JArraySizeException;
 import org.nd4j.linalg.learning.regularization.Regularization;

 import java.util.Collection;
@@ -138,9 +139,11 @@ public class Subsampling3DLayer extends NoParamLayer {
                             + "\"): Expected CNN input, got " + inputType);
         }

-        // FIXME: int cast
+        long inChannels = ((InputType.InputTypeConvolutional3D) inputType).getChannels();
+        if (inChannels > Integer.MAX_VALUE)
+            throw new ND4JArraySizeException();
         return InputTypeUtil.getOutputTypeCnn3DLayers(inputType, kernelSize, stride, padding, new int[] {1, 1, 1}, // no dilation
-                        convolutionMode, (int) ((InputType.InputTypeConvolutional3D) inputType).getChannels(),
+                        convolutionMode, (int) inChannels,
                         layerIndex, getLayerName(), Subsampling3DLayer.class);
     }

@@ -83,11 +83,10 @@ public class Upsampling3D extends BaseUpsamplingLayer {
         }
         InputType.InputTypeConvolutional3D i = (InputType.InputTypeConvolutional3D) inputType;

-        // FIXME: int cast
-        int inHeight = (int) i.getHeight();
-        int inWidth = (int) i.getWidth();
-        int inDepth = (int) i.getDepth();
-        int inChannels = (int) i.getChannels();
+        long inHeight = (int) i.getHeight();
+        long inWidth = (int) i.getWidth();
+        long inDepth = (int) i.getDepth();
+        long inChannels = (int) i.getChannels();

         return InputType.convolutional3D(size[0] * inDepth, size[1] * inHeight, size[2] * inWidth, inChannels);
     }
@@ -259,7 +259,7 @@ public class OCNNOutputLayer extends BaseOutputLayer {
         }

         @Override
-        public void setNOut(int nOut){
+        public void setNOut(long nOut){
             throw new UnsupportedOperationException(
                             "Unable to specify number of outputs with ocnn. Outputs are fixed to 1.");
         }
@@ -79,6 +79,7 @@ import org.nd4j.linalg.dataset.api.DataSetUtil;
 import org.nd4j.linalg.dataset.api.MultiDataSet;
 import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
 import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator;
+import org.nd4j.linalg.exception.ND4JArraySizeException;
 import org.nd4j.linalg.factory.Nd4j;
 import org.nd4j.linalg.heartbeat.Heartbeat;
 import org.nd4j.linalg.heartbeat.reports.Environment;
@@ -3329,7 +3330,6 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
         //In 99+% of cases, the input and labels dimension 0 size should be identical
         //The only real exceptions: space to batch, and batch to space layers
         //In those cases, we should base it on the labels size, as this impacts gradient calculation
-        // FIXME: int cast
         return labels == null || labels[0] == null ? (int) inputs[0].size(0) : (int)labels[0].size(0);
     }

@@ -3653,7 +3653,8 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
             if (endTimeIdx > timeSeriesLength)
                 endTimeIdx = timeSeriesLength;

-            // FIXME: int cast
+            if (startTimeIdx > Integer.MAX_VALUE)
+                throw new ND4JArraySizeException();
             List<INDArray[]> list = getSubsetsForTbptt((int) startTimeIdx, endTimeIdx, inputs, labels, featureMasks, labelMasks);

             setInputs(list.get(0));
@@ -3799,7 +3800,8 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
                 }
             }

-            // FIXME: int cast
+            if (minibatchSize > Integer.MAX_VALUE)
+                throw new ND4JArraySizeException();
             Pair<INDArray, MaskState> outPair =
                             current.feedForwardMaskArrays(inputMasks, maskState, (int)minibatchSize);
             map.put(topologicalOrder[i], outPair);
@ -4664,7 +4666,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
|
||||||
* @param layer Index of the layer to get the size of. Must be in range 0 to nLayers-1 inclusive
|
* @param layer Index of the layer to get the size of. Must be in range 0 to nLayers-1 inclusive
|
||||||
* @return Size of the layer
|
* @return Size of the layer
|
||||||
*/
|
*/
|
||||||
public int layerSize(int layer) {
|
public long layerSize(int layer) {
|
||||||
if (layer < 0 || layer > layers.length) {
|
if (layer < 0 || layer > layers.length) {
|
||||||
throw new IllegalArgumentException("Invalid layer index: " + layer + ". Layer index must be between 0 and "
|
throw new IllegalArgumentException("Invalid layer index: " + layer + ". Layer index must be between 0 and "
|
||||||
+ (layers.length - 1) + " inclusive");
|
+ (layers.length - 1) + " inclusive");
|
||||||
|
@ -4683,7 +4685,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
|
||||||
* @param layer Index of the layer to get the size of. Must be in range 0 to nLayers-1 inclusive
|
* @param layer Index of the layer to get the size of. Must be in range 0 to nLayers-1 inclusive
|
||||||
* @return Size of the layer
|
* @return Size of the layer
|
||||||
*/
|
*/
|
||||||
public int layerInputSize(int layer) {
|
public long layerInputSize(int layer) {
|
||||||
if (layer < 0 || layer > layers.length) {
|
if (layer < 0 || layer > layers.length) {
|
||||||
throw new IllegalArgumentException("Invalid layer index: " + layer + ". Layer index must be between 0 and "
|
throw new IllegalArgumentException("Invalid layer index: " + layer + ". Layer index must be between 0 and "
|
||||||
+ (layers.length - 1) + " inclusive");
|
+ (layers.length - 1) + " inclusive");
|
||||||
|
@ -4701,7 +4703,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
|
||||||
* @param layerName Name of the layer to get the size of
|
* @param layerName Name of the layer to get the size of
|
||||||
* @return Size of the layer
|
* @return Size of the layer
|
||||||
*/
|
*/
|
||||||
public int layerSize(String layerName) {
|
public long layerSize(String layerName) {
|
||||||
Layer l = getLayer(layerName);
|
Layer l = getLayer(layerName);
|
||||||
if(l == null){
|
if(l == null){
|
||||||
throw new IllegalArgumentException("No layer with name \"" + layerName + "\" exists");
|
throw new IllegalArgumentException("No layer with name \"" + layerName + "\" exists");
|
||||||
|
@ -4712,8 +4714,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
|
||||||
}
|
}
|
||||||
FeedForwardLayer ffl = (FeedForwardLayer) conf;
|
FeedForwardLayer ffl = (FeedForwardLayer) conf;
|
||||||
|
|
||||||
// FIXME: int cast
|
return ffl.getNOut();
|
||||||
return (int) ffl.getNOut();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -4727,7 +4728,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
|
||||||
* @param layerName Name of the layer to get the size of
|
* @param layerName Name of the layer to get the size of
|
||||||
* @return Size of the layer
|
* @return Size of the layer
|
||||||
*/
|
*/
|
||||||
public int layerInputSize(String layerName) {
|
public long layerInputSize(String layerName) {
|
||||||
Layer l = getLayer(layerName);
|
Layer l = getLayer(layerName);
|
||||||
if(l == null){
|
if(l == null){
|
||||||
throw new IllegalArgumentException("No layer with name \"" + layerName + "\" exists");
|
throw new IllegalArgumentException("No layer with name \"" + layerName + "\" exists");
|
||||||
|
@ -4738,8 +4739,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
|
||||||
}
|
}
|
||||||
FeedForwardLayer ffl = (FeedForwardLayer) conf;
|
FeedForwardLayer ffl = (FeedForwardLayer) conf;
|
||||||
|
|
||||||
// FIXME: int cast
|
return ffl.getNIn();
|
||||||
return (int) ffl.getNIn();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
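The ComputationGraph hunks above follow one pattern: the `// FIXME: int cast` marker is replaced by an explicit range check before a `long` is narrowed to `int`. A minimal standalone sketch of that guard, assuming the `ND4JArraySizeException` class whose import is added elsewhere in this commit (the helper name `checkedCast` is hypothetical, not part of the codebase):

    import org.nd4j.linalg.exception.ND4JArraySizeException;

    public final class CheckedCastSketch {
        private CheckedCastSketch() {}

        // Fail fast instead of silently truncating a long dimension to int.
        public static int checkedCast(long value) {
            if (value > Integer.MAX_VALUE)
                throw new ND4JArraySizeException();
            return (int) value;
        }

        public static void main(String[] args) {
            System.out.println(checkedCast(1024L));          // fine
            System.out.println(checkedCast(3_000_000_000L)); // throws ND4JArraySizeException
        }
    }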
@@ -43,10 +43,10 @@ import java.util.Arrays;
  * @author Justin Long (crockpotveggies)
  */
 public class UnstackVertex extends BaseGraphVertex {
-    private int from;
+    private long from;
     private int stackSize;
     private long forwardShape[];
-    private int step;
+    private long step;
 
     public UnstackVertex(ComputationGraph graph, String name, int vertexIndex, int from, int stackSize, DataType dataType) {
         this(graph, name, vertexIndex, null, null, from, stackSize, dataType);
@@ -77,10 +77,9 @@ public class UnstackVertex extends BaseGraphVertex {
         // once we know the inputs, save the shape and interval size for doBackward
         this.forwardShape = Arrays.copyOf(inputs[0].shape(), inputs[0].rank());
 
-        // FIXME: int cast
-        this.step = (int) inputs[0].size(0) / stackSize;
-        int start = from * step;
-        int end = (from + 1) * step;
+        this.step = inputs[0].size(0) / stackSize;
+        long start = from * step;
+        long end = (from + 1) * step;
 
         INDArray ret;
         switch (inputs[0].rank()) { //TODO remove the dups here if/when possible (gradient checks must pass)
@@ -108,8 +107,8 @@ public class UnstackVertex extends BaseGraphVertex {
             throw new IllegalStateException("Cannot do backward pass: error not set");
 
         INDArray out = workspaceMgr.create(ArrayType.ACTIVATION_GRAD, inputs[0].dataType(), forwardShape);
-        int start = from * step;
-        int end = (from + 1) * step;
+        long start = from * step;
+        long end = (from + 1) * step;
 
         switch (forwardShape.length) {
             case 2:
@@ -154,8 +153,8 @@ public class UnstackVertex extends BaseGraphVertex {
         }
 
         //Mask arrays are either 1d (column vector) or 2d...
-        int start = from * minibatchSize;
-        int end = (from + 1) * minibatchSize;
+        long start = from * minibatchSize;
+        long end = (from + 1) * minibatchSize;
         INDArray outMask = maskArrays[0].get(NDArrayIndex.interval(start, end), NDArrayIndex.all());
         return new Pair<>(outMask, currentMaskState);
     }
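The UnstackVertex changes move the start/end interval arithmetic from `int` to `long`. A standalone illustration of the overflow this avoids once a slice of a large stacked minibatch no longer fits in an `int` (plain Java, made-up numbers, not taken from the vertex itself):

    public class IntervalOverflowSketch {
        public static void main(String[] args) {
            int from = 3;
            int intStep = 800_000_000;          // pretend the step was computed as int
            long longStep = 800_000_000L;       // same step kept as long

            System.out.println(from * intStep);   // int multiply wraps to a negative value
            System.out.println(from * longStep);  // 2400000000, computed in long arithmetic
        }
    }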
@@ -87,9 +87,8 @@ public class LastTimeStepVertex extends BaseGraphVertex {
 
         INDArray out;
         if (mask == null) {
-            // FIXME: int cast
             //No mask array -> extract same (last) column for all
-            int lastTS = (int) inputs[0].size(2) - 1;
+            long lastTS = inputs[0].size(2) - 1;
             out = inputs[0].get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.point(lastTS));
             out = workspaceMgr.dup(ArrayType.ACTIVATIONS, out);
             fwdPassTimeSteps = null; //Null -> last time step for all examples
@@ -99,8 +98,7 @@ public class LastTimeStepVertex extends BaseGraphVertex {
 
             //Want the index of the last non-zero entry in the mask array.
             //Check a little here by using mulRowVector([0,1,2,3,...]) and argmax
-            // FIXME: int cast
-            int maxTsLength = (int) fwdPassShape[2];
+            long maxTsLength = fwdPassShape[2];
             INDArray row = Nd4j.linspace(0, maxTsLength - 1, maxTsLength, mask.dataType());
             INDArray temp = mask.mulRowVector(row);
             INDArray lastElementIdx = Nd4j.argMax(temp, 1);
@@ -346,7 +346,6 @@ public abstract class AbstractLayer<LayerConfT extends org.deeplearning4j.nn.con
 
     @Override
     public int getInputMiniBatchSize() {
-        // FIXME: int cast
         return (int) input.size(0);
     }
 
@@ -229,7 +229,6 @@ public abstract class BaseOutputLayer<LayerConfT extends org.deeplearning4j.nn.c
      */
     @Override
     public int numLabels() {
-        // FIXME: int cast
         return (int) labels.size(1);
     }
 
@@ -236,7 +236,6 @@ public class LossLayer extends BaseLayer<org.deeplearning4j.nn.conf.layers.LossL
      */
     @Override
     public int numLabels() {
-        // FIXME: int cast
         return (int) labels.size(1);
     }
 
@@ -86,19 +86,18 @@ public class Cnn3DLossLayer extends BaseLayer<org.deeplearning4j.nn.conf.layers.
         INDArray delta2d = lossFunction.computeGradient(labels2d, input2d.dup(input2d.ordering()), layerConf().getActivationFn(), maskReshaped);
         delta2d = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta2d);
 
-        // FIXME: int cast
-        int n = (int)input.size(0);
-        int d, h, w, c;
+        long n = input.size(0);
+        long d, h, w, c;
         if(layerConf().getDataFormat() == Convolution3D.DataFormat.NDHWC){
-            d = (int)input.size(1);
-            h = (int)input.size(2);
-            w = (int)input.size(3);
-            c = (int)input.size(4);
+            d = input.size(1);
+            h = input.size(2);
+            w = input.size(3);
+            c = input.size(4);
         } else {
-            d = (int)input.size(2);
-            h = (int)input.size(3);
-            w = (int)input.size(4);
-            c = (int)input.size(1);
+            d = input.size(2);
+            h = input.size(3);
+            w = input.size(4);
+            c = input.size(1);
         }
         INDArray delta5d = ConvolutionUtils.reshape2dTo5d(layerConf().getDataFormat(), delta2d, n, d, h, w, c, workspaceMgr, ArrayType.ACTIVATION_GRAD);
 
@@ -130,7 +129,6 @@ public class Cnn3DLossLayer extends BaseLayer<org.deeplearning4j.nn.conf.layers.
 
     @Override
     public int numLabels() {
-        // FIXME: int cast
         return (int) labels.size(1);
     }
 
@@ -180,10 +178,8 @@ public class Cnn3DLossLayer extends BaseLayer<org.deeplearning4j.nn.conf.layers.
         INDArray input2d = ConvolutionUtils.reshape5dTo2d(layerConf().getDataFormat(), in, workspaceMgr, ArrayType.ACTIVATIONS);
         INDArray out2d = layerConf().getActivationFn().getActivation(input2d, training);
 
-        // FIXME: int cast
-        int n = (int)input.size(0);
-        int d, h, w, c;
+        long n = input.size(0);
+        long d, h, w, c;
         if(layerConf().getDataFormat() == Convolution3D.DataFormat.NDHWC){
             d = (int)input.size(1);
             h = (int)input.size(2);
@@ -262,19 +258,18 @@ public class Cnn3DLossLayer extends BaseLayer<org.deeplearning4j.nn.conf.layers.
         val newShape = input.shape().clone();
         newShape[1] = 1;
 
-        // FIXME
-        int n = (int)input.size(0);
-        int d, h, w, c;
+        long n = input.size(0);
+        long d, h, w, c;
         if(layerConf().getDataFormat() == Convolution3D.DataFormat.NDHWC){
-            d = (int)input.size(1);
-            h = (int)input.size(2);
-            w = (int)input.size(3);
-            c = (int)input.size(4);
+            d = input.size(1);
+            h = input.size(2);
+            w = input.size(3);
+            c = input.size(4);
         } else {
-            d = (int)input.size(2);
-            h = (int)input.size(3);
-            w = (int)input.size(4);
-            c = (int)input.size(1);
+            d = input.size(2);
+            h = input.size(3);
+            w = input.size(4);
+            c = input.size(1);
         }
         INDArray scoreArrayTs = ConvolutionUtils.reshape2dTo5d(layerConf().getDataFormat(), scoreArray, n, d, h, w, c, workspaceMgr, ArrayType.FF_WORKING_MEM);
         INDArray summedScores = scoreArrayTs.sum(1,2,3,4);
@@ -88,8 +88,7 @@ public class CnnLossLayer extends BaseLayer<org.deeplearning4j.nn.conf.layers.Cn
         INDArray delta2d = lossFunction.computeGradient(labels2d, input2d.dup(input2d.ordering()), layerConf().getActivationFn(), maskReshaped);
         delta2d = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta2d);
 
-        // FIXME: int cast
-        INDArray delta4d = ConvolutionUtils.reshape2dTo4d(delta2d, ArrayUtil.toInts(input.shape()), workspaceMgr, ArrayType.ACTIVATION_GRAD);
+        INDArray delta4d = ConvolutionUtils.reshape2dTo4d(delta2d, input.shape(), workspaceMgr, ArrayType.ACTIVATION_GRAD);
 
         // grab the empty gradient
         Gradient gradient = new DefaultGradient();
@@ -119,7 +118,6 @@ public class CnnLossLayer extends BaseLayer<org.deeplearning4j.nn.conf.layers.Cn
 
     @Override
     public int numLabels() {
-        // FIXME: int cast
         return (int) labels.size(1);
     }
 
@@ -169,8 +167,7 @@ public class CnnLossLayer extends BaseLayer<org.deeplearning4j.nn.conf.layers.Cn
         INDArray input2d = ConvolutionUtils.reshape4dTo2d(in, workspaceMgr, ArrayType.ACTIVATIONS);
         INDArray out2d = layerConf().getActivationFn().getActivation(input2d, training);
 
-        // FIXME: int cast
-        return ConvolutionUtils.reshape2dTo4d(out2d, ArrayUtil.toInts(input.shape()), workspaceMgr, ArrayType.ACTIVATIONS);
+        return ConvolutionUtils.reshape2dTo4d(out2d, input.shape(), workspaceMgr, ArrayType.ACTIVATIONS);
     }
 
     @Override
@@ -236,8 +233,7 @@ public class CnnLossLayer extends BaseLayer<org.deeplearning4j.nn.conf.layers.Cn
         val newShape = input.shape().clone();
         newShape[1] = 1;
 
-        // FIXME
-        INDArray scoreArrayTs = ConvolutionUtils.reshape2dTo4d(scoreArray, ArrayUtil.toInts(newShape), workspaceMgr, ArrayType.FF_WORKING_MEM);
+        INDArray scoreArrayTs = ConvolutionUtils.reshape2dTo4d(scoreArray, newShape, workspaceMgr, ArrayType.FF_WORKING_MEM);
         INDArray summedScores = scoreArrayTs.sum(1,2,3).reshape(scoreArrayTs.size(0), 1);
 
         if (fullNetRegTerm != 0.0) {
@@ -71,8 +71,7 @@ public class Convolution3DLayer extends ConvolutionLayer {
 
         boolean isNCDHW = layerConfig.getDataFormat() == Convolution3D.DataFormat.NCDHW;
 
-        // FIXME: int cast
-        int miniBatch = (int) input.size(0);
+        long miniBatch = input.size(0);
         int inD = (int) (isNCDHW ? input.size(2) : input.size(1));
         int inH = (int) (isNCDHW ? input.size(3) : input.size(2));
         int inW = (int) (isNCDHW ? input.size(4) : input.size(3));
@@ -189,8 +188,7 @@ public class Convolution3DLayer extends ConvolutionLayer {
                             + " " + layerId());
         }
 
-        // FIXME: int cast
-        int miniBatch = (int) input.size(0);
+        long miniBatch = input.size(0);
         int inputChannels = (int) (isNCDHW ? input.size(1) : input.size(4));
         int inD =(int) (isNCDHW ? input.size(2) : input.size(1));
         int inH = (int) (isNCDHW ? input.size(3) : input.size(2));
@@ -35,6 +35,7 @@ import org.nd4j.linalg.api.memory.MemoryWorkspace;
 import org.nd4j.linalg.api.ndarray.INDArray;
 import org.nd4j.linalg.api.shape.Shape;
 import org.nd4j.linalg.convolution.Convolution;
+import org.nd4j.linalg.exception.ND4JArraySizeException;
 import org.nd4j.linalg.exception.ND4JOpProfilerException;
 import org.nd4j.linalg.factory.Nd4j;
 import org.nd4j.linalg.primitives.Pair;
@@ -113,13 +114,12 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
         if(epsilon.dataType() != dataType)
             epsilon = epsilon.castTo(dataType);
 
-        // FIXME: int cast
-        int miniBatch = (int) input.size(0);
+        long miniBatch = input.size(0);
         int inH = (int) input.size(2);
         int inW = (int) input.size(3);
 
-        int outDepth = (int) weights.size(0);
-        int inDepth = (int) weights.size(1);
+        long outDepth = weights.size(0);
+        long inDepth = weights.size(1);
         int kH = (int) weights.size(2);
         int kW = (int) weights.size(3);
 
@@ -143,7 +143,7 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
         INDArray biasGradView = gradientViews.get(ConvolutionParamInitializer.BIAS_KEY);
         INDArray weightGradView = gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY); //4d, c order. Shape: [outDepth,inDepth,kH,kW]
         INDArray weightGradView2df = Shape
-                        .newShapeNoCopy(weightGradView, new int[] {outDepth, inDepth * kH * kW}, false).transpose();
+                        .newShapeNoCopy(weightGradView, new long[]{outDepth, inDepth * kH * kW}, false).transpose();
 
 
 
@@ -204,7 +204,7 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
 
         //Note: due to the permute in preOut, and the fact that we essentially do a preOut.muli(epsilon), this reshape
         // should be zero-copy; only possible exception being sometimes with the "identity" activation case
-        INDArray delta2d = delta.reshape('c', new int[] {outDepth, miniBatch * outH * outW}); //Shape.newShapeNoCopy(delta,new int[]{outDepth,miniBatch*outH*outW},false);
+        INDArray delta2d = delta.reshape('c', new long[] {outDepth, miniBatch * outH * outW}); //Shape.newShapeNoCopy(delta,new int[]{outDepth,miniBatch*outH*outW},false);
 
         //Do im2col, but with order [miniB,outH,outW,depthIn,kH,kW]; but need to input [miniBatch,channels,kH,kW,outH,outW] given the current im2col implementation
         //To get this: create an array of the order we want, permute it to the order required by im2col implementation, and then do im2col on that
@@ -231,7 +231,7 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
         //Calculate epsilons for layer below, in 2d format (note: this is in 'image patch' format before col2im reduction)
         //Note: cc -> f mmul here, then reshape to 6d in f order
         INDArray epsNext2d = w2d.mmul(delta2d); //TODO can we reuse im2col array instead of allocating new result array?
-        INDArray eps6d = Shape.newShapeNoCopy(epsNext2d, new int[] {kW, kH, inDepth, outW, outH, miniBatch}, true);
+        INDArray eps6d = Shape.newShapeNoCopy(epsNext2d, new long[] {kW, kH, inDepth, outW, outH, miniBatch}, true);
 
         //Calculate epsilonNext by doing im2col reduction.
         //Current col2im implementation expects input with order: [miniBatch,channels,kH,kW,outH,outW]
@@ -282,7 +282,7 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
         }
     }
 
-    protected void validateInputDepth(int inDepth) {
+    protected void validateInputDepth(long inDepth) {
         if (input.size(1) != inDepth) {
             String layerName = conf.getLayer().getLayerName();
             if (layerName == null)
@@ -313,14 +313,13 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
 
         INDArray input = this.input.castTo(dataType);
 
-        // FIXME: int cast
-        int miniBatch = (int) input.size(0);
-        int outDepth = (int) weights.size(0);
-        int inDepth = (int) weights.size(1);
+        long miniBatch = input.size(0);
+        long outDepth = weights.size(0);
+        long inDepth = weights.size(1);
         validateInputDepth(inDepth);
 
-        int kH = (int) weights.size(2);
-        int kW = (int) weights.size(3);
+        long kH = weights.size(2);
+        long kW = weights.size(3);
 
         int[] dilation = layerConf().getDilation();
         int[] kernel = layerConf().getKernelSize();
@@ -331,7 +330,8 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
         if (convolutionMode == ConvolutionMode.Same) {
             outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, null, convolutionMode, dilation); //Also performs validation
 
-            // FIXME: int cast
+            if (input.size(2) > Integer.MAX_VALUE || input.size(3) > Integer.MAX_VALUE)
+                throw new ND4JArraySizeException();
             pad = ConvolutionUtils.getSameModeTopLeftPadding(outSize, new int[] {(int) input.size(2), (int) input.size(3)}, kernel,
                             strides, dilation );
         } else {
@@ -397,10 +397,12 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
             INDArray col = Nd4j.createUninitialized(weights.dataType(), new long[] {miniBatch, outH, outW, inDepth, kH, kW}, 'c');
             INDArray col2 = col.permute(0, 3, 4, 5, 1, 2);
             INDArray im2ColIn = input.castTo(col2.dataType()); //No op if already (for example) float
-            Convolution.im2col(im2ColIn, kH, kW, strides[0], strides[1], pad[0], pad[1], dilation[0], dilation[1],
+            if (kH > Integer.MAX_VALUE || kW > Integer.MAX_VALUE)
+                throw new ND4JArraySizeException();
+            Convolution.im2col(im2ColIn, (int)kH, (int)kW, strides[0], strides[1], pad[0], pad[1], dilation[0], dilation[1],
                             convolutionMode == ConvolutionMode.Same, col2);
 
-            INDArray im2col2d = Shape.newShapeNoCopy(col, new int[] {miniBatch * outH * outW, inDepth * kH * kW}, false);
+            INDArray im2col2d = Shape.newShapeNoCopy(col, new long[] {miniBatch * outH * outW, inDepth * kH * kW}, false);
 
             //Current order of weights: [depthOut,depthIn,kH,kW], c order
             //Permute to give [kW,kH,depthIn,depthOut], f order
@@ -418,7 +420,7 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
         }
 
         //Now, reshape to [outW,outH,miniBatch,outDepth], and permute to have correct output order: [miniBath,outDepth,outH,outW];
-        z = Shape.newShapeNoCopy(z, new int[] {outW, outH, miniBatch, outDepth}, true);
+        z = Shape.newShapeNoCopy(z, new long[] {outW, outH, miniBatch, outDepth}, true);
         z = z.permute(2, 3, 1, 0);
 
         if (training && cacheMode != CacheMode.NONE && workspaceMgr.hasConfiguration(ArrayType.FF_CACHE) && workspaceMgr.isWorkspaceOpen(ArrayType.FF_CACHE)) {
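The recurring change in the ConvolutionLayer hunks is that shape arrays handed to ND4J are built as long[] rather than int[], and the few values that must still be narrowed (the kernel dims passed to Convolution.im2col) are range-checked first. A minimal sketch of the long-shape side, using only calls that appear above (`size`, `reshape(char, long...)`); the array contents are made up for illustration:

    import org.nd4j.linalg.api.ndarray.INDArray;
    import org.nd4j.linalg.factory.Nd4j;

    public class LongShapeSketch {
        public static void main(String[] args) {
            INDArray activations = Nd4j.create(4, 6);   // small stand-in array

            // Shape arithmetic stays in long; nothing is truncated before ND4J sees it.
            long rows = activations.size(0);
            long cols = activations.size(1);
            INDArray flattened = activations.reshape('c', new long[]{1, rows * cols});

            System.out.println(java.util.Arrays.toString(flattened.shape()));  // [1, 24]
        }
    }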
@@ -171,9 +171,8 @@ public class Deconvolution2DLayer extends ConvolutionLayer {
                             + " " + layerId());
         }
 
-        // FIXME: int cast
-        int inDepth = (int) weights.size(0);
-        int outDepth = (int) weights.size(1);
+        long inDepth = weights.size(0);
+        long outDepth = weights.size(1);
 
         if (input.size(1) != inDepth && input.size(3) == inDepth) {
             //TODO AB 2019/10/25 this is an ugly "pseudo-NHWC support" hack that needs to be removed ASAD
@@ -199,7 +198,6 @@ public class Deconvolution2DLayer extends ConvolutionLayer {
         int[] pad;
         int[] outSize;
         if (convolutionMode == ConvolutionMode.Same) {
-            // FIXME: int cast
             outSize = ConvolutionUtils.getDeconvolutionOutputSize(input, kernel, strides, null, convolutionMode, dilation); //Also performs validation
             pad = ConvolutionUtils.getSameModeTopLeftPadding(outSize, new int[] {(int) input.size(2), (int) input.size(3)}, kernel,
                             strides, dilation );
@@ -208,8 +206,8 @@ public class Deconvolution2DLayer extends ConvolutionLayer {
             outSize = ConvolutionUtils.getDeconvolutionOutputSize(input, kernel, strides, pad, convolutionMode, dilation); //Also performs validation
         }
 
-        int outH = outSize[0];
-        int outW = outSize[1];
+        long outH = outSize[0];
+        long outW = outSize[1];
 
 
         val miniBatch = input.size(0);
@@ -32,6 +32,7 @@ import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.ndarray.INDArray;
 import org.nd4j.linalg.api.ops.CustomOp;
 import org.nd4j.linalg.api.ops.DynamicCustomOp;
+import org.nd4j.linalg.exception.ND4JArraySizeException;
 import org.nd4j.linalg.factory.Nd4j;
 import org.nd4j.linalg.primitives.Pair;
 
@@ -75,12 +76,11 @@ public class DepthwiseConvolution2DLayer extends ConvolutionLayer {
 
         INDArray input = this.input.castTo(dataType); //No-op if correct type
 
-        // FIXME: int cast
-        int miniBatch = (int) input.size(0);
+        long miniBatch = input.size(0);
         int inH = (int)input.size(2);
         int inW = (int)input.size(3);
 
-        int inDepth = (int) depthWiseWeights.size(2);
+        long inDepth = depthWiseWeights.size(2);
         int kH = (int) depthWiseWeights.size(0);
         int kW = (int) depthWiseWeights.size(1);
 
@@ -169,10 +169,9 @@ public class DepthwiseConvolution2DLayer extends ConvolutionLayer {
 
         INDArray input = this.input.castTo(dataType); //no-op if correct dtype
 
-        // FIXME: int cast
-        int inDepth = (int) depthWiseWeights.size(2);
-        int depthMultiplier = (int) depthWiseWeights.size(3);
-        int outDepth = depthMultiplier * inDepth;
+        long inDepth = depthWiseWeights.size(2);
+        long depthMultiplier = depthWiseWeights.size(3);
+        long outDepth = depthMultiplier * inDepth;
 
         if (input.size(1) != inDepth) {
             String layerName = conf.getLayer().getLayerName();
@@ -197,7 +196,9 @@ public class DepthwiseConvolution2DLayer extends ConvolutionLayer {
         if (convolutionMode == ConvolutionMode.Same) {
             outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, null, convolutionMode, dilation);
 
-            // FIXME: int cast
+            if (input.size(2) > Integer.MAX_VALUE || input.size(3) > Integer.MAX_VALUE) {
+                throw new ND4JArraySizeException();
+            }
             pad = ConvolutionUtils.getSameModeTopLeftPadding(
                     outSize, new int[]{(int) input.size(2), (int) input.size(3)}, kernel, strides, dilation);
         } else {
@@ -205,8 +206,8 @@ public class DepthwiseConvolution2DLayer extends ConvolutionLayer {
             outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, pad, convolutionMode, dilation);
         }
 
-        int outH = outSize[0];
-        int outW = outSize[1];
+        long outH = outSize[0];
+        long outW = outSize[1];
 
         val miniBatch = input.size(0);
         INDArray output = workspaceMgr.create(
@@ -33,6 +33,7 @@ import org.nd4j.linalg.api.memory.MemoryWorkspace;
 import org.nd4j.linalg.api.ndarray.INDArray;
 import org.nd4j.linalg.api.ops.CustomOp;
 import org.nd4j.linalg.api.ops.DynamicCustomOp;
+import org.nd4j.linalg.exception.ND4JArraySizeException;
 import org.nd4j.linalg.factory.Nd4j;
 import org.nd4j.linalg.primitives.Pair;
 import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
@@ -90,8 +91,7 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer {
 
         INDArray input = this.input.castTo(dataType);
 
-        // FIXME: int cast
-        int miniBatch = (int) input.size(0);
+        long miniBatch = input.size(0);
         int inH = (int)input.size(2);
         int inW = (int)input.size(3);
 
@@ -194,9 +194,8 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer {
                             + " " + layerId());
         }
 
-        // FIXME: int cast
-        int inDepth = (int) depthWiseWeights.size(1);
-        int outDepth = (int) pointWiseWeights.size(0);
+        long inDepth = depthWiseWeights.size(1);
+        long outDepth = pointWiseWeights.size(0);
 
         if (input.size(1) != inDepth) {
             String layerName = conf.getLayer().getLayerName();
@@ -220,7 +219,9 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer {
         if (convolutionMode == ConvolutionMode.Same) {
             outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, null, convolutionMode, dilation); //Also performs validation
 
-            // FIXME: int cast
+            if (input.size(2) > Integer.MAX_VALUE || input.size(3) > Integer.MAX_VALUE) {
+                throw new ND4JArraySizeException();
+            }
             pad = ConvolutionUtils.getSameModeTopLeftPadding(outSize, new int[] {(int) input.size(2), (int) input.size(3)}, kernel,
                             strides, dilation );
         } else {
@@ -75,11 +75,10 @@ public class SpaceToDepth extends AbstractLayer<org.deeplearning4j.nn.conf.layer
     public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
         assertInputSet(true);
 
-        // FIXME: int cast
-        int miniBatch = (int) input.size(0);
-        int inDepth = (int) input.size(1);
-        int inH = (int) input.size(2);
-        int inW = (int) input.size(3);
+        long miniBatch = input.size(0);
+        long inDepth = input.size(1);
+        long inH = input.size(2);
+        long inW = input.size(3);
 
         INDArray input = this.input.castTo(dataType); //No-op if already correct type
 
@@ -122,17 +121,16 @@ public class SpaceToDepth extends AbstractLayer<org.deeplearning4j.nn.conf.layer
             return preOutput;
         }
 
-        // FIXME: int cast
-        int miniBatch = (int) input.size(0);
-        int depth = (int) input.size(1);
-        int inH = (int) input.size(2);
-        int inW = (int) input.size(3);
+        long miniBatch = input.size(0);
+        long depth = input.size(1);
+        long inH = input.size(2);
+        long inW = input.size(3);
 
         int blockSize = getBlockSize();
 
-        int outH = inH / blockSize;
-        int outW = inW / blockSize;
-        int outDepth = depth * blockSize * blockSize;
+        long outH = inH / blockSize;
+        long outW = inW / blockSize;
+        long outDepth = depth * blockSize * blockSize;
 
         INDArray out = workspaceMgr.create(ArrayType.ACTIVATIONS, input.dataType(), new long[]{1, miniBatch * outDepth * outH * outW}, 'c');
         INDArray reshapedOut;
@@ -71,9 +71,8 @@ public class Subsampling3DLayer extends AbstractLayer<org.deeplearning4j.nn.conf
 
         boolean isNCDHW = layerConf().getDataFormat() == Convolution3D.DataFormat.NCDHW;
 
-        // FIXME: int cast
-        int miniBatch = (int) input.size(0);
-        int inChannels = (int) (isNCDHW ? input.size(1) : input.size(4));
+        long miniBatch = input.size(0);
+        long inChannels = isNCDHW ? input.size(1) : input.size(4);
         int inD = (int) (isNCDHW ? input.size(2) : input.size(1));
         int inH = (int) (isNCDHW ? input.size(3) : input.size(2));
         int inW = (int) (isNCDHW ? input.size(4) : input.size(3));
@@ -148,9 +147,8 @@ public class Subsampling3DLayer extends AbstractLayer<org.deeplearning4j.nn.conf
             }
         }
 
-        // FIXME: int cast
-        int miniBatch = (int) input.size(0);
-        int inChannels = (int) (isNCDHW ? input.size(1) : input.size(4));
+        long miniBatch = input.size(0);
+        long inChannels = isNCDHW ? input.size(1) : input.size(4);
         int inD = (int) (isNCDHW ? input.size(2) : input.size(1));
         int inH = (int) (isNCDHW ? input.size(3) : input.size(2));
         int inW = (int) (isNCDHW ? input.size(4) : input.size(3));
@@ -170,9 +168,9 @@ public class Subsampling3DLayer extends AbstractLayer<org.deeplearning4j.nn.conf
             outSize = Convolution3DUtils.get3DOutputSize(
                             input, kernel, strides, pad, convolutionMode, dilation, isNCDHW);
         }
-        int outD = outSize[0];
-        int outH = outSize[1];
-        int outW = outSize[2];
+        long outD = outSize[0];
+        long outH = outSize[1];
+        long outW = outSize[2];
 
         String opName = layerConf().getPoolingType() == PoolingType.MAX ? "maxpool3dnew" : "avgpool3dnew";
 
@@ -108,9 +108,6 @@ public class SubsamplingLayer extends AbstractLayer<org.deeplearning4j.nn.conf.l
         if(epsilon.dataType() != dataType)
             epsilon = epsilon.castTo(dataType);
 
-        // FIXME: int cast
-        int miniBatch = (int) input.size(0);
-        int inDepth = (int) input.size(1);
         int inH = (int)input.size(2);
         int inW = (int)input.size(3);
 
@@ -158,9 +155,6 @@ public class SubsamplingLayer extends AbstractLayer<org.deeplearning4j.nn.conf.l
 
         //subsampling doesn't have weights and thus gradients are not calculated for this layer
         //only scale and reshape epsilon
-        // FIXME: int cast
-        int inputHeight = (int) input().size(-2);
-        int inputWidth = (int) input().size(-1);
         Gradient retGradient = new DefaultGradient();
 
 
@@ -231,9 +225,8 @@ public class SubsamplingLayer extends AbstractLayer<org.deeplearning4j.nn.conf.l
 
         INDArray input = this.input.castTo(dataType);
 
-        // FIXME: int cast
-        int miniBatch = (int) input.size(0);
-        int inDepth = (int) input.size(1);
+        long miniBatch = input.size(0);
+        long inDepth = input.size(1);
         int inH = (int)input.size(2);
         int inW = (int)input.size(3);
 
@@ -250,8 +243,8 @@ public class SubsamplingLayer extends AbstractLayer<org.deeplearning4j.nn.conf.l
             pad = layerConf().getPadding();
             outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, pad, convolutionMode, dilation); //Also performs validation
         }
-        int outH = outSize[0];
-        int outW = outSize[1];
+        long outH = outSize[0];
+        long outW = outSize[1];
 
 
         if (helper != null && (helperCountFail == 0 || !layerConf().isCudnnAllowFallback())) {
@@ -278,9 +271,6 @@ public class SubsamplingLayer extends AbstractLayer<org.deeplearning4j.nn.conf.l
             }
         }
 
-
-
-
         INDArray output = workspaceMgr.createUninitialized(ArrayType.ACTIVATIONS, input.dataType(), new long[]{miniBatch, inDepth, outH, outW}, 'c');
         DynamicCustomOp.DynamicCustomOpsBuilder b;
         int extra = 0;
@@ -65,11 +65,10 @@ public class Upsampling1D extends Upsampling2D {
         INDArray originalInput = input;
         input = input.castTo(dataType).reshape(input.size(0), input.size(1), input.size(2), 1);
 
-        // FIXME: int cast
-        int miniBatch = (int) input.size(0);
-        int inDepth = (int) input.size(1);
-        int inH = (int) input.size(2);
-        int inW = (int) input.size(3);
+        long miniBatch = input.size(0);
+        long inDepth = input.size(1);
+        long inH = input.size(2);
+        long inW = input.size(3);
 
 
         INDArray outEpsilon = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), miniBatch * inDepth * inH * inW);
@@ -62,11 +62,10 @@ public class Upsampling2D extends AbstractLayer<org.deeplearning4j.nn.conf.layer
     public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
         assertInputSet(true);
 
-        // FIXME: int cast
-        int miniBatch = (int) input.size(0);
-        int inDepth = (int) input.size(1);
-        int inH = (int) input.size(2);
-        int inW = (int) input.size(3);
+        long miniBatch = (int) input.size(0);
+        long inDepth = (int) input.size(1);
+        long inH = (int) input.size(2);
+        long inW = (int) input.size(3);
 
         INDArray reshapedEpsilon = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, epsilon.dataType(), new long[]{miniBatch, inDepth, inH, inW}, 'c');
 
@@ -106,15 +105,14 @@ public class Upsampling2D extends AbstractLayer<org.deeplearning4j.nn.conf.layer
             return preOutput;
         }
 
-        // FIXME: int cast
-        int miniBatch = (int) input.size(0);
-        int inDepth = (int) input.size(1);
-        int inH = (int) input.size(2);
-        int inW = (int) input.size(3);
+        long miniBatch = (int) input.size(0);
+        long inDepth = (int) input.size(1);
+        long inH = (int) input.size(2);
+        long inW = (int) input.size(3);
 
         int[] size = getSize();
-        int outH = inH * size[0];
-        int outW = inW * size[1];
+        int outH = (int)inH * size[0];
+        int outW = (int)inW * size[1];
 
         INDArray reshapedOutput = workspaceMgr.createUninitialized(ArrayType.ACTIVATIONS, input.dataType(), new long[]{miniBatch, inDepth, outH, outW}, 'c');
 
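In the Upsampling2D forward pass above, outH and outW remain int and are written as `(int)inH * size[0]`; the cast binds to inH alone, so the multiply still runs in int arithmetic. The standalone sketch below (plain Java, made-up values) shows the difference parenthesisation makes once the product gets large, which is the kind of overflow the rest of this commit is removing:

    public class CastPrecedenceSketch {
        public static void main(String[] args) {
            long inH = 1_000_000L;
            int scale = 4_000;

            int narrowedFirst = (int) inH * scale;   // cast applies to inH only; the int multiply can wrap
            long widened = inH * scale;              // long multiply keeps the exact value

            System.out.println(narrowedFirst + " vs " + widened);
        }
    }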
@@ -68,22 +68,21 @@ public class Upsampling3D extends AbstractLayer<org.deeplearning4j.nn.conf.layer
         assertInputSet(true);
 
         boolean ncdhw = layerConf().getDataFormat() == org.deeplearning4j.nn.conf.layers.Convolution3D.DataFormat.NCDHW;
-        // FIXME: int cast
         // Assumes NCDHW order
-        int miniBatch = (int) input.size(0);
-        int inChannels, inD, inH, inW;
+        long miniBatch = input.size(0);
+        long inChannels, inD, inH, inW;
         int[] intArgs;
         if(ncdhw){
-            inChannels = (int) input.size(1);
-            inD = (int) input.size(2);
-            inH = (int) input.size(3);
-            inW = (int) input.size(4);
+            inChannels = input.size(1);
+            inD = input.size(2);
+            inH = input.size(3);
+            inW = input.size(4);
             intArgs = new int[] {1}; // 1 is channels first
         } else {
-            inD = (int) input.size(1);
-            inH = (int) input.size(2);
-            inW = (int) input.size(3);
-            inChannels = (int) input.size(4);
+            inD = input.size(1);
+            inH = input.size(2);
+            inW = input.size(3);
+            inChannels = input.size(4);
             intArgs = new int[] {0}; // 0 is channels last
         }
 
@@ -134,9 +133,8 @@ public class Upsampling3D extends AbstractLayer<org.deeplearning4j.nn.conf.layer
         }
 
         boolean ncdhw = layerConf().getDataFormat() == org.deeplearning4j.nn.conf.layers.Convolution3D.DataFormat.NCDHW;
-        // FIXME: int cast
-        int miniBatch = (int) input.size(0);
-        int inChannels, inD, inH, inW;
+        long miniBatch = input.size(0);
+        long inChannels, inD, inH, inW;
         int[] intArgs;
         int[] size = getSize();
         if(ncdhw){
@@ -20,6 +20,7 @@ import lombok.extern.slf4j.Slf4j;
 import lombok.val;
 import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.ops.custom.ScatterUpdate;
+import org.nd4j.linalg.exception.ND4JArraySizeException;
 import org.nd4j.linalg.primitives.Pair;
 import org.deeplearning4j.exception.DL4JInvalidInputException;
 import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
@@ -64,8 +65,7 @@ public class EmbeddingLayer extends BaseLayer<org.deeplearning4j.nn.conf.layers.
         INDArray weightGradients = gradientViews.get(DefaultParamInitializer.WEIGHT_KEY);
         weightGradients.assign(0);
 
-        // FIXME: int cast
-        int[] indexes = new int[(int) input.length()];
+        long[] indexes = new long[(int) input.length()];
         for (int i = 0; i < indexes.length; i++) {
             indexes[i] = input.getInt(i, 0);
         }
@@ -99,7 +99,8 @@ public class EmbeddingLayer extends BaseLayer<org.deeplearning4j.nn.conf.layers.
 
         val nIn = layerConf().getNIn();
 
-        // FIXME: int cast
+        if (input.length() > Integer.MAX_VALUE)
+            throw new ND4JArraySizeException();
         int[] indexes = new int[(int) input.length()];
         for (int i = 0; i < indexes.length; i++) {
             indexes[i] = input.getInt(i, 0);
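In EmbeddingLayer the gradient path now gathers indices into a long[] (feeding the scatter-update changes mentioned in the commit message), while the activation path keeps int[] but guards input.length() first. For comparison only, and not what the code above uses, the JDK offers the same checked narrowing via Math.toIntExact, which signals overflow with ArithmeticException rather than ND4JArraySizeException:

    public class ToIntExactSketch {
        public static void main(String[] args) {
            long smallLength = 10_000L;
            System.out.println(Math.toIntExact(smallLength));   // 10000

            long hugeLength = 3_000_000_000L;
            System.out.println(Math.toIntExact(hugeLength));    // throws ArithmeticException
        }
    }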
@@ -56,7 +56,7 @@ public class MKLDNNBatchNormHelper implements BatchNormalizationHelper {
     }
 
     @Override
-    public Pair<Gradient, INDArray> backpropGradient(INDArray input, INDArray epsilon, int[] shape, INDArray gamma,
+    public Pair<Gradient, INDArray> backpropGradient(INDArray input, INDArray epsilon, long[] shape, INDArray gamma,
                                                      INDArray dGammaView, INDArray dBetaView, double eps, LayerWorkspaceMgr workspaceMgr) {
         //2019-02-14: Backprop disabled pending fixes. https://github.com/deeplearning4j/deeplearning4j/issues/7166
         //Also no MKL-DNN implemented for backprop anyway
@@ -82,7 +82,7 @@ public class MKLDNNBatchNormHelper implements BatchNormalizationHelper {
     }
 
     @Override
-    public INDArray preOutput(INDArray x, boolean training, int[] shape, INDArray gamma, INDArray beta, INDArray mean, INDArray var,
+    public INDArray preOutput(INDArray x, boolean training, long[] shape, INDArray gamma, INDArray beta, INDArray mean, INDArray var,
                               double decay, double eps, LayerWorkspaceMgr workspaceMgr) {
         if(x.dataType() != DataType.FLOAT)
             return null; //MKL-DNN only supports float
@@ -152,10 +152,9 @@ public class BatchNormalization extends BaseLayer<org.deeplearning4j.nn.conf.lay
             eps = epsilon;
         }
 
-        // FIXME: int cast
         Pair<Gradient,INDArray> ret = null;
         try {
-            ret = helper.backpropGradient(in, eps, ArrayUtil.toInts(shape), gamma, dGammaView, dBetaView,
+            ret = helper.backpropGradient(in, eps, shape, gamma, dGammaView, dBetaView,
                     layerConf.getEps(), workspaceMgr);
         } catch (ND4JOpProfilerException e){
             throw e; //NaN panic etc for debugging
@@ -438,7 +437,6 @@ public class BatchNormalization extends BaseLayer<org.deeplearning4j.nn.conf.lay
             //Note that cudnn does not support dense (2d) batch norm case as of v7.1
             double decay = layerConf.getDecay();
 
-            // FIXME: int cast
             INDArray ret = null;
             try {
                 if(globalVarView == null){
@@ -448,7 +446,7 @@ public class BatchNormalization extends BaseLayer<org.deeplearning4j.nn.conf.lay
                     globalVarView.muli(globalVarView);
                 }
 
-                ret = helper.preOutput(in, training == TrainingMode.TRAIN, ArrayUtil.toInts(shape), gamma, beta, globalMeanView,
+                ret = helper.preOutput(in, training == TrainingMode.TRAIN, shape, gamma, beta, globalMeanView,
                         globalVarView, decay, layerConf.getEps(), workspaceMgr);
             } catch (ND4JOpProfilerException e){
                 throw e; //NaN panic etc for debugging
@@ -31,10 +31,10 @@ import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
 public interface BatchNormalizationHelper extends LayerHelper {
     boolean checkSupported(double eps, boolean fixedGammaBeta);
 
-    Pair<Gradient, INDArray> backpropGradient(INDArray input, INDArray epsilon, int[] shape, INDArray gamma,
+    Pair<Gradient, INDArray> backpropGradient(INDArray input, INDArray epsilon, long[] shape, INDArray gamma,
                     INDArray dGammaView, INDArray dBetaView, double eps, LayerWorkspaceMgr workspaceMgr);
 
-    INDArray preOutput(INDArray x, boolean training, int[] shape, INDArray gamma, INDArray beta, INDArray mean,
+    INDArray preOutput(INDArray x, boolean training, long[] shape, INDArray gamma, INDArray beta, INDArray mean,
                     INDArray var, double decay, double eps, LayerWorkspaceMgr workspaceMgr);
 
     INDArray getMeanCache(DataType dataType);
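Widening the shape parameter in BatchNormalizationHelper from int[] to long[] is what lets the BatchNormalization call sites above drop ArrayUtil.toInts(shape) and pass the long[] shape straight through; every implementer, such as the MKL-DNN helper earlier in this diff, has to change its signature in the same step. A self-contained sketch of that mechanic, with hypothetical names not taken from the codebase:

    interface ShapeConsumer {
        void accept(long[] shape);   // was int[] before the hypothetical widening
    }

    public class HelperSignatureSketch {
        public static void main(String[] args) {
            ShapeConsumer helper = shape -> System.out.println(shape.length + " dims, first = " + shape[0]);
            long[] activationShape = {32, 128, 28, 28};  // made-up NCHW shape
            helper.accept(activationShape);              // no int[] conversion needed
        }
    }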
@@ -114,10 +114,9 @@ public class Yolo2OutputLayer extends AbstractLayer<org.deeplearning4j.nn.conf.l
         double lambdaCoord = layerConf().getLambdaCoord();
         double lambdaNoObj = layerConf().getLambdaNoObj();
 
-        // FIXME: int cast
-        int mb = (int) input.size(0);
-        int h = (int) input.size(2);
-        int w = (int) input.size(3);
+        long mb = input.size(0);
+        long h = input.size(2);
+        long w = input.size(3);
         int b = (int) layerConf().getBoundingBoxes().size(0);
         int c = (int) labels.size(1)-4;
 
@@ -243,12 +242,12 @@ public class Yolo2OutputLayer extends AbstractLayer<org.deeplearning4j.nn.conf.l
 
         //Class prediction loss
         INDArray classPredictionsPreSoftmax2d = inputClassesPreSoftmax.permute(0,1,3,4,2) //[minibatch, b, c, h, w] To [mb, b, h, w, c]
-                .dup('c').reshape('c', new int[]{mb*b*h*w, c});
+                .dup('c').reshape('c', new long[]{mb*b*h*w, c});
         INDArray classLabelsBroadcast = Nd4j.createUninitialized(input.dataType(), new long[]{mb, b, c, h, w}, 'c');
         for(int i=0; i<b; i++ ){
             classLabelsBroadcast.get(all(), point(i), all(), all(), all()).assign(classLabels); //[mb, c, h, w] to [mb, b, c, h, w]
         }
-        INDArray classLabels2d = classLabelsBroadcast.permute(0,1,3,4,2).dup('c').reshape('c', new int[]{mb*b*h*w, c});
+        INDArray classLabels2d = classLabelsBroadcast.permute(0,1,3,4,2).dup('c').reshape('c', new long[]{mb*b*h*w, c});
 
         //Calculate the loss:
         ILossFunction lossConfidence = new LossL2();
@@ -297,7 +296,7 @@ public class Yolo2OutputLayer extends AbstractLayer<org.deeplearning4j.nn.conf.l
         // ----- Gradient Calculation (specifically: return dL/dIn -----
 
         INDArray epsOut = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape(), 'c');
-        INDArray epsOut5 = Shape.newShapeNoCopy(epsOut, new int[]{mb, b, 5+c, h, w}, false);
+        INDArray epsOut5 = Shape.newShapeNoCopy(epsOut, new long[]{mb, b, 5+c, h, w}, false);
         INDArray epsClassPredictions = epsOut5.get(all(), all(), interval(5, 5+c), all(), all()); //Shape: [mb, b, 5+c, h, w]
         INDArray epsXY = epsOut5.get(all(), all(), interval(0,2), all(), all());
         INDArray epsWH = epsOut5.get(all(), all(), interval(2,4), all(), all());
@@ -426,16 +425,16 @@ public class Yolo2OutputLayer extends AbstractLayer<org.deeplearning4j.nn.conf.l
      * @return IOU and gradients
      */
     private static IOURet calculateIOULabelPredicted(INDArray labelTL, INDArray labelBR, INDArray predictedWH, INDArray predictedXYinGridBox, INDArray objectPresentMask, INDArray objectPresentMaskBool){
-        // FIXME: int cast
-        int mb = (int) labelTL.size(0);
-        int h = (int) labelTL.size(2);
-        int w = (int) labelTL.size(3);
-        int b = (int) predictedWH.size(1);
+        long mb = labelTL.size(0);
+        long h = labelTL.size(2);
+        long w = labelTL.size(3);
+        long b = predictedWH.size(1);
 
         INDArray labelWH = labelBR.sub(labelTL); //4d [mb, 2, H, W], label W/H in terms of number of grid boxes
 
-        int gridH = (int) labelTL.size(2);
-        int gridW = (int) labelTL.size(3);
+        long gridH = labelTL.size(2);
+        long gridW = labelTL.size(3);
         //Add grid positions to the predicted XY values (to get predicted XY in terms of grid cell units in image,
         // from (0 to 1 in grid cell) format)
         INDArray linspaceX = Nd4j.linspace(0, gridW-1, gridW, predictedWH.dataType());
@@ -45,12 +45,11 @@ public class YoloUtils {
     }
 
     public static INDArray activate(@NonNull INDArray boundingBoxPriors, @NonNull INDArray input, LayerWorkspaceMgr layerWorkspaceMgr){
-        // FIXME: int cast
-        int mb = (int) input.size(0);
-        int h = (int) input.size(2);
-        int w = (int) input.size(3);
-        int b = (int) boundingBoxPriors.size(0);
-        int c = (int) (input.size(1)/b)-5; //input.size(1) == b * (5 + C) -> C = (input.size(1)/b) - 5
+        long mb = input.size(0);
+        long h = input.size(2);
+        long w = input.size(3);
+        long b = boundingBoxPriors.size(0);
+        long c = input.size(1)/b-5; //input.size(1) == b * (5 + C) -> C = (input.size(1)/b) - 5
 
         INDArray output = layerWorkspaceMgr.create(ArrayType.ACTIVATIONS, input.dataType(), input.shape(), 'c');
         INDArray output5 = output.reshape('c', mb, b, 5+c, h, w);
@@ -77,7 +76,7 @@ public class YoloUtils {
         //TODO OPTIMIZE?
         INDArray inputClassesPreSoftmax = input5.get(all(), all(), interval(5, 5+c), all(), all()); //Shape: [minibatch, C, H, W]
         INDArray classPredictionsPreSoftmax2d = inputClassesPreSoftmax.permute(0,1,3,4,2) //[minibatch, b, c, h, w] To [mb, b, h, w, c]
-                .dup('c').reshape('c', new int[]{mb*b*h*w, c});
+                .dup('c').reshape('c', new long[]{mb*b*h*w, c});
         Transforms.softmax(classPredictionsPreSoftmax2d, false);
         INDArray postSoftmax5d = classPredictionsPreSoftmax2d.reshape('c', mb, b, h, w, c ).permute(0, 1, 4, 2, 3);
 
@@ -173,13 +172,12 @@ public class YoloUtils {
             throw new IllegalStateException("Invalid confidence threshold: must be in range [0,1]. Got: " + confThreshold);
         }
 
-        // FIXME: int cast
         //Activations format: [mb, 5b+c, h, w]
-        int mb = (int) networkOutput.size(0);
-        int h = (int) networkOutput.size(2);
-        int w = (int) networkOutput.size(3);
-        int b = (int) boundingBoxPriors.size(0);
-        int c = (int) (networkOutput.size(1)/b)-5; //input.size(1) == b * (5 + C) -> C = (input.size(1)/b) - 5
+        long mb = networkOutput.size(0);
+        long h = networkOutput.size(2);
+        long w = networkOutput.size(3);
+        long b = boundingBoxPriors.size(0);
+        long c = (networkOutput.size(1)/b)-5; //input.size(1) == b * (5 + C) -> C = (input.size(1)/b) - 5
 
         //Reshape from [minibatch, B*(5+C), H, W] to [minibatch, B, 5+C, H, W] to [minibatch, B, 5, H, W]
         INDArray output5 = networkOutput.dup('c').reshape(mb, b, 5+c, h, w);
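The relationship used in the YoloUtils hunks above, input.size(1) == b * (5 + C), can be checked with a tiny worked example; the numbers below (5 priors, 20 classes) are illustrative only:

    public class YoloChannelArithmetic {
        public static void main(String[] args) {
            long b = 5;                            // hypothetical number of bounding box priors
            long numClasses = 20;                  // hypothetical C
            long channels = b * (5 + numClasses);  // dimension 1 of the activations: 125
            long recoveredC = channels / b - 5;    // the expression used in YoloUtils above
            System.out.println(recoveredC);        // prints 20
        }
    }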
@@ -40,6 +40,7 @@ import org.nd4j.linalg.api.ndarray.INDArray;
 import org.nd4j.linalg.api.ops.impl.transforms.pairwise.arithmetic.MulOp;
 import org.nd4j.linalg.api.ops.impl.transforms.same.TimesOneMinus;
 import org.nd4j.linalg.api.shape.Shape;
+import org.nd4j.linalg.exception.ND4JArraySizeException;
 import org.nd4j.linalg.exception.ND4JOpProfilerException;
 import org.nd4j.linalg.factory.Nd4j;
 import org.nd4j.linalg.indexing.NDArrayIndex;
@@ -113,7 +114,9 @@ public class LSTMHelpers {
 
         input = input.castTo(inputWeights.dataType()); //No-op if already correct dtype
 
-        // FIXME
+        if ((!is2dInput && (input.size(2) > Integer.MAX_VALUE)) ||
+                recurrentWeights.size(0) > Integer.MAX_VALUE || input.size(0) > Integer.MAX_VALUE)
+            throw new ND4JArraySizeException();
         int timeSeriesLength = (int) (is2dInput ? 1 : input.size(2));
         int hiddenLayerSize = (int) recurrentWeights.size(0);
         int miniBatchSize = (int) input.size(0);
@@ -550,7 +553,8 @@ public class LSTMHelpers {
         for (long iTimeIndex = timeSeriesLength - 1; iTimeIndex >= endIdx; iTimeIndex--) {
             try(MemoryWorkspace ws = workspaceMgr.notifyScopeEntered(ArrayType.RNN_BP_LOOP_WORKING_MEM)) {
 
-                // FIXME: int cast
+                if (iTimeIndex > Integer.MAX_VALUE)
+                    throw new ND4JArraySizeException();
                 int time = (int) iTimeIndex;
                 int inext = 1;
 
@@ -574,8 +578,6 @@ public class LSTMHelpers {
                         (iTimeIndex == 0 ? fwdPass.prevAct : fwdPass.fwdPassOutputAsArrays[(int) (time - inext)]);
                 INDArray currMemCellState = fwdPass.memCellState[(int) time];
 
-
-                // FIXME: int cast
                 //LSTM unit output errors (dL/d(a_out)); not to be confused with \delta=dL/d(z_out)
                 INDArray epsilonSlice = (is2dInput ? epsilon : epsilon.tensorAlongDimension((int) time, 1, 0)); //(w^{L+1}*(delta^{(L+1)t})^T)^T or equiv.
 
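The recurring pattern in these hunks is to range-check a long dimension against Integer.MAX_VALUE and throw ND4JArraySizeException before any remaining (int) cast. A standalone sketch of that guard (the helper name safeIntCast is hypothetical, not part of this commit):

    import org.nd4j.linalg.exception.ND4JArraySizeException;

    public class SafeCast {
        // Guard a long dimension before narrowing it to int, mirroring the checks added above.
        public static int safeIntCast(long size) {
            if (size > Integer.MAX_VALUE)
                throw new ND4JArraySizeException();
            return (int) size;
        }

        public static void main(String[] args) {
            System.out.println(safeIntCast(1024L));   // fine: prints 1024
            // safeIntCast(5_000_000_000L);           // would throw ND4JArraySizeException
        }
    }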
@@ -89,8 +89,7 @@ public class RnnLossLayer extends BaseLayer<org.deeplearning4j.nn.conf.layers.Rn
         ILossFunction lossFunction = layerConf().getLossFn();
         INDArray delta2d = lossFunction.computeGradient(labels2d, input2d.dup(input2d.ordering()), layerConf().getActivationFn(), maskReshaped);
 
-        // FIXME: int cast
-        INDArray delta3d = TimeSeriesUtils.reshape2dTo3d(delta2d, (int) input.size(0), workspaceMgr, ArrayType.ACTIVATION_GRAD);
+        INDArray delta3d = TimeSeriesUtils.reshape2dTo3d(delta2d, input.size(0), workspaceMgr, ArrayType.ACTIVATION_GRAD);
 
         // grab the empty gradient
         Gradient gradient = new DefaultGradient();
@@ -119,7 +118,6 @@ public class RnnLossLayer extends BaseLayer<org.deeplearning4j.nn.conf.layers.Rn
 
     @Override
     public int numLabels() {
-        // FIXME: int cast
         return (int) labels.size(1);
     }
 
@@ -167,7 +165,7 @@ public class RnnLossLayer extends BaseLayer<org.deeplearning4j.nn.conf.layers.Rn
 
         INDArray as2d = TimeSeriesUtils.reshape3dTo2d(input);
         INDArray out2d = layerConf().getActivationFn().getActivation(workspaceMgr.dup(ArrayType.ACTIVATIONS, as2d, as2d.ordering()), training);
-        return workspaceMgr.leverageTo(ArrayType.ACTIVATIONS, TimeSeriesUtils.reshape2dTo3d(out2d, (int)input.size(0), workspaceMgr, ArrayType.ACTIVATIONS));
+        return workspaceMgr.leverageTo(ArrayType.ACTIVATIONS, TimeSeriesUtils.reshape2dTo3d(out2d, input.size(0), workspaceMgr, ArrayType.ACTIVATIONS));
     }
 
     @Override
@@ -254,7 +252,6 @@ public class RnnLossLayer extends BaseLayer<org.deeplearning4j.nn.conf.layers.Rn
         //scoreArray: shape [minibatch*timeSeriesLength, 1]
         //Reshape it to [minibatch, timeSeriesLength] then sum over time step
 
-        // FIXME: int cast
         INDArray scoreArrayTs = TimeSeriesUtils.reshapeVectorToTimeSeriesMask(scoreArray, (int)input.size(0));
         INDArray summedScores = scoreArrayTs.sum(1);
 
@@ -70,8 +70,7 @@ public class RnnOutputLayer extends BaseOutputLayer<org.deeplearning4j.nn.conf.l
         this.input = inputTemp;
         INDArray epsilon2d = gradAndEpsilonNext.getSecond();
 
-        // FIXME: int cast
-        INDArray epsilon3d = TimeSeriesUtils.reshape2dTo3d(epsilon2d, (int) input.size(0), workspaceMgr, ArrayType.ACTIVATION_GRAD);
+        INDArray epsilon3d = TimeSeriesUtils.reshape2dTo3d(epsilon2d, input.size(0), workspaceMgr, ArrayType.ACTIVATION_GRAD);
 
         weightNoiseParams.clear();
 
@@ -145,8 +144,7 @@ public class RnnOutputLayer extends BaseOutputLayer<org.deeplearning4j.nn.conf.l
             }
         }
 
-        // FIXME: int cast
-        return TimeSeriesUtils.reshape2dTo3d(act2d, (int) input.size(0), workspaceMgr, ArrayType.ACTIVATIONS);
+        return TimeSeriesUtils.reshape2dTo3d(act2d, input.size(0), workspaceMgr, ArrayType.ACTIVATIONS);
     }
 
     @Override
@@ -205,7 +203,6 @@ public class RnnOutputLayer extends BaseOutputLayer<org.deeplearning4j.nn.conf.l
         //scoreArray: shape [minibatch*timeSeriesLength, 1]
         //Reshape it to [minibatch, timeSeriesLength] then sum over time step
 
-        // FIXME: int cast
         INDArray scoreArrayTs = TimeSeriesUtils.reshapeVectorToTimeSeriesMask(scoreArray, (int)input.size(0));
         INDArray summedScores = scoreArrayTs.sum(true, 1);
 
@@ -41,6 +41,7 @@ import org.nd4j.linalg.api.blas.Level1;
 import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.memory.MemoryWorkspace;
 import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.exception.ND4JArraySizeException;
 import org.nd4j.linalg.factory.Nd4j;
 import org.nd4j.linalg.learning.regularization.Regularization;
 import org.nd4j.linalg.lossfunctions.ILossFunction;
@@ -552,7 +553,8 @@ public class VariationalAutoencoder implements Layer {
 
     @Override
     public int batchSize() {
-        // FIXME: int cast
+        if (input.size(0) > Integer.MAX_VALUE)
+            throw new ND4JArraySizeException();
         return (int) input.size(0);
     }
 
@@ -862,7 +864,8 @@ public class VariationalAutoencoder implements Layer {
 
     @Override
     public int getInputMiniBatchSize() {
-        // FIXME: int cast
+        if (input.size(0) > Integer.MAX_VALUE)
+            throw new ND4JArraySizeException();
         return (int) input.size(0);
     }
 
@@ -75,6 +75,7 @@ import org.nd4j.linalg.dataset.DataSet;
 import org.nd4j.linalg.dataset.api.MultiDataSet;
 import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
 import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator;
+import org.nd4j.linalg.exception.ND4JArraySizeException;
 import org.nd4j.linalg.factory.Nd4j;
 import org.nd4j.linalg.heartbeat.Heartbeat;
 import org.nd4j.linalg.heartbeat.reports.Environment;
@@ -425,7 +426,8 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura
         try(MemoryWorkspace ws = workspaceMgr.notifyScopeEntered(ArrayType.FF_WORKING_MEM)) {
             if (layerWiseConfigurations.getInputPreProcess(layerIdx) != null) {
 
-                // FIXME: int cast
+                if (input.size(0) > Integer.MAX_VALUE)
+                    throw new ND4JArraySizeException();
                 outputOfPrevLayer = layerWiseConfigurations.getInputPreProcess(layerIdx).preProcess(outputOfPrevLayer, (int) input.size(0),
                         LayerWorkspaceMgr.noWorkspaces(helperWorkspaces));
             }
@@ -439,7 +441,8 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura
         //In 99+% of cases, the input and labels dimension 0 size should be identical
         //The only real exceptions: space to batch, and batch to space layers
         //In those cases, we should base it on the labels size, as this impacts gradient calculation
-        // FIXME: int cast
+        if (input.size(0) > Integer.MAX_VALUE || labels.size(0) > Integer.MAX_VALUE)
+            throw new ND4JArraySizeException();
         return labels == null ? (int) input.size(0) : (int)labels.size(0);
     }
 
@@ -2074,7 +2077,8 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura
                 if (endTimeIdx > timeSeriesLength)
                     endTimeIdx = timeSeriesLength;
 
-                // FIXME: int cast
+                if (startTimeIdx > Integer.MAX_VALUE || endTimeIdx > Integer.MAX_VALUE)
+                    throw new ND4JArraySizeException();
                 INDArray[] subsets = getSubsetsForTbptt((int) startTimeIdx, (int) endTimeIdx, input, labels,
                         featuresMaskArray, labelsMaskArray);
 
@@ -2211,7 +2215,9 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura
     public int[] predict(INDArray d) {
         INDArray output = output(d, Layer.TrainingMode.TEST);
 
-        // FIXME: int cast
+        if (d.size(0) > Integer.MAX_VALUE)
+            throw new ND4JArraySizeException();
+
         int[] ret = new int[(int) d.size(0)];
         if (d.isRowVectorOrScalar())
             ret[0] = Nd4j.getBlasWrapper().iamax(output);
@@ -2335,7 +2341,8 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura
         org.deeplearning4j.nn.conf.layers.OutputLayer layerConf =
                 (org.deeplearning4j.nn.conf.layers.OutputLayer) getOutputLayer().conf().getLayer();
 
-        // FIXME: int cast
+        if (layerConf.getNOut() > Integer.MAX_VALUE)
+            throw new ND4JArraySizeException();
         fit(examples, FeatureUtil.toOutcomeMatrix(labels, (int) layerConf.getNOut()));
     }
 
@@ -2584,7 +2591,8 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura
         INDArray inputToOutputLayer = outputOfLayerDetached(training, FwdPassType.STANDARD,layers.length-2, data.getFeatures(),
                 data.getFeaturesMaskArray(), data.getLabelsMaskArray(), null);
 
-        // FIXME: int cast
+        if (data.getFeatures().size(0) > Integer.MAX_VALUE)
+            throw new ND4JArraySizeException();
         IOutputLayer ol = (IOutputLayer) getOutputLayer();
         if (getLayerWiseConfigurations().getInputPreProcess(layers.length - 1) != null) {
             inputToOutputLayer = getLayerWiseConfigurations().getInputPreProcess(layers.length - 1)
@@ -2647,7 +2655,8 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura
         IOutputLayer ol = (IOutputLayer) getOutputLayer();
         if(layerWiseConfigurations.getInputPreProcess(layers.length-1) != null){
 
-            // FIXME: int cast
+            if (data.getFeatures().size(0) > Integer.MAX_VALUE)
+                throw new ND4JArraySizeException();
            inputLast = layerWiseConfigurations.getInputPreProcess(layers.length-1).preProcess(inputLast,
                     (int) data.getFeatures().size(0), mgr);
         }
@@ -2811,7 +2820,8 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura
                 throw new IllegalArgumentException(
                         "Invalid input: length 0 (shape: " + Arrays.toString(input.shape()) + ")");
 
-            // FIXME: int cast
+            if (input.size(0) > Integer.MAX_VALUE)
+                throw new ND4JArraySizeException();
             setInputMiniBatchSize((int) input.size(0));
         }
     }
@@ -3086,7 +3096,8 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura
         if(!conf().isMiniBatch())
             return 1;
 
-        // FIXME: int cast
+        if (input.size(0) > Integer.MAX_VALUE)
+            throw new ND4JArraySizeException();
         return (int) input.size(0);
     }
 
@@ -3256,7 +3267,8 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura
     public void setLayerMaskArrays(INDArray featuresMaskArray, INDArray labelsMaskArray) {
         if (featuresMaskArray != null) {
 
-            // FIXME: int cast
+            if (featuresMaskArray.size(0) > Integer.MAX_VALUE)
+                throw new ND4JArraySizeException();
             //New approach: use feedForwardMaskArray method
             feedForwardMaskArray(featuresMaskArray, MaskState.Active, (int) featuresMaskArray.size(0));
 
@@ -3438,7 +3450,8 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura
                 val startTimeIdx = i * fwdLen;
                 val endTimeIdx = Math.min(startTimeIdx + fwdLen, tsLength);
 
-                // FIXME: int cast
+                if (endTimeIdx > Integer.MAX_VALUE)
+                    throw new ND4JArraySizeException();
                 INDArray[] subsets = getSubsetsForTbptt(startTimeIdx, (int) endTimeIdx, features, labels, fMask, lMask);
 
                 setLayerMaskArrays(subsets[2], subsets[3]);
@@ -3943,7 +3956,8 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura
         }
         FeedForwardLayer ffl = (FeedForwardLayer) conf;
 
-        // FIXME: int cast
+        if (ffl.getNOut() > Integer.MAX_VALUE)
+            throw new ND4JArraySizeException();
         return (int) ffl.getNOut();
     }
 
@@ -3969,7 +3983,8 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura
         }
         FeedForwardLayer ffl = (FeedForwardLayer) conf;
 
-        // FIXME: int cast
+        if (ffl.getNIn() > Integer.MAX_VALUE)
+            throw new ND4JArraySizeException();
         return (int) ffl.getNIn();
     }
 
@@ -22,6 +22,7 @@ import org.deeplearning4j.nn.conf.layers.Layer;
 import org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder;
 import org.deeplearning4j.nn.weights.IWeightInit;
 import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.exception.ND4JArraySizeException;
 import org.nd4j.linalg.indexing.NDArrayIndex;
 
 import java.util.ArrayList;
@@ -108,7 +109,8 @@ public class VariationalAutoencoderParamInitializer extends DefaultParamInitiali
         }
 
         //Between last decoder layer and parameters for p(x|z):
-        // FIXME: int cast
+        if (nIn > Integer.MAX_VALUE)
+            throw new ND4JArraySizeException();
         val nDistributionParams = layer.getOutputDistribution().distributionInputSize((int) nIn);
         val lastDecLayerSize = decoderLayerSizes[decoderLayerSizes.length - 1];
         paramCount += (lastDecLayerSize + 1) * nDistributionParams;
@@ -294,7 +296,8 @@ public class VariationalAutoencoderParamInitializer extends DefaultParamInitiali
         }
 
         //Finally, p(x|z):
-        // FIXME: int cast
+        if (nIn > Integer.MAX_VALUE)
+            throw new ND4JArraySizeException();
         int nDistributionParams = layer.getOutputDistribution().distributionInputSize((int) nIn);
         int pxzWeightCount = decoderLayerSizes[decoderLayerSizes.length - 1] * nDistributionParams;
         INDArray pxzWeightView =
@@ -402,7 +405,8 @@ public class VariationalAutoencoderParamInitializer extends DefaultParamInitiali
         }
 
         //Finally, p(x|z):
-        // FIXME: int cast
+        if (nIn > Integer.MAX_VALUE)
+            throw new ND4JArraySizeException();
         int nDistributionParams = layer.getOutputDistribution().distributionInputSize((int) nIn);
         int pxzWeightCount = decoderLayerSizes[decoderLayerSizes.length - 1] * nDistributionParams;
         INDArray pxzWeightView =
@@ -30,6 +30,7 @@ import org.nd4j.linalg.api.ops.CustomOp;
 import org.nd4j.linalg.api.ops.DynamicCustomOp;
 
 import org.nd4j.linalg.api.ops.impl.reduce.floating.Norm2;
+import org.nd4j.linalg.exception.ND4JArraySizeException;
 import org.nd4j.linalg.factory.Nd4j;
 import org.nd4j.linalg.indexing.NDArrayIndex;
 import org.deeplearning4j.nn.workspace.ArrayType;
@@ -111,7 +112,8 @@ public abstract class BaseMultiLayerUpdater<T extends Model> implements Updater
             if (currentBlock == null || !UpdaterUtils.updaterConfigurationsEquals(lastLayer, lastVariable,
                     layers[i], var)) {
 
-                // FIXME: int cast
+                if (paramsViewSoFar + paramSizeThisVariable > Integer.MAX_VALUE || paramsViewSoFar + paramSizeThisVariable > Integer.MAX_VALUE)
+                    throw new ND4JArraySizeException();
                 //Create a new block
                 List<UpdaterBlock.ParamState> list = new ArrayList<>();
                 list.add(new UpdaterBlock.ParamState(layers[i], var, paramsViewSoFar,
@@ -122,9 +124,11 @@ public abstract class BaseMultiLayerUpdater<T extends Model> implements Updater
 
                 updaterBlocks.add(currentBlock);
             } else {
-                // FIXME: int cast
+                long newOffset = currentBlock.getParamOffsetEnd() + paramSizeThisVariable;
+                if (newOffset > Integer.MAX_VALUE)
+                    throw new ND4JArraySizeException();
                 //Add to existing updater block
-                currentBlock.setParamOffsetEnd((int) (currentBlock.getParamOffsetEnd() + paramSizeThisVariable));
+                currentBlock.setParamOffsetEnd((int) newOffset);
                 currentBlock.setUpdaterViewOffsetEnd(
                         currentBlock.getUpdaterViewOffsetEnd() + updaterStateSizeThisVariable);
                 currentBlock.getLayersAndVariablesInBlock()
@@ -29,6 +29,7 @@ import org.nd4j.base.Preconditions;
 import org.nd4j.linalg.api.ndarray.INDArray;
 import org.nd4j.linalg.api.ops.impl.broadcast.BroadcastCopyOp;
 import org.nd4j.linalg.api.shape.Shape;
+import org.nd4j.linalg.exception.ND4JArraySizeException;
 import org.nd4j.linalg.factory.Nd4j;
 
 import java.util.Arrays;
@@ -62,10 +63,9 @@ public class Convolution1DUtils {
      * @param dilation Kernel dilation
      * @return Output size (width)
      */
-    public static int getOutputSize(int inH, int kernel, int strides, int padding,
+    public static long getOutputSize(long inH, int kernel, int strides, int padding,
                                     ConvolutionMode convolutionMode, int dilation) {
-        // FIXME: int cast
-        int eKernel = effectiveKernelSize(kernel, dilation);
+        long eKernel = effectiveKernelSize(kernel, dilation);
         if (convolutionMode == ConvolutionMode.Same) {
             return (int) Math.ceil(inH / ((double) strides));
         }
@@ -85,7 +85,8 @@ public class Convolution1DUtils {
      */
     public static int getOutputSize(INDArray inputData, int kernel, int strides, int padding,
                                     ConvolutionMode convolutionMode, int dilation) {
-        // FIXME: int cast
+        if (inputData.size(2) > Integer.MAX_VALUE)
+            throw new ND4JArraySizeException();
         int inH = (int) inputData.size(2);
         int eKernel = effectiveKernelSize(kernel, dilation);
         boolean atrous = (eKernel == kernel);
@@ -61,15 +61,14 @@ public class Convolution3DUtils {
                                        ConvolutionMode convolutionMode, int[] dilation, boolean isNCDHW) {
 
         // NCDHW vs. NDHWC
-        int inD = (int) (isNCDHW ? inputData.size(2) : inputData.size(1));
-        int inH = (int) (isNCDHW ? inputData.size(3) : inputData.size(2));
-        int inW = (int) (isNCDHW ? inputData.size(4) : inputData.size(3));
+        long inD = (isNCDHW ? inputData.size(2) : inputData.size(1));
+        long inH = (isNCDHW ? inputData.size(3) : inputData.size(2));
+        long inW = (isNCDHW ? inputData.size(4) : inputData.size(3));
 
         int[] eKernel = effectiveKernelSize(kernel, dilation);
         boolean atrous = (eKernel == kernel);
 
-        // FIXME: int cast
-        val inShape = new int[]{inD, inH, inW};
+        val inShape = new long[]{inD, inH, inW};
         validateShapes(ArrayUtil.toInts(inputData.shape()), eKernel, strides, padding, convolutionMode, dilation, inShape, atrous);
 
         if (convolutionMode == ConvolutionMode.Same) {
@@ -80,16 +79,16 @@ public class Convolution3DUtils {
             return new int[]{outD, outH, outW};
         }
 
-        int outD = (inD - eKernel[0] + 2 * padding[0]) / strides[0] + 1;
-        int outH = (inH - eKernel[1] + 2 * padding[1]) / strides[1] + 1;
-        int outW = (inW - eKernel[2] + 2 * padding[2]) / strides[2] + 1;
+        int outD = ((int)inD - eKernel[0] + 2 * padding[0]) / strides[0] + 1;
+        int outH = ((int)inH - eKernel[1] + 2 * padding[1]) / strides[1] + 1;
+        int outW = ((int)inW - eKernel[2] + 2 * padding[2]) / strides[2] + 1;
 
         return new int[]{outD, outH, outW};
     }
 
 
     private static void validateShapes(int[] inputDataShape, int[] eKernel, int[] strides, int[] padding,
-                                       ConvolutionMode convolutionMode, int[] dilation, int[] inShape,
+                                       ConvolutionMode convolutionMode, int[] dilation, long[] inShape,
                                        boolean atrous) {
 
         String[] dims = new String[]{"depth", "height", "width"};
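For the ConvolutionMode.Same branch of Convolution1DUtils.getOutputSize above, the output length depends only on the input length and the stride. A small sketch of that arithmetic (illustrative only, not the library method itself):

    public class Conv1DSameModeSize {
        // out = ceil(inH / stride); kept as long so very long inputs do not overflow int.
        public static long sameModeOutputSize(long inH, int stride) {
            return (long) Math.ceil(inH / (double) stride);
        }

        public static void main(String[] args) {
            System.out.println(sameModeOutputSize(10, 3)); // prints 4
        }
    }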
@@ -36,6 +36,8 @@ import org.nd4j.linalg.api.ops.impl.broadcast.BroadcastCopyOp;
 import org.nd4j.linalg.api.ops.impl.layers.convolution.MaxPooling2D;
 import org.nd4j.linalg.api.ops.impl.layers.convolution.config.Pooling2DConfig;
 import org.nd4j.linalg.api.shape.Shape;
+import org.nd4j.linalg.exception.ND4JArraySizeException;
+import org.nd4j.linalg.factory.NDArrayFactory;
 import org.nd4j.linalg.factory.Nd4j;
 
 import java.util.Arrays;
@@ -73,7 +75,8 @@ public class ConvolutionUtils {
     public static int[] getDeconvolutionOutputSize(INDArray inputData, int[] kernel, int[] strides, int[] padding,
                                                    ConvolutionMode convolutionMode, int[] dilation) {
 
-        // FIXME: int cast
+        if (inputData.size(2) > Integer.MAX_VALUE || inputData.size(3) > Integer.MAX_VALUE)
+            throw new ND4JArraySizeException();
         int hIn = (int) inputData.size(2);
         int wIn = (int) inputData.size(3);
         int[] eKernel = effectiveKernelSize(kernel, dilation);
@@ -104,7 +107,8 @@ public class ConvolutionUtils {
      */
     public static int[] getOutputSize(INDArray inputData, int[] kernel, int[] strides, int[] padding,
                                       ConvolutionMode convolutionMode, int[] dilation) {
-        // FIXME: int cast
+        if (inputData.size(2) > Integer.MAX_VALUE || inputData.size(3) > Integer.MAX_VALUE)
+            throw new ND4JArraySizeException();
         int inH = (int) inputData.size(2);
         int inW = (int) inputData.size(3);
 
@@ -499,7 +503,7 @@ public class ConvolutionUtils {
         }
     }
 
-    public static INDArray reshape2dTo4d(INDArray in2d, int[] toShape, LayerWorkspaceMgr workspaceMgr, ArrayType type){
+    public static INDArray reshape2dTo4d(INDArray in2d, long[] toShape, LayerWorkspaceMgr workspaceMgr, ArrayType type){
         if(in2d.rank() != 2)
             throw new IllegalArgumentException("Invalid input: expect NDArray with rank 2");
         if (toShape.length != 4)
@@ -513,7 +517,7 @@ public class ConvolutionUtils {
         return workspaceMgr.leverageTo(type, out.permute(0, 3, 1, 2));
     }
 
-    public static INDArray reshape2dTo5d(Convolution3D.DataFormat format, INDArray in2d, int n, int d, int h, int w, int ch, LayerWorkspaceMgr workspaceMgr, ArrayType type){
+    public static INDArray reshape2dTo5d(Convolution3D.DataFormat format, INDArray in2d, long n, long d, long h, long w, long ch, LayerWorkspaceMgr workspaceMgr, ArrayType type){
         if(in2d.rank() != 2)
             throw new IllegalArgumentException("Invalid input: expect NDArray with rank 2");
 
@@ -580,14 +584,21 @@ public class ConvolutionUtils {
         int inW;
         int inDepth;
 
-        // FIXME: int cast
         if (inputType instanceof InputType.InputTypeConvolutional) {
             InputType.InputTypeConvolutional conv = (InputType.InputTypeConvolutional) inputType;
+            if (conv.getHeight() > Integer.MAX_VALUE || conv.getWidth() > Integer.MAX_VALUE ||
+                    conv.getChannels() > Integer.MAX_VALUE){
+                throw new ND4JArraySizeException();
+            }
             inH = (int) conv.getHeight();
             inW = (int) conv.getWidth();
             inDepth = (int) conv.getChannels();
         } else if (inputType instanceof InputType.InputTypeConvolutionalFlat) {
             InputType.InputTypeConvolutionalFlat conv = (InputType.InputTypeConvolutionalFlat) inputType;
+            if (conv.getHeight() > Integer.MAX_VALUE || conv.getWidth() > Integer.MAX_VALUE ||
+                    conv.getDepth() > Integer.MAX_VALUE) {
+                throw new ND4JArraySizeException();
+            }
             inH = (int) conv.getHeight();
             inW = (int) conv.getWidth();
             inDepth = (int) conv.getDepth();
@@ -20,6 +20,7 @@ import lombok.val;
 import org.nd4j.base.Preconditions;
 import org.nd4j.linalg.api.ndarray.INDArray;
 import org.nd4j.linalg.api.shape.Shape;
+import org.nd4j.linalg.exception.ND4JArraySizeException;
 import org.nd4j.linalg.factory.Nd4j;
 import org.nd4j.linalg.indexing.BooleanIndexing;
 import org.nd4j.linalg.indexing.INDArrayIndex;
@@ -193,7 +194,7 @@ public class TimeSeriesUtils {
     }
 
 
-    public static INDArray reshape2dTo3d(INDArray in, int miniBatchSize, LayerWorkspaceMgr workspaceMgr, ArrayType arrayType) {
+    public static INDArray reshape2dTo3d(INDArray in, long miniBatchSize, LayerWorkspaceMgr workspaceMgr, ArrayType arrayType) {
         if (in.rank() != 2)
             throw new IllegalArgumentException("Invalid input: expect NDArray with rank 2");
         //Based on: RnnToFeedForwardPreProcessor
@@ -220,7 +221,6 @@ public class TimeSeriesUtils {
             in = in.dup('f');
         }
 
-        // FIXME: int cast
         int[] idxs = new int[(int) in.size(2)];
         int j=0;
         for( int i=idxs.length-1; i>=0; i--){
@@ -248,7 +248,8 @@ public class TimeSeriesUtils {
             in = workspaceMgr.dup(arrayType, in, 'f');
         }
 
-        // FIXME: int cast
+        if (in.size(2) > Integer.MAX_VALUE)
+            throw new ND4JArraySizeException();
         int[] idxs = new int[(int) in.size(2)];
         int j=0;
         for( int i=idxs.length-1; i>=0; i--){
@@ -291,7 +292,8 @@ public class TimeSeriesUtils {
                             + " with shape " + Arrays.toString(mask.shape()));
         }
 
-        // FIXME: int cast
+        if (mask.size(1) > Integer.MAX_VALUE)
+            throw new ND4JArraySizeException();
         int[] idxs = new int[(int) mask.size(1)];
         int j=0;
         for( int i=idxs.length-1; i>=0; i--){
@@ -319,7 +321,8 @@ public class TimeSeriesUtils {
                             + " with shape " + Arrays.toString(mask.shape()));
         }
 
-        // FIXME: int cast
+        if (mask.size(1) > Integer.MAX_VALUE)
+            throw new ND4JArraySizeException();
         int[] idxs = new int[(int) mask.size(1)];
         int j=0;
         for( int i=idxs.length-1; i>=0; i--){
@@ -358,9 +361,8 @@ public class TimeSeriesUtils {
         INDArray out;
         if (mask == null) {
 
-            // FIXME: int cast
             //No mask array -> extract same (last) column for all
-            int lastTS = (int) pullFrom.size(2) - 1;
+            long lastTS = pullFrom.size(2) - 1;
             out = pullFrom.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.point(lastTS));
             fwdPassTimeSteps = null; //Null -> last time step for all examples
         } else {
@@ -396,9 +398,8 @@ public class TimeSeriesUtils {
         INDArray out;
         if (mask == null) {
 
-            // FIXME: int cast
             //No mask array -> extract same (last) column for all
-            int lastTS = (int) pullFrom.size(2) - 1;
+            long lastTS = pullFrom.size(2) - 1;
            out = pullFrom.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.point(lastTS));
            fwdPassTimeSteps = null; //Null -> last time step for all examples
        } else {
@@ -116,7 +116,6 @@ public class TestUtils {
     public static INDArray randomOneHot(long examples, long nOut, Random rng){
         INDArray arr = Nd4j.create(examples, nOut);
         for( int i=0; i<examples; i++ ){
-            // FIXME: int cast
             arr.putScalar(i, rng.nextInt((int) nOut), 1.0);
         }
         return arr;
@@ -214,7 +214,6 @@ public class FirstIterationFunction implements
                 else {
                     nextRandom.set(Math.abs(nextRandom.get() * 25214903917L + 11));
 
-                    // FIXME: int cast
                     int idx = Math.abs((int) (nextRandom.get() >> 16) % (int) negativeHolder.getTable().length());
 
                     target = negativeHolder.getTable().getInt(idx);
@@ -222,7 +222,6 @@ public class SecondIterationFunction implements FlatMapFunction<Iterator<Tuple2<
                 else {
                     nextRandom.set(Math.abs(nextRandom.get() * 25214903917L + 11));
 
-                    // FIXME: int cast
                     int idx = (int) Math.abs((int) (nextRandom.get() >> 16) % negativeHolder.getTable().length());
 
                     target = negativeHolder.getTable().getInt(idx);
@@ -162,7 +162,6 @@ public class SentenceBatch implements Function<Word2VecFuncCall, Word2VecChange>
                     label = 1;
                 } else {
                     nextRandom.set(nextRandom.get() * 25214903917L + 11);
-                    // FIXME: int cast
                     target = table.getInt((int) (nextRandom.get() >> 16) % (int) table.length());
                     if (target == 0)
                         target = (int) nextRandom.get() % (numWords - 1) + 1;
@@ -187,7 +187,6 @@ public class Word2VecPerformer implements VoidFunction<Pair<List<VocabWord>, Ato
                 } else {
                     nextRandom.set(nextRandom.get() * 25214903917L + 11);
 
-                    // FIXME: int cast
                     target = table.getInt((int) (nextRandom.get() >> 16) % (int) table.length());
                     if (target == 0)
                         target = (int) nextRandom.get() % (numWords - 1) + 1;
@@ -337,7 +337,6 @@ public class Word2VecPerformerVoid implements VoidFunction<Pair<List<VocabWord>,
                     label = 1;
                 } else {
                     nextRandom.set(nextRandom.get() * 25214903917L + 11);
-                    // FIXME: int cast
                     target = table.getInt((int) (nextRandom.get() >> 16) % (int) table.length());
                     if (target == 0)
                         target = (int) nextRandom.get() % (numWords - 1) + 1;
@@ -39,7 +39,7 @@ public class StatsCalculationHelper {
     private long initialModelAfter;
     private long lastDataSetBefore;
     private long lastProcessBefore;
-    private int totalExampleCount;
+    private long totalExampleCount;
     private List<EventStats> dataSetGetTimes = new ArrayList<>();
     private List<EventStats> processMiniBatchTimes = new ArrayList<>();
 
@@ -65,7 +65,7 @@ public class StatsCalculationHelper {
         lastDataSetBefore = timeSource.currentTimeMillis();
     }
 
-    public void logNextDataSetAfter(int numExamples) {
+    public void logNextDataSetAfter(long numExamples) {
         long now = timeSource.currentTimeMillis();
         long duration = now - lastDataSetBefore;
         dataSetGetTimes.add(new BaseEventStats(lastDataSetBefore, duration));
@@ -84,9 +84,8 @@ public class ExecuteWorkerMultiDataSetFlatMap<R extends TrainingResult> implemen
                 s.logNextDataSetBefore();
             MultiDataSet next = batchedIterator.next();
 
-            // FIXME: int cast
             if (stats)
-                s.logNextDataSetAfter((int) next.getFeatures(0).size(0));
+                s.logNextDataSetAfter(next.getFeatures(0).size(0));
 
             if (stats) {
                 s.logProcessMinibatchBefore();
@ -71,7 +71,7 @@ public class GraphFeedForwardWithKeyFunction<K> implements PairFlatMapFunction<I
|
||||||
|
|
||||||
List<INDArray[]> featuresList = new ArrayList<>(batchSize);
|
List<INDArray[]> featuresList = new ArrayList<>(batchSize);
|
||||||
List<K> keyList = new ArrayList<>(batchSize);
|
List<K> keyList = new ArrayList<>(batchSize);
|
||||||
List<Integer> origSizeList = new ArrayList<>();
|
List<Long> origSizeList = new ArrayList<>();
|
||||||
|
|
||||||
long[][] firstShapes = null;
|
long[][] firstShapes = null;
|
||||||
boolean sizesDiffer = false;
|
boolean sizesDiffer = false;
|
||||||
|
@ -96,8 +96,7 @@ public class GraphFeedForwardWithKeyFunction<K> implements PairFlatMapFunction<I
|
||||||
featuresList.add(t2._2());
|
featuresList.add(t2._2());
|
||||||
keyList.add(t2._1());
|
keyList.add(t2._1());
|
||||||
|
|
||||||
// FIXME: int cast
|
origSizeList.add(t2._2()[0].size(0));
|
||||||
origSizeList.add((int) t2._2()[0].size(0));
|
|
||||||
tupleCount++;
|
tupleCount++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -156,7 +155,7 @@ public class GraphFeedForwardWithKeyFunction<K> implements PairFlatMapFunction<I
|
||||||
|
|
||||||
examplesInBatch = 0;
|
examplesInBatch = 0;
|
||||||
for (int i = firstIdx; i < nextIdx; i++) {
|
for (int i = firstIdx; i < nextIdx; i++) {
|
||||||
int numExamples = origSizeList.get(i);
|
long numExamples = origSizeList.get(i);
|
||||||
INDArray[] outSubset = new INDArray[out.length];
|
INDArray[] outSubset = new INDArray[out.length];
|
||||||
for (int j = 0; j < out.length; j++) {
|
for (int j = 0; j < out.length; j++) {
|
||||||
outSubset[j] = getSubset(examplesInBatch, examplesInBatch + numExamples, out[j]);
|
outSubset[j] = getSubset(examplesInBatch, examplesInBatch + numExamples, out[j]);
|
||||||
|
@@ -174,7 +173,7 @@ public class GraphFeedForwardWithKeyFunction<K> implements PairFlatMapFunction<I
         return output.iterator();
     }
 
-    private INDArray getSubset(int exampleStart, int exampleEnd, INDArray from) {
+    private INDArray getSubset(long exampleStart, long exampleEnd, INDArray from) {
         switch (from.rank()) {
             case 2:
                 return from.get(NDArrayIndex.interval(exampleStart, exampleEnd), NDArrayIndex.all());
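Widening getSubset to long bounds works because NDArrayIndex.interval accepts long arguments, so a row range can be selected directly from long example offsets. A small self-contained sketch of that call, with hypothetical names:

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.NDArrayIndex;

public class SubsetSketch {
    // Rows [exampleStart, exampleEnd), all columns; no narrowing cast required.
    static INDArray rowRange(INDArray from, long exampleStart, long exampleEnd) {
        return from.get(NDArrayIndex.interval(exampleStart, exampleEnd), NDArrayIndex.all());
    }

    public static void main(String[] args) {
        INDArray batch = Nd4j.rand(100, 5);
        INDArray firstTen = rowRange(batch, 0L, 10L);
        System.out.println(java.util.Arrays.toString(firstTen.shape()));   // [10, 5]
    }
}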
@@ -36,7 +36,7 @@ import java.util.Iterator;
 import java.util.List;
 
 /** Function used to score a DataSet using a ComputationGraph */
-public class ScoreFlatMapFunctionCGDataSet implements FlatMapFunction<Iterator<DataSet>, Tuple2<Integer, Double>> {
+public class ScoreFlatMapFunctionCGDataSet implements FlatMapFunction<Iterator<DataSet>, Tuple2<Long, Double>> {
     private static final Logger log = LoggerFactory.getLogger(ScoreFlatMapFunctionCGDataSet.class);
     private String json;
     private Broadcast<INDArray> params;
@@ -50,9 +50,9 @@ public class ScoreFlatMapFunctionCGDataSet implements FlatMapFunction<Iterator<D
     }
 
     @Override
-    public Iterator<Tuple2<Integer, Double>> call(Iterator<DataSet> dataSetIterator) throws Exception {
+    public Iterator<Tuple2<Long, Double>> call(Iterator<DataSet> dataSetIterator) throws Exception {
         if (!dataSetIterator.hasNext()) {
-            return Collections.singletonList(new Tuple2<>(0, 0.0)).iterator();
+            return Collections.singletonList(new Tuple2<>(0L, 0.0)).iterator();
         }
 
         DataSetIterator iter = new IteratorDataSetIterator(dataSetIterator, minibatchSize); //Does batching where appropriate
@@ -65,13 +65,12 @@ public class ScoreFlatMapFunctionCGDataSet implements FlatMapFunction<Iterator<D
                         "Network did not have same number of parameters as the broadcast set parameters");
         network.setParams(val);
 
-        List<Tuple2<Integer, Double>> out = new ArrayList<>();
+        List<Tuple2<Long, Double>> out = new ArrayList<>();
         while (iter.hasNext()) {
             DataSet ds = iter.next();
             double score = network.score(ds, false);
 
-            // FIXME: int cast
-            int numExamples = (int) ds.getFeatures().size(0);
+            long numExamples = ds.getFeatures().size(0);
             out.add(new Tuple2<>(numExamples, score * numExamples));
         }
 
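With the tuples now typed Tuple2<Long, Double>, each element carries (exampleCount, score * exampleCount), and the per-minibatch pairs can be reduced to an example-weighted average without forcing the count into an int. The reduction below is an illustrative sketch, not the project's exact aggregation code:

import java.util.Arrays;
import java.util.List;
import scala.Tuple2;

public class ScoreAggregationSketch {
    public static void main(String[] args) {
        // (exampleCount, score * exampleCount) per minibatch, as emitted above
        List<Tuple2<Long, Double>> perMinibatch = Arrays.asList(
                new Tuple2<>(32L, 32 * 0.41),
                new Tuple2<>(32L, 32 * 0.39),
                new Tuple2<>(8L, 8 * 0.50));   // last, smaller minibatch

        long totalExamples = 0;
        double totalScore = 0.0;
        for (Tuple2<Long, Double> t : perMinibatch) {
            totalExamples += t._1();
            totalScore += t._2();
        }
        // Example-weighted average score over all minibatches
        System.out.println(totalScore / totalExamples);
    }
}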
@@ -35,7 +35,7 @@ import java.util.Iterator;
 import java.util.List;
 
 /** Function used to score a MultiDataSet using a given ComputationGraph */
-public class ScoreFlatMapFunctionCGMultiDataSet implements FlatMapFunction<Iterator<MultiDataSet>, Tuple2<Integer, Double>> {
+public class ScoreFlatMapFunctionCGMultiDataSet implements FlatMapFunction<Iterator<MultiDataSet>, Tuple2<Long, Double>> {
 
     private static final Logger log = LoggerFactory.getLogger(ScoreFlatMapFunctionCGMultiDataSet.class);
     private String json;
@@ -50,9 +50,9 @@ public class ScoreFlatMapFunctionCGMultiDataSet implements FlatMapFunction<Itera
     }
 
     @Override
-    public Iterator<Tuple2<Integer, Double>> call(Iterator<MultiDataSet> dataSetIterator) throws Exception {
+    public Iterator<Tuple2<Long, Double>> call(Iterator<MultiDataSet> dataSetIterator) throws Exception {
         if (!dataSetIterator.hasNext()) {
-            return Collections.singletonList(new Tuple2<>(0, 0.0)).iterator();
+            return Collections.singletonList(new Tuple2<>(0L, 0.0)).iterator();
         }
 
         MultiDataSetIterator iter = new IteratorMultiDataSetIterator(dataSetIterator, minibatchSize); //Does batching where appropriate
@@ -66,13 +66,12 @@ public class ScoreFlatMapFunctionCGMultiDataSet implements FlatMapFunction<Itera
                         "Network did not have same number of parameters as the broadcast set parameters");
         network.setParams(val);
 
-        List<Tuple2<Integer, Double>> out = new ArrayList<>();
+        List<Tuple2<Long, Double>> out = new ArrayList<>();
         while (iter.hasNext()) {
             MultiDataSet ds = iter.next();
             double score = network.score(ds, false);
 
-            // FIXME: int cast
-            int numExamples = (int) ds.getFeatures(0).size(0);
+            long numExamples = ds.getFeatures(0).size(0);
             out.add(new Tuple2<>(numExamples, score * numExamples));
         }
 
@@ -105,7 +105,6 @@ public class FeedForwardWithKeyFunction<K>
             fMaskList.add(t2._2()._2());
             keyList.add(t2._1());
 
-            // FIXME: int cast
             origSizeList.add((int) t2._2()._1().size(0));
             tupleCount++;
         }
@@ -64,7 +64,6 @@ public class ScoreFlatMapFunction implements FlatMapFunction<Iterator<DataSet>,
             DataSet ds = iter.next();
             double score = network.score(ds, false);
 
-            // FIXME: int cast
             val numExamples = (int) ds.getFeatures().size(0);
             out.add(new Tuple2<>(numExamples, score * numExamples));
         }
@@ -247,10 +247,8 @@ public class ParameterAveragingTrainingWorker extends BaseTrainingWorker<Paramet
                     trainingHook.postUpdate(dataSet, graph);
                 }
             }
 
-            // FIXME: int cast
             if (configuration.isCollectTrainingStats())
-                stats.logFitEnd((int) dataSet.getFeatures(0).size(0));
+                stats.logFitEnd(dataSet.getFeatures(0).size(0));
 
             Nd4j.getExecutioner().commit();
 
@@ -195,7 +195,7 @@ public class ParameterAveragingTrainingWorkerStats implements SparkTrainingStats
         lastFitStartTime = timeSource.currentTimeMillis();
     }
 
-    public void logFitEnd(int numExamples) {
+    public void logFitEnd(long numExamples) {
         long now = timeSource.currentTimeMillis();
         fitTimes.add(new ExampleCountEventStats(lastFitStartTime, now - lastFitStartTime, numExamples));
     }
@@ -19,6 +19,7 @@ package org.deeplearning4j.spark.iterator;
 import org.nd4j.linalg.dataset.DataSet;
 import org.nd4j.linalg.dataset.api.DataSetPreProcessor;
 import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
+import org.nd4j.linalg.exception.ND4JArraySizeException;
 
 import java.util.Collection;
 import java.util.Iterator;
@@ -31,8 +32,8 @@ public abstract class BaseDataSetIterator<T> implements DataSetIterator {
     protected Collection<T> dataSetStreams;
     protected DataSetPreProcessor preprocessor;
     protected Iterator<T> iter;
-    protected int totalOutcomes = -1;
-    protected int inputColumns = -1;
+    protected long totalOutcomes = -1;
+    protected long inputColumns = -1;
     protected int batch = -1;
     protected DataSet preloadedDataSet;
     protected int cursor = 0;
@@ -112,7 +113,9 @@ public abstract class BaseDataSetIterator<T> implements DataSetIterator {
     private void preloadDataSet() {
         preloadedDataSet = load(iter.next());
 
-        // FIXME: int cast
+        if (preloadedDataSet.getLabels().size(1) > Integer.MAX_VALUE ||
+                        preloadedDataSet.getFeatures().size(1) > Integer.MAX_VALUE)
+            throw new ND4JArraySizeException();
         totalOutcomes = (int) preloadedDataSet.getLabels().size(1);
         inputColumns = (int) preloadedDataSet.getFeatures().size(1);
     }
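Where an int is genuinely unavoidable, the change keeps the cast but guards it: the long dimension is checked against Integer.MAX_VALUE and an ND4JArraySizeException is thrown instead of silently truncating. A compact sketch of that guard-then-cast pattern, with an illustrative helper name:

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.exception.ND4JArraySizeException;
import org.nd4j.linalg.factory.Nd4j;

public class GuardedCastSketch {
    // Fail loudly rather than truncate a dimension that does not fit in an int.
    static int asIntDimension(long dim) {
        if (dim > Integer.MAX_VALUE)
            throw new ND4JArraySizeException();
        return (int) dim;
    }

    public static void main(String[] args) {
        INDArray labels = Nd4j.create(16, 4);
        int totalOutcomes = asIntDimension(labels.size(1));   // safe: 4
        System.out.println(totalOutcomes);
    }
}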
@@ -67,7 +67,6 @@ public class PathSparkDataSetIterator extends BaseDataSetIterator<String> {
             ds = load(iter.next());
         }
 
-        // FIXME: int cast
         totalOutcomes = ds.getLabels() == null ? 0 : (int) ds.getLabels().size(1); //May be null for layerwise pretraining
         inputColumns = (int) ds.getFeatures().size(1);
         batch = ds.numExamples();
@@ -18,6 +18,7 @@ package org.deeplearning4j.spark.iterator;
 
 import org.apache.spark.input.PortableDataStream;
 import org.nd4j.linalg.dataset.DataSet;
+import org.nd4j.linalg.exception.ND4JArraySizeException;
 
 import java.io.IOException;
 import java.io.InputStream;
@@ -53,7 +54,9 @@ public class PortableDataStreamDataSetIterator extends BaseDataSetIterator<Porta
             ds = load(iter.next());
         }
 
-        // FIXME: int cast
+        if (ds.getLabels().size(1) > Integer.MAX_VALUE ||
+                        ds.getFeatures().size(1) > Integer.MAX_VALUE)
+            throw new ND4JArraySizeException();
         totalOutcomes = (int) ds.getLabels().size(1);
         inputColumns = (int) ds.getFeatures().size(1);
         batch = ds.numExamples();
@@ -26,9 +26,9 @@ import lombok.Getter;
 public class ExampleCountEventStats extends BaseEventStats {
 
     @Getter
-    private final int totalExampleCount;
+    private final long totalExampleCount;
 
-    public ExampleCountEventStats(long startTime, long durationMs, int totalExampleCount) {
+    public ExampleCountEventStats(long startTime, long durationMs, long totalExampleCount) {
         super(startTime, durationMs);
         this.totalExampleCount = totalExampleCount;
     }
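A short usage sketch for the widened constructor above; the import path is assumed for illustration and the example count is a stand-in for a value taken from INDArray.size(0):

import org.deeplearning4j.spark.stats.ExampleCountEventStats;   // package assumed

public class FitTimingSketch {
    public static void main(String[] args) {
        long fitStart = System.currentTimeMillis();
        // ... the fit on the minibatch would happen here ...
        long duration = System.currentTimeMillis() - fitStart;
        long numExamples = 1024L;   // e.g. dataSet.getFeatures(0).size(0), already a long
        ExampleCountEventStats stats =
                new ExampleCountEventStats(fitStart, duration, numExamples);
        System.out.println(stats.getTotalExampleCount());   // 1024, via Lombok @Getter
    }
}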
@@ -31,6 +31,7 @@ import org.datavec.api.split.InputStreamInputSplit;
 import org.datavec.api.writable.Writable;
 import org.nd4j.linalg.api.ndarray.INDArray;
 import org.nd4j.linalg.dataset.DataSet;
+import org.nd4j.linalg.exception.ND4JArraySizeException;
 import org.nd4j.linalg.factory.Nd4j;
 import org.nd4j.linalg.util.FeatureUtil;
 import scala.Tuple2;
@@ -122,7 +123,8 @@ public class MLLibUtil {
         if (!arr.isVector()) {
             throw new IllegalArgumentException("passed in array must be a vector");
         }
-        // FIXME: int cast
+        if (arr.length() > Integer.MAX_VALUE)
+            throw new ND4JArraySizeException();
         double[] ret = new double[(int) arr.length()];
         for (int i = 0; i < arr.length(); i++) {
             ret[i] = arr.getDouble(i);
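The guard here exists because Java arrays are indexed by int: an INDArray longer than Integer.MAX_VALUE simply cannot be copied into a double[], so the check turns what would be silent truncation into an explicit failure. A self-contained sketch of the same pattern:

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.exception.ND4JArraySizeException;
import org.nd4j.linalg.factory.Nd4j;

public class VectorCopySketch {
    // Copy an ND4J vector into a plain double[]; the length must fit in an int.
    static double[] toDoubleArray(INDArray arr) {
        if (arr.length() > Integer.MAX_VALUE)
            throw new ND4JArraySizeException();
        double[] ret = new double[(int) arr.length()];
        for (int i = 0; i < ret.length; i++) {
            ret[i] = arr.getDouble(i);
        }
        return ret;
    }

    public static void main(String[] args) {
        System.out.println(toDoubleArray(Nd4j.linspace(1, 5, 5)).length);   // 5
    }
}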
@@ -33,6 +33,7 @@ import org.deeplearning4j.ui.api.UIServer;
 import org.deeplearning4j.ui.storage.mapdb.MapDBStatsStorage;
 import org.deeplearning4j.util.UIDProvider;
 import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.exception.ND4JArraySizeException;
 import org.nd4j.linalg.io.ClassPathResource;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -205,7 +206,8 @@ public class ConvolutionalIterationListener extends BaseTrainingListener {
                 if(layers[i].type() == Layer.Type.CONVOLUTIONAL){
                     INDArray output = activations.get(i+1); //Offset by 1 - activations list includes input
 
-                    // FIXME: int cast
+                    if (output.shape()[0] - 1 > Integer.MAX_VALUE)
+                        throw new ND4JArraySizeException();
                     int sampleDim = output.shape()[0] == 1 ? 0 : rnd.nextInt((int) output.shape()[0] - 1) + 1;
                     if (cnt == 0) {
                         INDArray inputs = layers[i].input();
@@ -426,7 +428,8 @@ public class ConvolutionalIterationListener extends BaseTrainingListener {
 
         val height = (numRows * (tShape[1] + border + padding_col)) + padding_col + zoomPadding + zoomWidth;
 
-        // FIXME: int cast
+        if (height > Integer.MAX_VALUE)
+            throw new ND4JArraySizeException();
         BufferedImage outputImage = new BufferedImage(maxWidth, (int) height, BufferedImage.TYPE_BYTE_GRAY);
         Graphics2D graphics2D = outputImage.createGraphics();
 
@@ -571,7 +574,8 @@ public class ConvolutionalIterationListener extends BaseTrainingListener {
             */
 
             graphics2D.setPaint(borderColor);
-            // FIXME: int cast
+            if (tad2D.shape()[0] > Integer.MAX_VALUE || tad2D.shape()[1] > Integer.MAX_VALUE)
+                throw new ND4JArraySizeException();
             graphics2D.drawRect(columnOffset, rowOffset, (int) tad2D.shape()[0], (int) tad2D.shape()[1]);
 
 
@@ -90,9 +90,8 @@ public abstract class BaseLabels implements Labels {
         Preconditions.checkState(predictions.size(1) == labels.size(), "Invalid input array:" +
                 " expected array with size(1) equal to numLabels (%s), got array with shape %s", labels.size(), predictions.shape());
 
-        // FIXME: int cast
-        int rows = (int) predictions.size(0);
-        int cols = (int) predictions.size(1);
+        long rows = predictions.size(0);
+        long cols = predictions.size(1);
         if (predictions.isColumnVectorOrScalar()) {
             predictions = predictions.ravel();
             rows = (int) predictions.size(0);
@@ -116,7 +116,6 @@ public class TestUtils {
     public static INDArray randomOneHot(long examples, long nOut, Random rng){
         INDArray arr = Nd4j.create(examples, nOut);
         for( int i=0; i<examples; i++ ){
-            // FIXME: int cast
            arr.putScalar(i, rng.nextInt((int) nOut), 1.0);
         }
         return arr;
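One cast deliberately remains: java.util.Random.nextInt only accepts an int bound, and a one-hot label width is far below Integer.MAX_VALUE, so (int) nOut is safe here and only the stale FIXME is dropped. A runnable sketch mirroring that helper:

import java.util.Random;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class RandomOneHotSketch {
    // One random label per example; nextInt requires an int bound, hence the cast.
    static INDArray randomOneHot(long examples, long nOut, Random rng) {
        INDArray arr = Nd4j.create(examples, nOut);
        for (int i = 0; i < examples; i++) {
            arr.putScalar(i, rng.nextInt((int) nOut), 1.0);
        }
        return arr;
    }

    public static void main(String[] args) {
        INDArray labels = randomOneHot(4, 3, new Random(12345));
        System.out.println(labels);   // 4 rows, each with a single 1.0
    }
}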