diff --git a/deeplearning4j/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/normalization/CudnnBatchNormalizationHelper.java b/deeplearning4j/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/normalization/CudnnBatchNormalizationHelper.java index aad549ad0..6d826c5eb 100644 --- a/deeplearning4j/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/normalization/CudnnBatchNormalizationHelper.java +++ b/deeplearning4j/deeplearning4j-cuda/src/main/java/org/deeplearning4j/nn/layers/normalization/CudnnBatchNormalizationHelper.java @@ -123,7 +123,7 @@ public class CudnnBatchNormalizationHelper extends BaseCudnnHelper implements Ba } @Override - public Pair backpropGradient(INDArray input, INDArray epsilon, int[] shape, INDArray gamma, INDArray beta, + public Pair backpropGradient(INDArray input, INDArray epsilon, long[] shape, INDArray gamma, INDArray beta, INDArray dGammaView, INDArray dBetaView, double eps, LayerWorkspaceMgr layerWorkspaceMgr) { this.eps = eps; val miniBatch = (int) input.size(0); @@ -173,8 +173,8 @@ public class CudnnBatchNormalizationHelper extends BaseCudnnHelper implements Ba checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.dstTensorDesc, dataType, miniBatch, depth, inH, inW, dstStride[0], dstStride[1], dstStride[2], dstStride[3])); - checkCudnn(cudnnSetTensor4dDescriptor(cudnnContext.gammaBetaTensorDesc, TENSOR_FORMAT, toCudnnDataType(gamma.data().dataType()), shape[0], - shape[1], shape.length > 2 ? shape[2] : 1, shape.length > 3 ? shape[3] : 1)); + checkCudnn(cudnnSetTensor4dDescriptor(cudnnContext.gammaBetaTensorDesc, TENSOR_FORMAT, toCudnnDataType(gamma.data().dataType()), (int)shape[0], + (int)shape[1], shape.length > 2 ? (int)shape[2] : 1, shape.length > 3 ? (int)shape[3] : 1)); Allocator allocator = AtomicAllocator.getInstance(); CudaContext context = allocator.getFlowController().prepareActionAllWrite(input, epsilon, nextEpsilon, gamma, @@ -214,7 +214,7 @@ public class CudnnBatchNormalizationHelper extends BaseCudnnHelper implements Ba @Override - public INDArray preOutput(INDArray x, boolean training, int[] shape, INDArray gamma, INDArray beta, INDArray mean, + public INDArray preOutput(INDArray x, boolean training, long[] shape, INDArray gamma, INDArray beta, INDArray mean, INDArray var, double decay, double eps, LayerWorkspaceMgr workspaceMgr) { this.eps = eps; final boolean isHalf = (x.dataType() == DataType.HALF); @@ -252,8 +252,8 @@ public class CudnnBatchNormalizationHelper extends BaseCudnnHelper implements Ba checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.dstTensorDesc, dataType, miniBatch, inDepth, inH, inW, dstStride[0], dstStride[1], dstStride[2], dstStride[3])); - checkCudnn(cudnnSetTensor4dDescriptor(cudnnContext.gammaBetaTensorDesc, TENSOR_FORMAT, toCudnnDataType(mean.data().dataType()), shape[0], - shape[1], shape.length > 2 ? shape[2] : 1, shape.length > 3 ? shape[3] : 1)); + checkCudnn(cudnnSetTensor4dDescriptor(cudnnContext.gammaBetaTensorDesc, TENSOR_FORMAT, toCudnnDataType(mean.data().dataType()), (int)shape[0], + (int)shape[1], shape.length > 2 ? (int)shape[2] : 1, shape.length > 3 ? (int)shape[3] : 1)); Allocator allocator = AtomicAllocator.getInstance(); CudaContext context = diff --git a/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/glove/Glove.java b/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/glove/Glove.java index 297e6d9e1..3dde1ae9b 100644 --- a/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/glove/Glove.java +++ b/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/main/java/org/deeplearning4j/spark/models/embeddings/glove/Glove.java @@ -93,11 +93,11 @@ public class Glove implements Serializable { VocabWord w1, INDArray wordVector, INDArray contextVector, double gradient) { //gradient for word vectors INDArray grad1 = contextVector.mul(gradient); - INDArray update = weightAdaGrad.getGradient(grad1, w1.getIndex(), ArrayUtil.toInts(syn0.shape())); + INDArray update = weightAdaGrad.getGradient(grad1, w1.getIndex(), syn0.shape()); wordVector.subi(update); double w1Bias = bias.getDouble(w1.getIndex()); - double biasGradient = biasAdaGrad.getGradient(gradient, w1.getIndex(), ArrayUtil.toInts(bias.shape())); + double biasGradient = biasAdaGrad.getGradient(gradient, w1.getIndex(), bias.shape()); double update2 = w1Bias - biasGradient; bias.putScalar(w1.getIndex(), bias.getDouble(w1.getIndex()) - update2); return new Pair<>(update, (float) update2); diff --git a/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/common/reduce/LongDoubleReduceFunction.java b/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/common/reduce/LongDoubleReduceFunction.java new file mode 100644 index 000000000..1092ff02b --- /dev/null +++ b/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/common/reduce/LongDoubleReduceFunction.java @@ -0,0 +1,31 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +package org.deeplearning4j.spark.impl.common.reduce; + +import org.apache.spark.api.java.function.Function2; +import scala.Tuple2; + +/** + * Add both elements of a {@code Tuple2} + */ +public class LongDoubleReduceFunction + implements Function2, Tuple2, Tuple2> { + @Override + public Tuple2 call(Tuple2 f, Tuple2 s) throws Exception { + return new Tuple2<>(f._1() + s._1(), f._2() + s._2()); + } +} diff --git a/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/graph/SparkComputationGraph.java b/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/graph/SparkComputationGraph.java index e8ad74ba7..0e639a462 100644 --- a/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/graph/SparkComputationGraph.java +++ b/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/graph/SparkComputationGraph.java @@ -38,6 +38,7 @@ import org.deeplearning4j.spark.api.TrainingMaster; import org.deeplearning4j.spark.api.stats.SparkTrainingStats; import org.deeplearning4j.spark.impl.SparkListenable; import org.deeplearning4j.spark.impl.common.reduce.IntDoubleReduceFunction; +import org.deeplearning4j.spark.impl.common.reduce.LongDoubleReduceFunction; import org.deeplearning4j.spark.impl.graph.dataset.DataSetToMultiDataSetFn; import org.deeplearning4j.spark.impl.graph.dataset.PairDataSetToMultiDataSetFn; import org.deeplearning4j.spark.impl.graph.evaluation.IEvaluateMDSFlatMapFunction; @@ -374,11 +375,11 @@ public class SparkComputationGraph extends SparkListenable { * in one go) */ public double calculateScore(JavaRDD data, boolean average, int minibatchSize) { - JavaRDD> rdd = data.mapPartitions(new ScoreFlatMapFunctionCGDataSet(conf.toJson(), - sc.broadcast(network.params(false)), minibatchSize)); + JavaRDD> rdd = data.mapPartitions(new ScoreFlatMapFunctionCGDataSet(conf.toJson(), + sc.broadcast(network.params()), minibatchSize)); //Reduce to a single tuple, with example count + sum of scores - Tuple2 countAndSumScores = rdd.reduce(new IntDoubleReduceFunction()); + Tuple2 countAndSumScores = rdd.reduce(new LongDoubleReduceFunction()); if (average) { return countAndSumScores._2() / countAndSumScores._1(); } else { @@ -409,10 +410,10 @@ public class SparkComputationGraph extends SparkListenable { * in one go) */ public double calculateScoreMultiDataSet(JavaRDD data, boolean average, int minibatchSize) { - JavaRDD> rdd = data.mapPartitions(new ScoreFlatMapFunctionCGMultiDataSet(conf.toJson(), - sc.broadcast(network.params(false)), minibatchSize)); + JavaRDD> rdd = data.mapPartitions(new ScoreFlatMapFunctionCGMultiDataSet(conf.toJson(), + sc.broadcast(network.params()), minibatchSize)); //Reduce to a single tuple, with example count + sum of scores - Tuple2 countAndSumScores = rdd.reduce(new IntDoubleReduceFunction()); + Tuple2 countAndSumScores = rdd.reduce(new LongDoubleReduceFunction()); if (average) { return countAndSumScores._2() / countAndSumScores._1(); } else { diff --git a/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/iterator/BaseDataSetIterator.java b/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/iterator/BaseDataSetIterator.java index e3de70cd9..9fc53759f 100644 --- a/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/iterator/BaseDataSetIterator.java +++ b/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/iterator/BaseDataSetIterator.java @@ -47,7 +47,7 @@ public abstract class BaseDataSetIterator implements DataSetIterator { public int inputColumns() { if (inputColumns == -1) preloadDataSet(); - return inputColumns; + return (int)inputColumns; } @Override