diff --git a/datavec/datavec-api/src/main/java/org/datavec/api/conf/Configuration.java b/datavec/datavec-api/src/main/java/org/datavec/api/conf/Configuration.java index db736c11f..9ea2f5ad3 100644 --- a/datavec/datavec-api/src/main/java/org/datavec/api/conf/Configuration.java +++ b/datavec/datavec-api/src/main/java/org/datavec/api/conf/Configuration.java @@ -50,11 +50,11 @@ import java.util.regex.PatternSyntaxException; *

Resources

* *

Configurations are specified by resources. A resource contains a set of - * name/value pairs as XML data. Each resource is named by either a - * String. If named by a String, - * then the classpath is examined for a file with that name. If named by a - * Path, then the local filesystem is examined directly, without - * referring to the classpath. + * name/value pairs as XML data. Each resource is named by either a + * String or a Path. If named by a + * String, then the classpath is examined for a file with that + * name. If named by a Path, then the local filesystem is + * examined directly, without referring to the classpath. * *

Unless explicitly turned off, Hadoop by default specifies two * resources, loaded in-order from the classpath:

    diff --git a/datavec/datavec-api/src/main/java/org/datavec/api/writable/batch/NDArrayRecordBatch.java b/datavec/datavec-api/src/main/java/org/datavec/api/writable/batch/NDArrayRecordBatch.java index 0a5ddddb8..e9b78e390 100644 --- a/datavec/datavec-api/src/main/java/org/datavec/api/writable/batch/NDArrayRecordBatch.java +++ b/datavec/datavec-api/src/main/java/org/datavec/api/writable/batch/NDArrayRecordBatch.java @@ -52,6 +52,7 @@ public class NDArrayRecordBatch extends AbstractWritableRecordBatch { public NDArrayRecordBatch(@NonNull List arrays){ Preconditions.checkArgument(arrays.size() > 0, "Input list must not be empty"); this.arrays = arrays; + this.size = arrays.get(0).size(0); //Check that dimension 0 matches: if(arrays.size() > 1){ diff --git a/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/loader/BaseImageLoader.java b/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/loader/BaseImageLoader.java index d2518eeb6..0bfddeb32 100644 --- a/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/loader/BaseImageLoader.java +++ b/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/loader/BaseImageLoader.java @@ -16,6 +16,7 @@ package org.datavec.image.loader; +import lombok.extern.slf4j.Slf4j; import org.apache.commons.io.FileUtils; import org.datavec.image.data.Image; import org.datavec.image.transform.ImageTransform; @@ -35,10 +36,9 @@ import java.util.Random; /** * Created by nyghtowl on 12/17/15. 
*/ +@Slf4j public abstract class BaseImageLoader implements Serializable { - protected static final Logger log = LoggerFactory.getLogger(BaseImageLoader.class); - public enum MultiPageMode { MINIBATCH, FIRST //, CHANNELS, } @@ -62,13 +62,37 @@ public abstract class BaseImageLoader implements Serializable { public abstract INDArray asRowVector(InputStream inputStream) throws IOException; + /** As per {@link #asMatrix(File, boolean)} but NCHW/channels_first format */ public abstract INDArray asMatrix(File f) throws IOException; + /** + * Load an image from a file to an INDArray + * @param f File to load the image from + * @param nchw If true: return image in NCHW/channels_first [1, channels, height width] format; if false, return + * in NHWC/channels_last [1, height, width, channels] format + * @return Image file as as INDArray + */ + public abstract INDArray asMatrix(File f, boolean nchw) throws IOException; + public abstract INDArray asMatrix(InputStream inputStream) throws IOException; + /** + * Load an image file from an input stream to an INDArray + * @param inputStream Input stream to load the image from + * @param nchw If true: return image in NCHW/channels_first [1, channels, height width] format; if false, return + * in NHWC/channels_last [1, height, width, channels] format + * @return Image file stream as as INDArray + */ + public abstract INDArray asMatrix(InputStream inputStream, boolean nchw) throws IOException; + /** As per {@link #asMatrix(File)} but as an {@link Image}*/ public abstract Image asImageMatrix(File f) throws IOException; + /** As per {@link #asMatrix(File, boolean)} but as an {@link Image}*/ + public abstract Image asImageMatrix(File f, boolean nchw) throws IOException; + /** As per {@link #asMatrix(InputStream)} but as an {@link Image}*/ public abstract Image asImageMatrix(InputStream inputStream) throws IOException; + /** As per {@link #asMatrix(InputStream, boolean)} but as an {@link Image}*/ + public abstract Image 
asImageMatrix(InputStream inputStream, boolean nchw) throws IOException; public static void downloadAndUntar(Map urlMap, File fullDir) { diff --git a/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/loader/CifarLoader.java b/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/loader/CifarLoader.java index 3d390c698..e513ebed3 100644 --- a/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/loader/CifarLoader.java +++ b/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/loader/CifarLoader.java @@ -16,6 +16,7 @@ package org.datavec.image.loader; +import lombok.extern.slf4j.Slf4j; import org.apache.commons.io.FileUtils; import org.apache.commons.io.FilenameUtils; import org.bytedeco.javacv.OpenCVFrameConverter; @@ -47,6 +48,7 @@ import static org.bytedeco.opencv.global.opencv_imgproc.*; * There is a special preProcessor used to normalize the dataset based on Sergey Zagoruyko example * https://github.com/szagoruyko/cifar.torch */ +@Slf4j public class CifarLoader extends NativeImageLoader implements Serializable { public static final int NUM_TRAIN_IMAGES = 50000; public static final int NUM_TEST_IMAGES = 10000; diff --git a/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/loader/ImageLoader.java b/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/loader/ImageLoader.java index d246c65ad..9c2c61d57 100644 --- a/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/loader/ImageLoader.java +++ b/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/loader/ImageLoader.java @@ -249,7 +249,14 @@ public class ImageLoader extends BaseImageLoader { * @throws IOException */ public INDArray asMatrix(File f) throws IOException { - return NDArrayUtil.toNDArray(fromFile(f)); + return asMatrix(f, true); + } + + @Override + public INDArray asMatrix(File f, boolean nchw) throws IOException { + try(InputStream is = new 
BufferedInputStream(new FileInputStream(f))){ + return asMatrix(is, nchw); + } } /** @@ -259,34 +266,68 @@ public class ImageLoader extends BaseImageLoader { * @return the input stream to convert */ public INDArray asMatrix(InputStream inputStream) throws IOException { - if (channels == 3) - return toBgr(inputStream); - try { - BufferedImage image = ImageIO.read(inputStream); - return asMatrix(image); - } catch (IOException e) { - throw new IOException("Unable to load image", e); + return asMatrix(inputStream, true); + } + + @Override + public INDArray asMatrix(InputStream inputStream, boolean nchw) throws IOException { + INDArray ret; + if (channels == 3) { + ret = toBgr(inputStream); + } else { + try { + BufferedImage image = ImageIO.read(inputStream); + ret = asMatrix(image); + } catch (IOException e) { + throw new IOException("Unable to load image", e); + } } + if(ret.rank() == 3){ + ret = ret.reshape(1, ret.size(0), ret.size(1), ret.size(2)); + } + if(!nchw) + ret = ret.permute(0,2,3,1); //NCHW to NHWC + return ret; } @Override public org.datavec.image.data.Image asImageMatrix(File f) throws IOException { + return asImageMatrix(f, true); + } + + @Override + public org.datavec.image.data.Image asImageMatrix(File f, boolean nchw) throws IOException { try (BufferedInputStream bis = new BufferedInputStream(new FileInputStream(f))) { - return asImageMatrix(bis); + return asImageMatrix(bis, nchw); } } @Override public org.datavec.image.data.Image asImageMatrix(InputStream inputStream) throws IOException { - if (channels == 3) - return toBgrImage(inputStream); - try { - BufferedImage image = ImageIO.read(inputStream); - INDArray asMatrix = asMatrix(image); - return new org.datavec.image.data.Image(asMatrix, image.getData().getNumBands(), image.getHeight(), image.getWidth()); - } catch (IOException e) { - throw new IOException("Unable to load image", e); + return asImageMatrix(inputStream, true); + } + + @Override + public org.datavec.image.data.Image 
asImageMatrix(InputStream inputStream, boolean nchw) throws IOException { + org.datavec.image.data.Image ret; + if (channels == 3) { + ret = toBgrImage(inputStream); + } else { + try { + BufferedImage image = ImageIO.read(inputStream); + INDArray asMatrix = asMatrix(image); + ret = new org.datavec.image.data.Image(asMatrix, image.getData().getNumBands(), image.getHeight(), image.getWidth()); + } catch (IOException e) { + throw new IOException("Unable to load image", e); + } } + if(ret.getImage().rank() == 3){ + INDArray a = ret.getImage(); + ret.setImage(a.reshape(1, a.size(0), a.size(1), a.size(2))); + } + if(!nchw) + ret.setImage(ret.getImage().permute(0,2,3,1)); //NCHW to NHWC + return ret; } /** diff --git a/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/loader/LFWLoader.java b/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/loader/LFWLoader.java index d28c73318..b71c53e42 100644 --- a/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/loader/LFWLoader.java +++ b/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/loader/LFWLoader.java @@ -17,6 +17,7 @@ package org.datavec.image.loader; +import lombok.extern.slf4j.Slf4j; import org.datavec.api.io.filters.BalancedPathFilter; import org.datavec.api.io.labels.PathLabelGenerator; import org.datavec.api.io.labels.PatternPathLabelGenerator; @@ -48,6 +49,7 @@ import java.util.Random; * most images are in color, although a few are grayscale * */ +@Slf4j public class LFWLoader extends BaseImageLoader implements Serializable { public final static int NUM_IMAGES = 13233; @@ -270,19 +272,39 @@ public class LFWLoader extends BaseImageLoader implements Serializable { throw new UnsupportedOperationException(); } + @Override + public INDArray asMatrix(File f, boolean nchw) throws IOException { + throw new UnsupportedOperationException(); + } + @Override public INDArray asMatrix(InputStream inputStream) throws IOException { throw new 
UnsupportedOperationException(); } + @Override + public INDArray asMatrix(InputStream inputStream, boolean nchw) throws IOException { + throw new UnsupportedOperationException(); + } + @Override public Image asImageMatrix(File f) throws IOException { throw new UnsupportedOperationException(); } + @Override + public Image asImageMatrix(File f, boolean nchw) throws IOException { + throw new UnsupportedOperationException(); + } + @Override public Image asImageMatrix(InputStream inputStream) throws IOException { throw new UnsupportedOperationException(); } + @Override + public Image asImageMatrix(InputStream inputStream, boolean nchw) throws IOException { + throw new UnsupportedOperationException(); + } + } diff --git a/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/loader/NativeImageLoader.java b/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/loader/NativeImageLoader.java index 88bc161f2..ae9e2a322 100644 --- a/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/loader/NativeImageLoader.java +++ b/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/loader/NativeImageLoader.java @@ -248,17 +248,27 @@ public class NativeImageLoader extends BaseImageLoader { @Override public INDArray asMatrix(File f) throws IOException { + return asMatrix(f, true); + } + + @Override + public INDArray asMatrix(File f, boolean nchw) throws IOException { try (BufferedInputStream bis = new BufferedInputStream(new FileInputStream(f))) { - return asMatrix(bis); + return asMatrix(bis, nchw); } } @Override public INDArray asMatrix(InputStream is) throws IOException { - Mat mat = streamToMat(is); + return asMatrix(is, true); + } + + @Override + public INDArray asMatrix(InputStream inputStream, boolean nchw) throws IOException { + Mat mat = streamToMat(inputStream); INDArray a; if (this.multiPageMode != null) { - a = asMatrix(mat.data(), mat.cols()); + a = asMatrix(mat.data(), mat.cols()); }else{ Mat image = 
imdecode(mat, IMREAD_ANYDEPTH | IMREAD_ANYCOLOR); if (image == null || image.empty()) { @@ -272,7 +282,11 @@ public class NativeImageLoader extends BaseImageLoader { a = asMatrix(image); image.deallocate(); } - return a; + if(nchw) { + return a; + } else { + return a.permute(0, 2, 3, 1); //NCHW to NHWC + } } /** @@ -331,19 +345,29 @@ public class NativeImageLoader extends BaseImageLoader { } public Image asImageMatrix(String filename) throws IOException { - return asImageMatrix(filename); + return asImageMatrix(new File(filename)); } @Override public Image asImageMatrix(File f) throws IOException { + return asImageMatrix(f, true); + } + + @Override + public Image asImageMatrix(File f, boolean nchw) throws IOException { try (BufferedInputStream bis = new BufferedInputStream(new FileInputStream(f))) { - return asImageMatrix(bis); + return asImageMatrix(bis, nchw); } } @Override public Image asImageMatrix(InputStream is) throws IOException { - Mat mat = streamToMat(is); + return asImageMatrix(is, true); + } + + @Override + public Image asImageMatrix(InputStream inputStream, boolean nchw) throws IOException { + Mat mat = streamToMat(inputStream); Mat image = imdecode(mat, IMREAD_ANYDEPTH | IMREAD_ANYCOLOR); if (image == null || image.empty()) { PIX pix = pixReadMem(mat.data(), mat.cols()); @@ -354,6 +378,8 @@ public class NativeImageLoader extends BaseImageLoader { pixDestroy(pix); } INDArray a = asMatrix(image); + if(!nchw) + a = a.permute(0,2,3,1); //NCHW to NHWC Image i = new Image(a, image.channels(), image.rows(), image.cols()); image.deallocate(); diff --git a/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/recordreader/BaseImageRecordReader.java b/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/recordreader/BaseImageRecordReader.java index fb780ea74..d5400ee8e 100644 --- a/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/recordreader/BaseImageRecordReader.java +++ 
b/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/recordreader/BaseImageRecordReader.java @@ -77,6 +77,8 @@ public abstract class BaseImageRecordReader extends BaseRecordReader { protected int patternPosition = 0; @Getter @Setter protected boolean logLabelCountOnInit = true; + @Getter @Setter + protected boolean nchw_channels_first = true; public final static String HEIGHT = NAME_SPACE + ".height"; public final static String WIDTH = NAME_SPACE + ".width"; @@ -101,6 +103,11 @@ public abstract class BaseImageRecordReader extends BaseRecordReader { protected BaseImageRecordReader(long height, long width, long channels, PathLabelGenerator labelGenerator, PathMultiLabelGenerator labelMultiGenerator, ImageTransform imageTransform) { + this(height, width, channels, true, labelGenerator, labelMultiGenerator, imageTransform); + } + + protected BaseImageRecordReader(long height, long width, long channels, boolean nchw_channels_first, PathLabelGenerator labelGenerator, + PathMultiLabelGenerator labelMultiGenerator, ImageTransform imageTransform) { this.height = height; this.width = width; this.channels = channels; @@ -108,6 +115,7 @@ public abstract class BaseImageRecordReader extends BaseRecordReader { this.labelMultiGenerator = labelMultiGenerator; this.imageTransform = imageTransform; this.appendLabel = (labelGenerator != null || labelMultiGenerator != null); + this.nchw_channels_first = nchw_channels_first; } protected boolean containsFormat(String format) { @@ -237,9 +245,13 @@ public abstract class BaseImageRecordReader extends BaseRecordReader { return next(); try { invokeListeners(image); - INDArray row = imageLoader.asMatrix(image); - Nd4j.getAffinityManager().ensureLocation(row, AffinityManager.Location.DEVICE); - ret = RecordConverter.toRecord(row); + INDArray array = imageLoader.asMatrix(image); + if(!nchw_channels_first){ + array = array.permute(0,2,3,1); //NCHW to NHWC + } + + Nd4j.getAffinityManager().ensureLocation(array, 
AffinityManager.Location.DEVICE); + ret = RecordConverter.toRecord(array); if (appendLabel || writeLabel){ if(labelMultiGenerator != null){ ret.addAll(labelMultiGenerator.getLabels(image.getPath())); @@ -286,7 +298,7 @@ public abstract class BaseImageRecordReader extends BaseRecordReader { @Override public List> next(int num) { - Preconditions.checkArgument(num > 0, "Number of examples must be > 0: got " + num); + Preconditions.checkArgument(num > 0, "Number of examples must be > 0: got %s", num); if (imageLoader == null) { imageLoader = new NativeImageLoader(height, width, channels, imageTransform); @@ -337,6 +349,9 @@ public abstract class BaseImageRecordReader extends BaseRecordReader { throw new RuntimeException(e); } } + if(!nchw_channels_first){ + features = features.permute(0,2,3,1); //NCHW to NHWC + } Nd4j.getAffinityManager().ensureLocation(features, AffinityManager.Location.DEVICE); @@ -483,8 +498,10 @@ public abstract class BaseImageRecordReader extends BaseRecordReader { if (imageLoader == null) { imageLoader = new NativeImageLoader(height, width, channels, imageTransform); } - INDArray row = imageLoader.asMatrix(dataInputStream); - List ret = RecordConverter.toRecord(row); + INDArray array = imageLoader.asMatrix(dataInputStream); + if(!nchw_channels_first) + array = array.permute(0,2,3,1); + List ret = RecordConverter.toRecord(array); if (appendLabel) ret.add(new IntWritable(labels.indexOf(getLabel(uri.getPath())))); return ret; diff --git a/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/recordreader/ImageRecordReader.java b/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/recordreader/ImageRecordReader.java index be7a6d8d9..f8e292c26 100644 --- a/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/recordreader/ImageRecordReader.java +++ b/datavec/datavec-data/datavec-data-image/src/main/java/org/datavec/image/recordreader/ImageRecordReader.java @@ -34,47 +34,70 @@ import 
org.datavec.image.transform.ImageTransform; public class ImageRecordReader extends BaseImageRecordReader { - /** Loads images with height = 28, width = 28, and channels = 1, appending no labels. */ + /** Loads images with height = 28, width = 28, and channels = 1, appending no labels. + * Output format is NCHW (channels first) - [numExamples, 1, 28, 28]*/ public ImageRecordReader() { super(); } - /** Loads images with given height, width, and channels, appending labels returned by the generator. */ + /** Loads images with given height, width, and channels, appending labels returned by the generator. + * Output format is NCHW (channels first) - [numExamples, channels, height, width] + */ public ImageRecordReader(long height, long width, long channels, PathLabelGenerator labelGenerator) { super(height, width, channels, labelGenerator); } - /** Loads images with given height, width, and channels, appending labels returned by the generator. */ + /** Loads images with given height, width, and channels, appending labels returned by the generator. + * Output format is NCHW (channels first) - [numExamples, channels, height, width] + */ public ImageRecordReader(long height, long width, long channels, PathMultiLabelGenerator labelGenerator) { super(height, width, channels, labelGenerator); } - /** Loads images with given height, width, and channels, appending no labels. */ + /** Loads images with given height, width, and channels, appending no labels - in NCHW (channels first) format */ public ImageRecordReader(long height, long width, long channels) { super(height, width, channels, (PathLabelGenerator) null); } - /** Loads images with given height, width, and channels, appending labels returned by the generator. */ + /** Loads images with given height, width, and channels, appending no labels - in specified format
    + * If {@code nchw_channels_first == true} output format is NCHW (channels first) - [numExamples, channels, height, width]
    + * If {@code nchw_channels_first == false} output format is NHWC (channels last) - [numExamples, height, width, channels]
    + */ + public ImageRecordReader(long height, long width, long channels, boolean nchw_channels_first) { + super(height, width, channels, nchw_channels_first, null, null, null); + } + + /** Loads images with given height, width, and channels, appending labels returned by the generator. + * Output format is NCHW (channels first) - [numExamples, channels, height, width] */ public ImageRecordReader(long height, long width, long channels, PathLabelGenerator labelGenerator, ImageTransform imageTransform) { super(height, width, channels, labelGenerator, imageTransform); } - /** Loads images with given height, width, and channels, appending no labels. */ + /** Loads images with given height, width, and channels, appending labels returned by the generator.
    + * If {@code nchw_channels_first == true} output format is NCHW (channels first) - [numExamples, channels, height, width]
    + * If {@code nchw_channels_first == false} output format is NHWC (channels last) - [numExamples, height, width, channels]
    + */ + public ImageRecordReader(long height, long width, long channels, boolean nchw_channels_first, PathLabelGenerator labelGenerator, + ImageTransform imageTransform) { + super(height, width, channels, nchw_channels_first, labelGenerator, null, imageTransform); + } + + /** Loads images with given height, width, and channels, appending no labels. + * Output format is NCHW (channels first) - [numExamples, channels, height, width]*/ public ImageRecordReader(long height, long width, long channels, ImageTransform imageTransform) { super(height, width, channels, null, imageTransform); } - /** Loads images with given height, width, and channels, appending labels returned by the generator. */ + /** Loads images with given height, width, and channels, appending labels returned by the generator + * Output format is NCHW (channels first) - [numExamples, channels, height, width]*/ public ImageRecordReader(long height, long width, PathLabelGenerator labelGenerator) { super(height, width, 1, labelGenerator); } - /** Loads images with given height, width, and channels = 1, appending no labels. */ + /** Loads images with given height, width, and channels = 1, appending no labels. 
+ * Output format is NCHW (channels first) - [numExamples, channels, height, width]*/ public ImageRecordReader(long height, long width) { super(height, width, 1, null, null); } - - - } diff --git a/datavec/datavec-data/datavec-data-image/src/test/java/org/datavec/image/loader/TestImageLoader.java b/datavec/datavec-data/datavec-data-image/src/test/java/org/datavec/image/loader/TestImageLoader.java index 1683980f0..a82f12409 100644 --- a/datavec/datavec-data/datavec-data-image/src/test/java/org/datavec/image/loader/TestImageLoader.java +++ b/datavec/datavec-data/datavec-data-image/src/test/java/org/datavec/image/loader/TestImageLoader.java @@ -16,10 +16,16 @@ package org.datavec.image.loader; +import org.datavec.image.data.Image; import org.junit.Test; +import org.nd4j.common.resources.Resources; import org.nd4j.linalg.api.ndarray.INDArray; import java.awt.image.BufferedImage; +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStream; import java.util.Random; import static org.junit.Assert.assertEquals; @@ -208,4 +214,57 @@ public class TestImageLoader { private BufferedImage makeRandomBufferedImage(boolean alpha) { return makeRandomBufferedImage(alpha, rng.nextInt() % 100 + 100, rng.nextInt() % 100 + 100); } + + + @Test + public void testNCHW_NHWC() throws Exception { + File f = Resources.asFile("datavec-data-image/voc/2007/JPEGImages/000005.jpg"); + + ImageLoader il = new ImageLoader(32, 32, 3); + + //asMatrix(File, boolean) + INDArray a_nchw = il.asMatrix(f); + INDArray a_nchw2 = il.asMatrix(f, true); + INDArray a_nhwc = il.asMatrix(f, false); + + assertEquals(a_nchw, a_nchw2); + assertEquals(a_nchw, a_nhwc.permute(0,3,1,2)); + + + //asMatrix(InputStream, boolean) + try(InputStream is = new BufferedInputStream(new FileInputStream(f))){ + a_nchw = il.asMatrix(is); + } + try(InputStream is = new BufferedInputStream(new FileInputStream(f))){ + a_nchw2 = il.asMatrix(is, true); + } + try(InputStream is = new 
BufferedInputStream(new FileInputStream(f))){ + a_nhwc = il.asMatrix(is, false); + } + assertEquals(a_nchw, a_nchw2); + assertEquals(a_nchw, a_nhwc.permute(0,3,1,2)); + + + //asImageMatrix(File, boolean) + Image i_nchw = il.asImageMatrix(f); + Image i_nchw2 = il.asImageMatrix(f, true); + Image i_nhwc = il.asImageMatrix(f, false); + + assertEquals(i_nchw.getImage(), i_nchw2.getImage()); + assertEquals(i_nchw.getImage(), i_nhwc.getImage().permute(0,3,1,2)); //NHWC to NCHW + + + //asImageMatrix(InputStream, boolean) + try(InputStream is = new BufferedInputStream(new FileInputStream(f))){ + i_nchw = il.asImageMatrix(is); + } + try(InputStream is = new BufferedInputStream(new FileInputStream(f))){ + i_nchw2 = il.asImageMatrix(is, true); + } + try(InputStream is = new BufferedInputStream(new FileInputStream(f))){ + i_nhwc = il.asImageMatrix(is, false); + } + assertEquals(i_nchw.getImage(), i_nchw2.getImage()); + assertEquals(i_nchw.getImage(), i_nhwc.getImage().permute(0,3,1,2)); //NHWC to NCHW + } } diff --git a/datavec/datavec-data/datavec-data-image/src/test/java/org/datavec/image/loader/TestNativeImageLoader.java b/datavec/datavec-data/datavec-data-image/src/test/java/org/datavec/image/loader/TestNativeImageLoader.java index 6e7705569..68e93107c 100644 --- a/datavec/datavec-data/datavec-data-image/src/test/java/org/datavec/image/loader/TestNativeImageLoader.java +++ b/datavec/datavec-data/datavec-data-image/src/test/java/org/datavec/image/loader/TestNativeImageLoader.java @@ -24,20 +24,19 @@ import org.bytedeco.javacpp.indexer.UByteIndexer; import org.bytedeco.javacv.Frame; import org.bytedeco.javacv.Java2DFrameConverter; import org.bytedeco.javacv.OpenCVFrameConverter; +import org.datavec.image.data.Image; import org.datavec.image.data.ImageWritable; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; +import org.nd4j.common.resources.Resources; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; 
import org.nd4j.linalg.factory.Nd4j; import org.nd4j.common.io.ClassPathResource; import java.awt.image.BufferedImage; -import java.io.File; -import java.io.FileInputStream; -import java.io.InputStream; -import java.io.IOException; +import java.io.*; import java.lang.reflect.Field; import java.util.Random; @@ -604,4 +603,56 @@ public class TestNativeImageLoader { } } + @Test + public void testNCHW_NHWC() throws Exception { + File f = Resources.asFile("datavec-data-image/voc/2007/JPEGImages/000005.jpg"); + + NativeImageLoader il = new NativeImageLoader(32, 32, 3); + + //asMatrix(File, boolean) + INDArray a_nchw = il.asMatrix(f); + INDArray a_nchw2 = il.asMatrix(f, true); + INDArray a_nhwc = il.asMatrix(f, false); + + assertEquals(a_nchw, a_nchw2); + assertEquals(a_nchw, a_nhwc.permute(0,3,1,2)); + + + //asMatrix(InputStream, boolean) + try(InputStream is = new BufferedInputStream(new FileInputStream(f))){ + a_nchw = il.asMatrix(is); + } + try(InputStream is = new BufferedInputStream(new FileInputStream(f))){ + a_nchw2 = il.asMatrix(is, true); + } + try(InputStream is = new BufferedInputStream(new FileInputStream(f))){ + a_nhwc = il.asMatrix(is, false); + } + assertEquals(a_nchw, a_nchw2); + assertEquals(a_nchw, a_nhwc.permute(0,3,1,2)); + + + //asImageMatrix(File, boolean) + Image i_nchw = il.asImageMatrix(f); + Image i_nchw2 = il.asImageMatrix(f, true); + Image i_nhwc = il.asImageMatrix(f, false); + + assertEquals(i_nchw.getImage(), i_nchw2.getImage()); + assertEquals(i_nchw.getImage(), i_nhwc.getImage().permute(0,3,1,2)); //NHWC to NCHW + + + //asImageMatrix(InputStream, boolean) + try(InputStream is = new BufferedInputStream(new FileInputStream(f))){ + i_nchw = il.asImageMatrix(is); + } + try(InputStream is = new BufferedInputStream(new FileInputStream(f))){ + i_nchw2 = il.asImageMatrix(is, true); + } + try(InputStream is = new BufferedInputStream(new FileInputStream(f))){ + i_nhwc = il.asImageMatrix(is, false); + } + assertEquals(i_nchw.getImage(), 
i_nchw2.getImage()); + assertEquals(i_nchw.getImage(), i_nhwc.getImage().permute(0,3,1,2)); //NHWC to NCHW + } + } diff --git a/datavec/datavec-data/datavec-data-image/src/test/java/org/datavec/image/recordreader/TestImageRecordReader.java b/datavec/datavec-data/datavec-data-image/src/test/java/org/datavec/image/recordreader/TestImageRecordReader.java index 80cb9b0af..26cd83f06 100644 --- a/datavec/datavec-data/datavec-data-image/src/test/java/org/datavec/image/recordreader/TestImageRecordReader.java +++ b/datavec/datavec-data/datavec-data-image/src/test/java/org/datavec/image/recordreader/TestImageRecordReader.java @@ -35,13 +35,13 @@ import org.datavec.api.writable.batch.NDArrayRecordBatch; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; +import org.nd4j.common.resources.Resources; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.common.io.ClassPathResource; -import java.io.File; -import java.io.IOException; +import java.io.*; import java.net.URI; import java.util.ArrayList; import java.util.Arrays; @@ -467,5 +467,87 @@ public class TestImageRecordReader { return count; } } + + + + @Test + public void testNCHW_NCHW() throws Exception { + //Idea: labels order should be consistent regardless of input file order + File f0 = testDir.newFolder(); + new ClassPathResource("datavec-data-image/testimages/").copyDirectory(f0); + + FileSplit fs0 = new FileSplit(f0, new Random(12345)); + FileSplit fs1 = new FileSplit(f0, new Random(12345)); + assertEquals(6, fs0.locations().length); + assertEquals(6, fs1.locations().length); + + ImageRecordReader nchw = new ImageRecordReader(32, 32, 3, true); + nchw.initialize(fs0); + + ImageRecordReader nhwc = new ImageRecordReader(32, 32, 3, false); + nhwc.initialize(fs1); + + while(nchw.hasNext()){ + assertTrue(nhwc.hasNext()); + + List l_nchw = nchw.next(); + List l_nhwc = nhwc.next(); + + INDArray a_nchw = 
((NDArrayWritable)l_nchw.get(0)).get(); + INDArray a_nhwc = ((NDArrayWritable)l_nhwc.get(0)).get(); + + assertArrayEquals(new long[]{1, 3, 32, 32}, a_nchw.shape()); + assertArrayEquals(new long[]{1, 32, 32, 3}, a_nhwc.shape()); + + INDArray permuted = a_nhwc.permute(0,3,1,2); //NHWC to NCHW + assertEquals(a_nchw, permuted); + } + + + //Test batch: + nchw.reset(); + nhwc.reset(); + + int batchCount = 0; + while(nchw.hasNext()){ + assertTrue(nhwc.hasNext()); + batchCount++; + + List> l_nchw = nchw.next(3); + List> l_nhwc = nhwc.next(3); + assertEquals(3, l_nchw.size()); + assertEquals(3, l_nhwc.size()); + + NDArrayRecordBatch b_nchw = (NDArrayRecordBatch)l_nchw; + NDArrayRecordBatch b_nhwc = (NDArrayRecordBatch)l_nhwc; + + INDArray a_nchw = b_nchw.getArrays().get(0); + INDArray a_nhwc = b_nhwc.getArrays().get(0); + + assertArrayEquals(new long[]{3, 3, 32, 32}, a_nchw.shape()); + assertArrayEquals(new long[]{3, 32, 32, 3}, a_nhwc.shape()); + + INDArray permuted = a_nhwc.permute(0,3,1,2); //NHWC to NCHW + assertEquals(a_nchw, permuted); + } + assertEquals(2, batchCount); + + + //Test record(URI, DataInputStream) + + URI u = fs0.locations()[0]; + + try(DataInputStream dis = new DataInputStream(new BufferedInputStream(new FileInputStream(new File(u))))) { + List l = nchw.record(u, dis); + INDArray arr = ((NDArrayWritable)l.get(0)).get(); + assertArrayEquals(new long[]{1, 3, 32, 32}, arr.shape()); + } + + try(DataInputStream dis = new DataInputStream(new BufferedInputStream(new FileInputStream(new File(u))))) { + List l = nhwc.record(u, dis); + INDArray arr = ((NDArrayWritable)l.get(0)).get(); + assertArrayEquals(new long[]{1, 32, 32, 3}, arr.shape()); + } + } } diff --git a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/RandomTests.java b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/RandomTests.java index b52b7cb49..3ea9e07f3 100644 --- a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/RandomTests.java +++ 
b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/RandomTests.java @@ -8,12 +8,14 @@ import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.junit.Ignore; import org.junit.Test; +import org.nd4j.common.resources.Resources; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.learning.config.RmsProp; import org.nd4j.linalg.lossfunctions.LossFunctions; +import java.nio.file.Files; import java.util.concurrent.CountDownLatch; @Ignore diff --git a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvDataFormatTests.java b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvDataFormatTests.java index 5d540baa7..76d14d47d 100644 --- a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvDataFormatTests.java +++ b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvDataFormatTests.java @@ -18,11 +18,9 @@ package org.deeplearning4j.nn.layers.convolution; import lombok.*; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.TestUtils; +import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.api.MaskState; -import org.deeplearning4j.nn.conf.CNN2DFormat; -import org.deeplearning4j.nn.conf.ConvolutionMode; -import org.deeplearning4j.nn.conf.InputPreProcessor; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.*; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.conf.layers.CnnLossLayer; @@ -35,6 +33,7 @@ import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import 
org.deeplearning4j.nn.workspace.ArrayType; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.deeplearning4j.util.ConvolutionUtils; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -49,6 +48,7 @@ import java.util.List; import java.util.Map; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; @RunWith(Parameterized.class) public class ConvDataFormatTests extends BaseDL4JTest { @@ -971,4 +971,58 @@ public class ConvDataFormatTests extends BaseDL4JTest { return null; } } + + + @Test + public void testWrongFormatIn(){ + + for(CNN2DFormat df : CNN2DFormat.values()){ + + + for(int i=0; i<4; i++ ){ + + NeuralNetConfiguration.ListBuilder b = new NeuralNetConfiguration.Builder() + .list(); + switch (i){ + case 0: + b.layer(new ConvolutionLayer.Builder().kernelSize(2,2).nIn(3).nOut(3).dataFormat(df).build()); + break; + case 1: + b.layer(new DepthwiseConvolution2D.Builder().kernelSize(2,2).nIn(3).nOut(3).dataFormat(df).build()); + break; + case 2: + b.layer(new Deconvolution2D.Builder().dataFormat(df).kernelSize(2,2).nIn(3).nOut(3).build()); + break; + case 3: + b.layer(new SeparableConvolution2D.Builder().dataFormat(df).kernelSize(2,2).nIn(3).nOut(3).build()); + break; + } + + MultiLayerNetwork net = new MultiLayerNetwork(b.build()); + net.init(); + + INDArray in; + INDArray wrongFormatIn; + if(df == CNN2DFormat.NCHW){ + in = Nd4j.create(DataType.FLOAT, 5, 3, 12, 12); + wrongFormatIn = Nd4j.create(DataType.FLOAT, 5, 12, 12, 3); + } else { + in = Nd4j.create(DataType.FLOAT, 5, 12, 12, 3); + wrongFormatIn = Nd4j.create(DataType.FLOAT, 5, 3, 12, 12); + } + + net.output(in); + + try { + net.output(wrongFormatIn); + } catch (DL4JInvalidInputException e){ +// e.printStackTrace(); + String msg = e.getMessage(); + assertTrue(msg, msg.contains(ConvolutionUtils.NCHW_NHWC_ERROR_MSG)); + } + } + } + + + } } diff --git 
a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/optimize/solver/accumulation/EncodedGradientsAccumulatorTest.java b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/optimize/solver/accumulation/EncodedGradientsAccumulatorTest.java index bae025caf..cc85e4b47 100644 --- a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/optimize/solver/accumulation/EncodedGradientsAccumulatorTest.java +++ b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/optimize/solver/accumulation/EncodedGradientsAccumulatorTest.java @@ -23,9 +23,13 @@ import org.deeplearning4j.optimize.solvers.accumulation.EncodedGradientsAccumula import org.deeplearning4j.optimize.solvers.accumulation.EncodingHandler; import org.deeplearning4j.optimize.solvers.accumulation.encoding.threshold.FixedThresholdAlgorithm; import org.junit.Test; +import org.nd4j.linalg.api.concurrency.AffinityManager; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.ops.util.PrintAffinity; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.nativeblas.OpaqueDataBuffer; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; /** @@ -93,12 +97,13 @@ public class EncodedGradientsAccumulatorTest extends BaseDL4JTest { } - EncodingHandler handler = new EncodingHandler(new FixedThresholdAlgorithm(1e-3), null, null, false); + EncodingHandler handler = new EncodingHandler(new FixedThresholdAlgorithm(1e-3), null, Integer.MAX_VALUE, false); for (int e = 10; e < numParams / 5; e++) { - INDArray encoded = handler.encodeUpdates(0, 0, getGradients(numParams, e, 2e-3)); + val gradients = getGradients(numParams, e, 2e-3); + val encoded = handler.encodeUpdates(0, 0, gradients); - // log.info("enc len: {}", encoded.data().length()); + assertNotNull("Failed with e == " + e, encoded); int encFormat = encoded.data().getInt(3); diff --git 
a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/optimize/solver/accumulation/SmartFancyBlockingQueueTest.java b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/optimize/solver/accumulation/SmartFancyBlockingQueueTest.java index 63a69b82c..5abd5a253 100644 --- a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/optimize/solver/accumulation/SmartFancyBlockingQueueTest.java +++ b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/optimize/solver/accumulation/SmartFancyBlockingQueueTest.java @@ -21,9 +21,9 @@ import lombok.val; import org.apache.commons.lang3.RandomUtils; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.optimize.solvers.accumulation.SmartFancyBlockingQueue; -import org.deeplearning4j.core.util.ThreadUtils; import org.junit.Ignore; import org.junit.Test; +import org.nd4j.common.util.ThreadUtils; import org.nd4j.linalg.factory.Nd4j; import java.util.ArrayList; diff --git a/deeplearning4j/deeplearning4j-data/deeplearning4j-utility-iterators/pom.xml b/deeplearning4j/deeplearning4j-data/deeplearning4j-utility-iterators/pom.xml index 462bebc95..7806bab88 100644 --- a/deeplearning4j/deeplearning4j-data/deeplearning4j-utility-iterators/pom.xml +++ b/deeplearning4j/deeplearning4j-data/deeplearning4j-utility-iterators/pom.xml @@ -31,11 +31,6 @@ - - org.deeplearning4j - deeplearning4j-util - ${project.version} - org.nd4j nd4j-api diff --git a/deeplearning4j/deeplearning4j-data/deeplearning4j-utility-iterators/src/main/java/org/deeplearning4j/datasets/iterator/impl/MovingWindowDataSetFetcher.java b/deeplearning4j/deeplearning4j-data/deeplearning4j-utility-iterators/src/main/java/org/deeplearning4j/datasets/iterator/impl/MovingWindowDataSetFetcher.java deleted file mode 100644 index e8bee9092..000000000 --- a/deeplearning4j/deeplearning4j-data/deeplearning4j-utility-iterators/src/main/java/org/deeplearning4j/datasets/iterator/impl/MovingWindowDataSetFetcher.java +++ 
/dev/null @@ -1,75 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -package org.deeplearning4j.datasets.iterator.impl; - -import org.deeplearning4j.util.MovingWindowMatrix; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.dataset.DataSet; -import org.nd4j.linalg.dataset.api.iterator.fetcher.BaseDataFetcher; -import org.nd4j.common.util.ArrayUtil; - -import java.util.ArrayList; -import java.util.List; - -/** - * - * Moving window data fetcher. Handles rotation of matrices in all directions - * to generate more examples. 
- * - * - * @author Adam Gibson - */ -public class MovingWindowDataSetFetcher extends BaseDataFetcher { - - private DataSet data; - private int windowRows = 28, windowColumns = 28; - private int cursor = 0; - - public MovingWindowDataSetFetcher(DataSet data, int windowRows, int windowColumns) { - this.data = data; - this.windowRows = windowRows; - this.windowColumns = windowColumns; - List list = data.asList(); - List flipped = new ArrayList<>(); - for (int i = 0; i < list.size(); i++) { - INDArray label = list.get(i).getLabels(); - List windows = - new MovingWindowMatrix(list.get(i).getFeatures(), windowRows, windowColumns, true) - .windows(true); - for (int j = 0; j < windows.size(); j++) { - flipped.add(new DataSet(windows.get(j), label)); - } - flipped.add(list.get(i)); - } - - this.data = DataSet.merge(flipped); - - } - - /** - * Fetches the next dataset. You need to call this - * to get a new dataset, otherwise {@link #next()} - * just returns the last data applyTransformToDestination fetch - * - * @param numExamples the number of examples to fetch - */ - @Override - public void fetch(int numExamples) { - initializeCurrFromList(data.get(ArrayUtil.range(cursor, cursor + numExamples)).asList()); - - } -} diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/KerasLoss.java b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/KerasLoss.java index e3c603287..d47309d1d 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/KerasLoss.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/KerasLoss.java @@ -1,5 +1,6 @@ /******************************************************************************* * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2020 Konduit K.K. 
* * This program and the accompanying materials are made available under the * terms of the Apache License, Version 2.0 which is available at @@ -28,6 +29,7 @@ import org.deeplearning4j.nn.modelimport.keras.KerasLayer; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; import org.nd4j.linalg.activations.Activation; +import org.nd4j.linalg.lossfunctions.ILossFunction; import org.nd4j.linalg.lossfunctions.LossFunctions; import java.util.ArrayList; @@ -45,7 +47,7 @@ import static org.deeplearning4j.nn.modelimport.keras.utils.KerasLossUtils.mapLo public class KerasLoss extends KerasLayer { private final String KERAS_CLASS_NAME_LOSS = "Loss"; - private LossFunctions.LossFunction loss; + private ILossFunction loss; /** @@ -86,7 +88,7 @@ public class KerasLoss extends KerasLayer { if (enforceTrainingConfig) throw e; log.warn("Unsupported Keras loss function. Replacing with MSE."); - loss = LossFunctions.LossFunction.SQUARED_LOSS; + loss = LossFunctions.LossFunction.SQUARED_LOSS.getILossFunction(); } } diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasLossUtils.java b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasLossUtils.java index 35cf34170..b9e0ddfce 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasLossUtils.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasLossUtils.java @@ -1,5 +1,6 @@ /******************************************************************************* * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2020 Konduit K.K. 
* * This program and the accompanying materials are made available under the * terms of the Apache License, Version 2.0 which is available at @@ -19,8 +20,13 @@ package org.deeplearning4j.nn.modelimport.keras.utils; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; +import org.nd4j.linalg.lossfunctions.ILossFunction; import org.nd4j.linalg.lossfunctions.LossFunctions; +import java.util.HashMap; +import java.util.Map; + + /** * Utility functionality for keras loss functions * @@ -28,13 +34,33 @@ import org.nd4j.linalg.lossfunctions.LossFunctions; */ @Slf4j public class KerasLossUtils { + static final Map customLoss = new HashMap<>(); + + /** + * Register a custom loss function + * + * @param lossName name of the lambda layer in the serialized Keras model + * @param lossFunction SameDiffLambdaLayer instance to map to Keras Lambda layer + */ + public static void registerCustomLoss(String lossName, ILossFunction lossFunction) { + customLoss.put(lossName, lossFunction); + } + + /** + * Clear all lambda layers + * + */ + public static void clearCustomLoss() { + customLoss.clear(); + } + /** * Map Keras to DL4J loss functions. 
* * @param kerasLoss String containing Keras loss function name * @return String containing DL4J loss function */ - public static LossFunctions.LossFunction mapLossFunction(String kerasLoss, KerasLayerConfiguration conf) + public static ILossFunction mapLossFunction(String kerasLoss, KerasLayerConfiguration conf) throws UnsupportedKerasConfigurationException { LossFunctions.LossFunction dl4jLoss; if (kerasLoss.equals(conf.getKERAS_LOSS_MEAN_SQUARED_ERROR()) || @@ -67,8 +93,13 @@ public class KerasLossUtils { } else if (kerasLoss.equals(conf.getKERAS_LOSS_COSINE_PROXIMITY())) { dl4jLoss = LossFunctions.LossFunction.COSINE_PROXIMITY; } else { - throw new UnsupportedKerasConfigurationException("Unknown Keras loss function " + kerasLoss); + ILossFunction lossClass = customLoss.get(kerasLoss); + if(lossClass != null){ + return lossClass; + }else{ + throw new UnsupportedKerasConfigurationException("Unknown Keras loss function " + kerasLoss); + } } - return dl4jLoss; + return dl4jLoss.getILossFunction(); } } diff --git a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasCustomLossTest.java b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasCustomLossTest.java new file mode 100644 index 000000000..23c46835e --- /dev/null +++ b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasCustomLossTest.java @@ -0,0 +1,78 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. 
+ * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +package org.deeplearning4j.nn.modelimport.keras.e2e; + +import org.deeplearning4j.BaseDL4JTest; +import org.deeplearning4j.nn.modelimport.keras.KerasSequentialModel; +import org.deeplearning4j.nn.modelimport.keras.utils.KerasLossUtils; +import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.nd4j.autodiff.samediff.SDVariable; +import org.nd4j.autodiff.samediff.SameDiff; +import org.nd4j.common.resources.Resources; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.lossfunctions.SameDiffLoss; + +import java.io.File; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.StandardCopyOption; + + +/** + * Test importing Keras models with custom loss. 
+ * + * @author Paul Dubs + */ +public class KerasCustomLossTest extends BaseDL4JTest { + + @Rule + public TemporaryFolder testDir = new TemporaryFolder(); + + public class LogCosh extends SameDiffLoss { + @Override + public SDVariable defineLoss(SameDiff sd, SDVariable layerInput, SDVariable labels) { + return sd.math.log(sd.math.cosh(labels.sub(layerInput))); + } + } + + @Test + public void testSequentialLambdaLayerImport() throws Exception { + KerasLossUtils.registerCustomLoss("logcosh", new LogCosh()); + + String modelPath = "modelimport/keras/examples/custom_loss.h5"; + + try(InputStream is = Resources.asStream(modelPath)) { + File modelFile = testDir.newFile("tempModel" + System.currentTimeMillis() + ".h5"); + Files.copy(is, modelFile.toPath(), StandardCopyOption.REPLACE_EXISTING); + MultiLayerNetwork model = new KerasSequentialModel().modelBuilder().modelHdf5Filename(modelFile.getAbsolutePath()) + .enforceTrainingConfig(true).buildSequential().getMultiLayerNetwork(); + + System.out.println(model.summary()); + INDArray input = Nd4j.create(new int[]{10, 3}); + + model.output(input); + } finally { + KerasLossUtils.clearCustomLoss(); + } + } + + +} diff --git a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/test/java/org/deeplearning4j/models/WordVectorSerializerTest.java b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/test/java/org/deeplearning4j/models/WordVectorSerializerTest.java index 27d49d5f5..7d6c0f559 100755 --- a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/test/java/org/deeplearning4j/models/WordVectorSerializerTest.java +++ b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/src/test/java/org/deeplearning4j/models/WordVectorSerializerTest.java @@ -856,15 +856,26 @@ public class WordVectorSerializerTest extends BaseDL4JTest { @Test public void testFastText() { - - File[] files = {fastTextRaw, fastTextZip, fastTextGzip}; + File[] files = { fastTextRaw, fastTextZip, 
fastTextGzip }; for (File file : files) { try { Word2Vec word2Vec = WordVectorSerializer.readAsCsv(file); - assertEquals(99, word2Vec.getVocab().numWords()); + assertEquals(99, word2Vec.getVocab().numWords()); + } catch (Exception readCsvException) { + fail("Failure for input file " + file.getAbsolutePath() + " " + readCsvException.getMessage()); + } + } + } - } catch (Exception e) { - fail("Failure for input file " + file.getAbsolutePath() + " " + e.getMessage()); + @Test + public void testFastText_readWord2VecModel() { + File[] files = { fastTextRaw, fastTextZip, fastTextGzip }; + for (File file : files) { + try { + Word2Vec word2Vec = WordVectorSerializer.readWord2VecModel(file); + assertEquals(99, word2Vec.getVocab().numWords()); + } catch (Exception readCsvException) { + fail("Failure for input file " + file.getAbsolutePath() + " " + readCsvException.getMessage()); } } } diff --git a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/pom.xml b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/pom.xml index 668c728ae..8a7eacada 100644 --- a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/pom.xml +++ b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/pom.xml @@ -84,6 +84,12 @@ ${project.version} test + + org.awaitility + awaitility + 4.0.2 + test + diff --git a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/loader/WordVectorSerializer.java b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/loader/WordVectorSerializer.java index 8f0003728..a77bdf0de 100755 --- a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/loader/WordVectorSerializer.java +++ b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/embeddings/loader/WordVectorSerializer.java @@ -1,5 +1,6 @@ 
/******************************************************************************* * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2020 Konduit K.K. * * This program and the accompanying materials are made available under the * terms of the Apache License, Version 2.0 which is available at @@ -16,14 +17,45 @@ package org.deeplearning4j.models.embeddings.loader; -import lombok.*; -import lombok.extern.slf4j.Slf4j; +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.PrintWriter; +import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.zip.GZIPInputStream; +import java.util.zip.ZipEntry; +import java.util.zip.ZipFile; +import java.util.zip.ZipInputStream; +import java.util.zip.ZipOutputStream; + import org.apache.commons.codec.binary.Base64; import org.apache.commons.compress.compressors.gzip.GzipUtils; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.commons.io.LineIterator; import org.apache.commons.io.output.CloseShieldOutputStream; +import org.deeplearning4j.common.util.DL4JFileUtils; import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.models.embeddings.WeightLookupTable; import 
org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable; @@ -50,26 +82,25 @@ import org.deeplearning4j.text.documentiterator.LabelsSource; import org.deeplearning4j.text.sentenceiterator.BasicLineIterator; import org.deeplearning4j.text.tokenization.tokenizer.TokenPreProcess; import org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory; -import org.deeplearning4j.common.util.DL4JFileUtils; +import org.nd4j.common.primitives.Pair; +import org.nd4j.common.util.OneTimeLogger; import org.nd4j.compression.impl.NoOp; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.exception.ND4JIllegalStateException; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.ops.transforms.Transforms; -import org.nd4j.common.primitives.Pair; import org.nd4j.shade.jackson.databind.DeserializationFeature; import org.nd4j.shade.jackson.databind.MapperFeature; import org.nd4j.shade.jackson.databind.ObjectMapper; import org.nd4j.shade.jackson.databind.SerializationFeature; import org.nd4j.storage.CompressedRamStorage; -import org.nd4j.common.util.OneTimeLogger; -import java.io.*; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.zip.*; +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; +import lombok.NonNull; +import lombok.extern.slf4j.Slf4j; +import lombok.val; /** * This is utility class, providing various methods for WordVectors serialization @@ -85,14 +116,17 @@ import java.util.zip.*; * {@link #writeWord2VecModel(Word2Vec, OutputStream)} * *
  1. Deserializers for Word2Vec:
  2. - * {@link #readWord2VecModel(File)} * {@link #readWord2VecModel(String)} - * {@link #readWord2VecModel(File, boolean)} * {@link #readWord2VecModel(String, boolean)} + * {@link #readWord2VecModel(File)} + * {@link #readWord2VecModel(File, boolean)} * {@link #readAsBinaryNoLineBreaks(File)} + * {@link #readAsBinaryNoLineBreaks(InputStream)} * {@link #readAsBinary(File)} + * {@link #readAsBinary(InputStream)} * {@link #readAsCsv(File)} - * {@link #readBinaryModel(File, boolean, boolean)} + * {@link #readAsCsv(InputStream)} + * {@link #readBinaryModel(InputStream, boolean, boolean)} * {@link #readWord2VecFromText(File, File, File, File, VectorsConfiguration)} * {@link #readWord2Vec(String, boolean)} * {@link #readWord2Vec(File, boolean)} @@ -117,6 +151,7 @@ import java.util.zip.*; * {@link #fromTableAndVocab(WeightLookupTable, VocabCache)} * {@link #fromPair(Pair)} * {@link #loadTxt(File)} + * {@link #loadTxt(InputStream)} * *
  3. Serializers to tSNE format
  4. * {@link #writeTsneFormat(Glove, INDArray, File)} @@ -151,6 +186,7 @@ import java.util.zip.*; * @author Adam Gibson * @author raver119 * @author alexander@skymind.io + * @author Alexei KLENIN */ @Slf4j public class WordVectorSerializer { @@ -215,18 +251,22 @@ public class WordVectorSerializer { }*/ /** - * Read a binary word2vec file. + * Read a binary word2vec from input stream. + * + * @param inputStream input stream to read + * @param linebreaks if true, the reader expects each word/vector to be in a separate line, terminated + * by a line break + * @param normalize * - * @param modelFile the File to read - * @param linebreaks if true, the reader expects each word/vector to be in a separate line, terminated - * by a line break * @return a {@link Word2Vec model} * @throws NumberFormatException * @throws IOException * @throws FileNotFoundException */ - public static Word2Vec readBinaryModel(File modelFile, boolean linebreaks, boolean normalize) - throws NumberFormatException, IOException { + public static Word2Vec readBinaryModel( + InputStream inputStream, + boolean linebreaks, + boolean normalize) throws NumberFormatException, IOException { InMemoryLookupTable lookupTable; VocabCache cache; INDArray syn0; @@ -240,9 +280,7 @@ public class WordVectorSerializer { Nd4j.getMemoryManager().setOccasionalGcFrequency(50000); - try (BufferedInputStream bis = new BufferedInputStream(GzipUtils.isCompressedFilename(modelFile.getName()) - ? 
new GZIPInputStream(new FileInputStream(modelFile)) : new FileInputStream(modelFile)); - DataInputStream dis = new DataInputStream(bis)) { + try (DataInputStream dis = new DataInputStream(inputStream)) { words = Integer.parseInt(ReadHelper.readString(dis)); size = Integer.parseInt(ReadHelper.readString(dis)); syn0 = Nd4j.create(words, size); @@ -250,23 +288,26 @@ public class WordVectorSerializer { printOutProjectedMemoryUse(words, size, 1); - lookupTable = (InMemoryLookupTable) new InMemoryLookupTable.Builder().cache(cache) - .useHierarchicSoftmax(false).vectorLength(size).build(); + lookupTable = new InMemoryLookupTable.Builder() + .cache(cache) + .useHierarchicSoftmax(false) + .vectorLength(size) + .build(); - int cnt = 0; String word; float[] vector = new float[size]; for (int i = 0; i < words; i++) { - word = ReadHelper.readString(dis); - log.trace("Loading " + word + " with word " + i); + log.trace("Loading {} with word {}", word, i); for (int j = 0; j < size; j++) { vector[j] = ReadHelper.readFloat(dis); } - if (cache.containsWord(word)) - throw new ND4JIllegalStateException("Tried to add existing word. Probably time to switch linebreaks mode?"); + if (cache.containsWord(word)) { + throw new ND4JIllegalStateException( + "Tried to add existing word. Probably time to switch linebreaks mode?"); + } syn0.putRow(i, normalize ? 
Transforms.unitVec(Nd4j.create(vector)) : Nd4j.create(vector)); @@ -285,25 +326,31 @@ public class WordVectorSerializer { Nd4j.getMemoryManager().invokeGcOccasionally(); } } finally { - if (originalPeriodic) + if (originalPeriodic) { Nd4j.getMemoryManager().togglePeriodicGc(true); + } Nd4j.getMemoryManager().setOccasionalGcFrequency(originalFreq); } - lookupTable.setSyn0(syn0); - - Word2Vec ret = new Word2Vec.Builder().useHierarchicSoftmax(false).resetModel(false).layerSize(syn0.columns()) - .allowParallelTokenization(true).elementsLearningAlgorithm(new SkipGram()) - .learningRate(0.025).windowSize(5).workers(1).build(); + Word2Vec ret = new Word2Vec + .Builder() + .useHierarchicSoftmax(false) + .resetModel(false) + .layerSize(syn0.columns()) + .allowParallelTokenization(true) + .elementsLearningAlgorithm(new SkipGram()) + .learningRate(0.025) + .windowSize(5) + .workers(1) + .build(); ret.setVocab(cache); ret.setLookupTable(lookupTable); return ret; - } /** @@ -927,7 +974,7 @@ public class WordVectorSerializer { public static Word2Vec readWord2VecFromText(@NonNull File vectors, @NonNull File hs, @NonNull File h_codes, @NonNull File h_points, @NonNull VectorsConfiguration configuration) throws IOException { // first we load syn0 - Pair pair = loadTxt(vectors); + Pair pair = loadTxt(new FileInputStream(vectors)); InMemoryLookupTable lookupTable = pair.getFirst(); lookupTable.setNegative(configuration.getNegative()); if (configuration.getNegative() > 0) @@ -1604,160 +1651,172 @@ public class WordVectorSerializer { * @param vectorsFile the path of the file to load\ * @return * @throws FileNotFoundException if the file does not exist - * @deprecated Use {@link #loadTxt(File)} + * @deprecated Use {@link #loadTxt(InputStream)} */ @Deprecated - public static WordVectors loadTxtVectors(File vectorsFile) - throws IOException { - Pair pair = loadTxt(vectorsFile); + public static WordVectors loadTxtVectors(File vectorsFile) throws IOException { + FileInputStream 
fileInputStream = new FileInputStream(vectorsFile); + Pair pair = loadTxt(fileInputStream); return fromPair(pair); } + static InputStream fileStream(@NonNull File file) throws IOException { + boolean isZip = file.getName().endsWith(".zip"); + boolean isGzip = GzipUtils.isCompressedFilename(file.getName()); + + InputStream inputStream; + + if (isZip) { + inputStream = decompressZip(file); + } else if (isGzip) { + FileInputStream fis = new FileInputStream(file); + inputStream = new GZIPInputStream(fis); + } else { + inputStream = new FileInputStream(file); + } + + return new BufferedInputStream(inputStream); + } + private static InputStream decompressZip(File modelFile) throws IOException { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); ZipFile zipFile = new ZipFile(modelFile); InputStream inputStream = null; - try (ZipInputStream zipStream = new ZipInputStream(new BufferedInputStream(new FileInputStream(modelFile)))) { - - ZipEntry entry = null; + try (FileInputStream fis = new FileInputStream(modelFile); + BufferedInputStream bis = new BufferedInputStream(fis); + ZipInputStream zipStream = new ZipInputStream(bis)) { + ZipEntry entry; if ((entry = zipStream.getNextEntry()) != null) { - inputStream = zipFile.getInputStream(entry); } + if (zipStream.getNextEntry() != null) { throw new RuntimeException("Zip archive " + modelFile + " contains more than 1 file"); } } + return inputStream; } - private static BufferedReader createReader(File vectorsFile) throws IOException { - InputStreamReader inputStreamReader; - try { - inputStreamReader = new InputStreamReader(decompressZip(vectorsFile)); - } catch (IOException e) { - inputStreamReader = new InputStreamReader(GzipUtils.isCompressedFilename(vectorsFile.getName()) - ? 
new GZIPInputStream(new FileInputStream(vectorsFile)) - : new FileInputStream(vectorsFile), "UTF-8"); + public static Pair loadTxt(@NonNull File file) { + try (InputStream inputStream = fileStream(file)) { + return loadTxt(inputStream); + } catch (IOException readTestException) { + throw new RuntimeException(readTestException); } - BufferedReader reader = new BufferedReader(inputStreamReader); - return reader; } /** - * Loads an in memory cache from the given path (sets syn0 and the vocab) + * Loads an in memory cache from the given input stream (sets syn0 and the vocab). * - * @param vectorsFile the path of the file to load - * @return a Pair holding the lookup table and the vocab cache. - * @throws FileNotFoundException if the input file does not exist + * @param inputStream input stream + * @return a {@link Pair} holding the lookup table and the vocab cache. */ - public static Pair loadTxt(File vectorsFile) - throws IOException, UnsupportedEncodingException { + public static Pair loadTxt(@NonNull InputStream inputStream) { + AbstractCache cache = new AbstractCache<>(); + LineIterator lines = null; - AbstractCache cache = new AbstractCache<>(); - BufferedReader reader = createReader(vectorsFile); - LineIterator iter = IOUtils.lineIterator(reader); - String line = null; - boolean hasHeader = false; - if (iter.hasNext()) { - line = iter.nextLine(); // skip header line - //look for spaces - if (!line.contains(" ")) { - log.debug("Skipping first line"); - hasHeader = true; - } else { - // we should check for something that looks like proper word vectors here. 
i.e: 1 word at the 0 position, and bunch of floats further - String[] split = line.split(" "); - try { - long[] header = new long[split.length]; - for (int x = 0; x < split.length; x++) { - header[x] = Long.parseLong(split[x]); - } - if (split.length < 4) - hasHeader = true; - // now we know, if that's all ints - it's just a header - // [0] - number of words - // [1] - vectorSize - // [2] - number of documents <-- DL4j-only value - if (split.length == 3) - cache.incrementTotalDocCount(header[2]); + try (InputStreamReader inputStreamReader = new InputStreamReader(inputStream); + BufferedReader reader = new BufferedReader(inputStreamReader)) { + lines = IOUtils.lineIterator(reader); - printOutProjectedMemoryUse(header[0], (int) header[1], 1); + String line = null; + boolean hasHeader = false; - hasHeader = true; + /* Check if first line is a header */ + if (lines.hasNext()) { + line = lines.nextLine(); + hasHeader = isHeader(line, cache); + } - try { - reader.close(); - } catch (Exception ex) { - } - } catch (Exception e) { - // if any conversion exception hits - that'll be considered header - hasHeader = false; + if (hasHeader) { + log.debug("First line is a header"); + line = lines.nextLine(); + } + List arrays = new ArrayList<>(); + long[] vShape = new long[]{ 1, -1 }; + + do { + String[] tokens = line.split(" "); + String word = ReadHelper.decodeB64(tokens[0]); + VocabWord vocabWord = new VocabWord(1.0, word); + vocabWord.setIndex(cache.numWords()); + + cache.addToken(vocabWord); + cache.addWordToIndex(vocabWord.getIndex(), word); + cache.putVocabWord(word); + + float[] vector = new float[tokens.length - 1]; + for (int i = 1; i < tokens.length; i++) { + vector[i - 1] = Float.parseFloat(tokens[i]); } + + vShape[1] = vector.length; + INDArray row = Nd4j.create(vector, vShape); + + arrays.add(row); + + line = lines.hasNext() ? 
lines.next() : null; + } while (line != null); + + INDArray syn = Nd4j.vstack(arrays); + + InMemoryLookupTable lookupTable = new InMemoryLookupTable + .Builder() + .vectorLength(arrays.get(0).columns()) + .useAdaGrad(false) + .cache(cache) + .useHierarchicSoftmax(false) + .build(); + + lookupTable.setSyn0(syn); + + return new Pair<>((InMemoryLookupTable) lookupTable, (VocabCache) cache); + } catch (IOException readeTextStreamException) { + throw new RuntimeException(readeTextStreamException); + } finally { + if (lines != null) { + lines.close(); } - } + } - //reposition buffer to be one line ahead - if (hasHeader) { - line = ""; - iter.close(); - //reader = new BufferedReader(new FileReader(vectorsFile)); - reader = createReader(vectorsFile); - iter = IOUtils.lineIterator(reader); - iter.nextLine(); - } + static boolean isHeader(String line, AbstractCache cache) { + if (!line.contains(" ")) { + return true; + } else { - List arrays = new ArrayList<>(); - long[] vShape = new long[]{1, -1}; - while (iter.hasNext()) { - if (line.isEmpty()) - line = iter.nextLine(); - String[] split = line.split(" "); - String word = ReadHelper.decodeB64(split[0]); //split[0].replaceAll(whitespaceReplacement, " "); - VocabWord word1 = new VocabWord(1.0, word); + /* We should check for something that looks like proper word vectors here. 
i.e: 1 word at the 0 + * position, and bunch of floats further */ + String[] headers = line.split(" "); - word1.setIndex(cache.numWords()); + try { + long[] header = new long[headers.length]; + for (int x = 0; x < headers.length; x++) { + header[x] = Long.parseLong(headers[x]); + } - cache.addToken(word1); + /* Now we know, if that's all ints - it's just a header + * [0] - number of words + * [1] - vectorLength + * [2] - number of documents <-- DL4j-only value + */ + if (headers.length == 3) { + long numberOfDocuments = header[2]; + cache.incrementTotalDocCount(numberOfDocuments); + } - cache.addWordToIndex(word1.getIndex(), word); + long numWords = header[0]; + int vectorLength = (int) header[1]; + printOutProjectedMemoryUse(numWords, vectorLength, 1); - cache.putVocabWord(word); - - float[] vector = new float[split.length - 1]; - - for (int i = 1; i < split.length; i++) { - vector[i - 1] = Float.parseFloat(split[i]); + return true; + } catch (Exception notHeaderException) { + // if any conversion exception hits - that'll be considered header + return false; } - - vShape[1] = vector.length; - INDArray row = Nd4j.create(vector, vShape); - - arrays.add(row); - - // workaround for skipped first row - line = ""; } - - INDArray syn = Nd4j.vstack(arrays); - - InMemoryLookupTable lookupTable = - (InMemoryLookupTable) new InMemoryLookupTable.Builder().vectorLength(arrays.get(0).columns()) - .useAdaGrad(false).cache(cache).useHierarchicSoftmax(false).build(); - - lookupTable.setSyn0(syn); - - iter.close(); - - try { - reader.close(); - } catch (Exception e) { - } - - return new Pair<>(lookupTable, (VocabCache) cache); } /** @@ -2352,22 +2411,6 @@ public class WordVectorSerializer { } } - /** - * This method - * 1) Binary model, either compressed or not. Like well-known Google Model - * 2) Popular CSV word2vec text format - * 3) DL4j compressed format - *

    - * Please note: Only weights will be loaded by this method. - * - * @param file - * @return - */ - public static Word2Vec readWord2VecModel(@NonNull File file) { - return readWord2VecModel(file, false); - } - - /** * This method * 1) Binary model, either compressed or not. Like well-known Google Model @@ -2389,106 +2432,196 @@ public class WordVectorSerializer { * 2) Popular CSV word2vec text format * 3) DL4j compressed format *

    - * Please note: if extended data isn't available, only weights will be loaded instead. + * Please note: Only weights will be loaded by this method. * - * @param path - * @param extendedModel if TRUE, we'll try to load HS states & Huffman tree info, if FALSE, only weights will be loaded + * @param path path to model file + * @param extendedModel if TRUE, we'll try to load HS states & Huffman tree info, if FALSE, only weights will be loaded * @return */ public static Word2Vec readWord2VecModel(String path, boolean extendedModel) { return readWord2VecModel(new File(path), extendedModel); } - public static Word2Vec readAsBinaryNoLineBreaks(@NonNull File file) { + /** + * This method + * 1) Binary model, either compressed or not. Like well-known Google Model + * 2) Popular CSV word2vec text format + * 3) DL4j compressed format + *

    + * Please note: Only weights will be loaded by this method. + * + * @param file + * @return + */ + public static Word2Vec readWord2VecModel(File file) { + return readWord2VecModel(file, false); + } + + /** + * This method + * 1) Binary model, either compressed or not. Like well-known Google Model + * 2) Popular CSV word2vec text format + * 3) DL4j compressed format + *

    + * Please note: if extended data isn't available, only weights will be loaded instead. + * + * @param file model file + * @param extendedModel if TRUE, we'll try to load HS states & Huffman tree info, if FALSE, only weights will be loaded + * @return word2vec model + */ + public static Word2Vec readWord2VecModel(File file, boolean extendedModel) { + if (!file.exists() || !file.isFile()) { + throw new ND4JIllegalStateException("File [" + file.getAbsolutePath() + "] doesn't exist"); + } + boolean originalPeriodic = Nd4j.getMemoryManager().isPeriodicGcActive(); + if (originalPeriodic) { + Nd4j.getMemoryManager().togglePeriodicGc(false); + } + Nd4j.getMemoryManager().setOccasionalGcFrequency(50000); + + try { + return readWord2Vec(file, extendedModel); + } catch (Exception readSequenceVectors) { + try { + return extendedModel + ? readAsExtendedModel(file) + : readAsSimplifiedModel(file); + } catch (Exception loadFromFileException) { + try { + return readAsCsv(file); + } catch (Exception readCsvException) { + try { + return readAsBinary(file); + } catch (Exception readBinaryException) { + try { + return readAsBinaryNoLineBreaks(file); + } catch (Exception readModelException) { + log.error("Unable to guess input file format", readModelException); + throw new RuntimeException("Unable to guess input file format. 
Please use corresponding loader directly"); + } + } + } + } + } + } + + public static Word2Vec readAsBinaryNoLineBreaks(@NonNull File file) { + try (InputStream inputStream = fileStream(file)) { + return readAsBinaryNoLineBreaks(inputStream); + } catch (IOException readCsvException) { + throw new RuntimeException(readCsvException); + } + } + + public static Word2Vec readAsBinaryNoLineBreaks(@NonNull InputStream inputStream) { boolean originalPeriodic = Nd4j.getMemoryManager().isPeriodicGcActive(); int originalFreq = Nd4j.getMemoryManager().getOccasionalGcFrequency(); - Word2Vec vec; // try to load without linebreaks try { - if (originalPeriodic) + if (originalPeriodic) { Nd4j.getMemoryManager().togglePeriodicGc(true); + } Nd4j.getMemoryManager().setOccasionalGcFrequency(originalFreq); - vec = readBinaryModel(file, false, false); - return vec; - } catch (Exception ez) { - throw new RuntimeException( - "Unable to guess input file format. Please use corresponding loader directly"); + return readBinaryModel(inputStream, false, false); + } catch (Exception readModelException) { + log.error("Cannot read binary model", readModelException); + throw new RuntimeException("Unable to guess input file format. Please use corresponding loader directly"); + } + } + + public static Word2Vec readAsBinary(@NonNull File file) { + try (InputStream inputStream = fileStream(file)) { + return readAsBinary(inputStream); + } catch (IOException readCsvException) { + throw new RuntimeException(readCsvException); } } /** - * This method loads Word2Vec model from binary file + * This method loads Word2Vec model from binary input stream. 
* - * @param file File - * @return Word2Vec + * @param inputStream binary input stream + * @return Word2Vec */ - public static Word2Vec readAsBinary(@NonNull File file) { + public static Word2Vec readAsBinary(@NonNull InputStream inputStream) { boolean originalPeriodic = Nd4j.getMemoryManager().isPeriodicGcActive(); int originalFreq = Nd4j.getMemoryManager().getOccasionalGcFrequency(); - Word2Vec vec; - // we fallback to trying binary model instead try { log.debug("Trying binary model restoration..."); - if (originalPeriodic) + if (originalPeriodic) { Nd4j.getMemoryManager().togglePeriodicGc(true); + } Nd4j.getMemoryManager().setOccasionalGcFrequency(originalFreq); - vec = readBinaryModel(file, true, false); - return vec; - } catch (Exception ey) { - throw new RuntimeException(ey); + return readBinaryModel(inputStream, true, false); + } catch (Exception readModelException) { + throw new RuntimeException(readModelException); + } + } + + public static Word2Vec readAsCsv(@NonNull File file) { + try (InputStream inputStream = fileStream(file)) { + return readAsCsv(inputStream); + } catch (IOException readCsvException) { + throw new RuntimeException(readCsvException); } } /** * This method loads Word2Vec model from csv file * - * @param file File - * @return Word2Vec + * @param inputStream input stream + * @return Word2Vec model */ - public static Word2Vec readAsCsv(@NonNull File file) { - - Word2Vec vec; + public static Word2Vec readAsCsv(@NonNull InputStream inputStream) { VectorsConfiguration configuration = new VectorsConfiguration(); // let's try to load this file as csv file try { log.debug("Trying CSV model restoration..."); - Pair pair = loadTxt(file); - Word2Vec.Builder builder = new Word2Vec.Builder().lookupTable(pair.getFirst()).useAdaGrad(false) - .vocabCache(pair.getSecond()).layerSize(pair.getFirst().layerSize()) + Pair pair = loadTxt(inputStream); + Word2Vec.Builder builder = new Word2Vec + .Builder() + .lookupTable(pair.getFirst()) + .useAdaGrad(false) + 
.vocabCache(pair.getSecond()) + .layerSize(pair.getFirst().layerSize()) // we don't use hs here, because model is incomplete - .useHierarchicSoftmax(false).resetModel(false); + .useHierarchicSoftmax(false) + .resetModel(false); TokenizerFactory factory = getTokenizerFactory(configuration); - if (factory != null) + if (factory != null) { builder.tokenizerFactory(factory); + } - vec = builder.build(); - return vec; + return builder.build(); } catch (Exception ex) { throw new RuntimeException("Unable to load model in CSV format"); } } + /** + * This method just loads full compressed model. + */ private static Word2Vec readAsExtendedModel(@NonNull File file) throws IOException { int originalFreq = Nd4j.getMemoryManager().getOccasionalGcFrequency(); boolean originalPeriodic = Nd4j.getMemoryManager().isPeriodicGcActive(); log.debug("Trying full model restoration..."); - // this method just loads full compressed model - if (originalPeriodic) + if (originalPeriodic) { Nd4j.getMemoryManager().togglePeriodicGc(true); + } Nd4j.getMemoryManager().setOccasionalGcFrequency(originalFreq); @@ -2627,67 +2760,6 @@ public class WordVectorSerializer { return vec; } - /** - * This method - * 1) Binary model, either compressed or not. Like well-known Google Model - * 2) Popular CSV word2vec text format - * 3) DL4j compressed format - *

    - * Please note: if extended data isn't available, only weights will be loaded instead. - * - * @param file - * @param extendedModel if TRUE, we'll try to load HS states & Huffman tree info, if FALSE, only weights will be loaded - * @return - */ - public static Word2Vec readWord2VecModel(@NonNull File file, boolean extendedModel) { - - if (!file.exists() || !file.isFile()) - throw new ND4JIllegalStateException("File [" + file.getAbsolutePath() + "] doesn't exist"); - - Word2Vec vec = null; - - int originalFreq = Nd4j.getMemoryManager().getOccasionalGcFrequency(); - boolean originalPeriodic = Nd4j.getMemoryManager().isPeriodicGcActive(); - if (originalPeriodic) - Nd4j.getMemoryManager().togglePeriodicGc(false); - Nd4j.getMemoryManager().setOccasionalGcFrequency(50000); - - // try to load zip format - try { - vec = readWord2Vec(file, extendedModel); - return vec; - } catch (Exception e) { - // let's try to load this file as csv file - try { - if (extendedModel) { - vec = readAsExtendedModel(file); - return vec; - } else { - vec = readAsSimplifiedModel(file); - return vec; - } - } catch (Exception ex) { - try { - vec = readAsCsv(file); - return vec; - } catch (Exception exc) { - try { - vec = readAsBinary(file); - return vec; - } catch (Exception exce) { - try { - vec = readAsBinaryNoLineBreaks(file); - return vec; - - } catch (Exception excep) { - throw new RuntimeException("Unable to guess input file format. 
Please use corresponding loader directly"); - } - } - } - } - } - } - protected static TokenizerFactory getTokenizerFactory(VectorsConfiguration configuration) { if (configuration == null) return null; @@ -3019,16 +3091,13 @@ public class WordVectorSerializer { /** * This method restores Word2Vec model from file * - * @param path String - * @param readExtendedTables booleab + * @param path + * @param readExtendedTables * @return Word2Vec */ - public static Word2Vec readWord2Vec(@NonNull String path, boolean readExtendedTables) - throws IOException { - + public static Word2Vec readWord2Vec(@NonNull String path, boolean readExtendedTables) { File file = new File(path); - Word2Vec word2Vec = readWord2Vec(file, readExtendedTables); - return word2Vec; + return readWord2Vec(file, readExtendedTables); } /** @@ -3139,11 +3208,12 @@ public class WordVectorSerializer { * @param readExtendedTables boolean * @return Word2Vec */ - public static Word2Vec readWord2Vec(@NonNull File file, boolean readExtendedTables) - throws IOException { - - Word2Vec word2Vec = readWord2Vec(new FileInputStream(file), readExtendedTables); - return word2Vec; + public static Word2Vec readWord2Vec(@NonNull File file, boolean readExtendedTables) { + try (InputStream inputStream = fileStream(file)) { + return readWord2Vec(inputStream, readExtendedTables); + } catch (Exception readSequenceVectors) { + throw new RuntimeException(readSequenceVectors); + } } /** @@ -3153,13 +3223,19 @@ public class WordVectorSerializer { * @param readExtendedTable boolean * @return Word2Vec */ - public static Word2Vec readWord2Vec(@NonNull InputStream stream, - boolean readExtendedTable) throws IOException { + public static Word2Vec readWord2Vec( + @NonNull InputStream stream, + boolean readExtendedTable) throws IOException { SequenceVectors vectors = readSequenceVectors(stream, readExtendedTable); - Word2Vec word2Vec = new Word2Vec.Builder(vectors.getConfiguration()).layerSize(vectors.getLayerSize()).build(); + + Word2Vec 
word2Vec = new Word2Vec + .Builder(vectors.getConfiguration()) + .layerSize(vectors.getLayerSize()) + .build(); word2Vec.setVocab(vectors.getVocab()); word2Vec.setLookupTable(vectors.lookupTable()); word2Vec.setModelUtils(vectors.getModelUtils()); + return word2Vec; } diff --git a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/glove/AbstractCoOccurrences.java b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/glove/AbstractCoOccurrences.java index 7c9134e2c..969dbaeb9 100644 --- a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/glove/AbstractCoOccurrences.java +++ b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/glove/AbstractCoOccurrences.java @@ -29,7 +29,7 @@ import org.deeplearning4j.text.sentenceiterator.PrefetchingSentenceIterator; import org.deeplearning4j.text.sentenceiterator.SentenceIterator; import org.deeplearning4j.text.sentenceiterator.SynchronizedSentenceIterator; import org.deeplearning4j.common.util.DL4JFileUtils; -import org.deeplearning4j.core.util.ThreadUtils; +import org.nd4j.common.util.ThreadUtils; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.common.primitives.Pair; import org.slf4j.Logger; diff --git a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/paragraphvectors/ParagraphVectors.java b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/paragraphvectors/ParagraphVectors.java index 1f1dce5f7..c007d4b96 100644 --- a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/paragraphvectors/ParagraphVectors.java +++ b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/paragraphvectors/ParagraphVectors.java @@ -47,7 +47,7 @@ import 
org.deeplearning4j.text.sentenceiterator.SentenceIterator; import org.deeplearning4j.text.sentenceiterator.interoperability.SentenceIteratorConverter; import org.deeplearning4j.text.sentenceiterator.labelaware.LabelAwareSentenceIterator; import org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory; -import org.deeplearning4j.core.util.ThreadUtils; +import org.nd4j.common.util.ThreadUtils; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.exception.ND4JIllegalStateException; import org.nd4j.linalg.factory.Nd4j; diff --git a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/sequencevectors/SequenceVectors.java b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/sequencevectors/SequenceVectors.java index 3f2d5f216..d31cc51b0 100644 --- a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/sequencevectors/SequenceVectors.java +++ b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/sequencevectors/SequenceVectors.java @@ -47,7 +47,7 @@ import org.deeplearning4j.models.word2vec.VocabWord; import org.deeplearning4j.models.word2vec.wordstore.VocabCache; import org.deeplearning4j.models.word2vec.wordstore.VocabConstructor; import org.deeplearning4j.models.word2vec.wordstore.inmemory.AbstractCache; -import org.deeplearning4j.core.util.ThreadUtils; +import org.nd4j.common.util.ThreadUtils; import org.nd4j.linalg.api.memory.conf.WorkspaceConfiguration; import org.nd4j.linalg.api.memory.enums.LearningPolicy; import org.nd4j.linalg.api.ndarray.INDArray; diff --git a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/word2vec/wordstore/VocabConstructor.java b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/word2vec/wordstore/VocabConstructor.java index 
10f2a4811..fca1288d0 100644 --- a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/word2vec/wordstore/VocabConstructor.java +++ b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/word2vec/wordstore/VocabConstructor.java @@ -27,7 +27,7 @@ import org.deeplearning4j.models.sequencevectors.sequence.SequenceElement; import org.deeplearning4j.models.word2vec.Huffman; import org.deeplearning4j.models.word2vec.wordstore.inmemory.AbstractCache; import org.deeplearning4j.text.invertedindex.InvertedIndex; -import org.deeplearning4j.core.util.ThreadUtils; +import org.nd4j.common.util.ThreadUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.threadly.concurrent.PriorityScheduler; diff --git a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/text/sentenceiterator/PrefetchingSentenceIterator.java b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/text/sentenceiterator/PrefetchingSentenceIterator.java index 8490a1f99..cb1e860c1 100644 --- a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/text/sentenceiterator/PrefetchingSentenceIterator.java +++ b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/text/sentenceiterator/PrefetchingSentenceIterator.java @@ -18,7 +18,7 @@ package org.deeplearning4j.text.sentenceiterator; import lombok.NonNull; -import org.deeplearning4j.core.util.ThreadUtils; +import org.nd4j.common.util.ThreadUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/TsneTest.java b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/TsneTest.java index 69fcd236c..5466bc15b 100644 --- 
a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/TsneTest.java +++ b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/TsneTest.java @@ -37,8 +37,6 @@ import java.io.File; import java.util.ArrayList; import java.util.List; -import static org.junit.Assert.assertEquals; - @Slf4j public class TsneTest extends BaseDL4JTest { diff --git a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/models/sequencevectors/serialization/WordVectorSerializerTest.java b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/models/embeddings/loader/WordVectorSerializerTest.java similarity index 86% rename from deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/models/sequencevectors/serialization/WordVectorSerializerTest.java rename to deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/models/embeddings/loader/WordVectorSerializerTest.java index b7aff923e..f089a6ae9 100644 --- a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/models/sequencevectors/serialization/WordVectorSerializerTest.java +++ b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/models/embeddings/loader/WordVectorSerializerTest.java @@ -14,17 +14,14 @@ * SPDX-License-Identifier: Apache-2.0 ******************************************************************************/ -package org.deeplearning4j.models.sequencevectors.serialization; +package org.deeplearning4j.models.embeddings.loader; import lombok.extern.slf4j.Slf4j; import lombok.val; -import org.apache.commons.lang.StringUtils; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.models.embeddings.WeightLookupTable; import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable; import 
org.deeplearning4j.models.embeddings.learning.impl.elements.CBOW; -import org.deeplearning4j.models.embeddings.loader.VectorsConfiguration; -import org.deeplearning4j.models.embeddings.loader.WordVectorSerializer; import org.deeplearning4j.models.embeddings.reader.impl.BasicModelUtils; import org.deeplearning4j.models.embeddings.reader.impl.FlatModelUtils; import org.deeplearning4j.models.fasttext.FastText; @@ -47,7 +44,11 @@ import java.io.File; import java.io.IOException; import java.util.Collections; -import static org.junit.Assert.*; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; @Slf4j public class WordVectorSerializerTest extends BaseDL4JTest { @@ -78,10 +79,11 @@ public class WordVectorSerializerTest extends BaseDL4JTest { syn1 = Nd4j.rand(DataType.FLOAT, 10, 2), syn1Neg = Nd4j.rand(DataType.FLOAT, 10, 2); - InMemoryLookupTable lookupTable = - (InMemoryLookupTable) new InMemoryLookupTable.Builder() - .useAdaGrad(false).cache(cache) - .build(); + InMemoryLookupTable lookupTable = new InMemoryLookupTable + .Builder() + .useAdaGrad(false) + .cache(cache) + .build(); lookupTable.setSyn0(syn0); lookupTable.setSyn1(syn1); @@ -92,7 +94,6 @@ public class WordVectorSerializerTest extends BaseDL4JTest { lookupTable(lookupTable). 
build(); SequenceVectors deser = null; - String json = StringUtils.EMPTY; try { ByteArrayOutputStream baos = new ByteArrayOutputStream(); WordVectorSerializer.writeSequenceVectors(vectors, baos); @@ -126,10 +127,11 @@ public class WordVectorSerializerTest extends BaseDL4JTest { syn1 = Nd4j.rand(DataType.FLOAT, 10, 2), syn1Neg = Nd4j.rand(DataType.FLOAT, 10, 2); - InMemoryLookupTable lookupTable = - (InMemoryLookupTable) new InMemoryLookupTable.Builder() - .useAdaGrad(false).cache(cache) - .build(); + InMemoryLookupTable lookupTable = new InMemoryLookupTable + .Builder() + .useAdaGrad(false) + .cache(cache) + .build(); lookupTable.setSyn0(syn0); lookupTable.setSyn1(syn1); @@ -204,10 +206,11 @@ public class WordVectorSerializerTest extends BaseDL4JTest { syn1 = Nd4j.rand(DataType.FLOAT, 10, 2), syn1Neg = Nd4j.rand(DataType.FLOAT, 10, 2); - InMemoryLookupTable lookupTable = - (InMemoryLookupTable) new InMemoryLookupTable.Builder() - .useAdaGrad(false).cache(cache) - .build(); + InMemoryLookupTable lookupTable = new InMemoryLookupTable + .Builder() + .useAdaGrad(false) + .cache(cache) + .build(); lookupTable.setSyn0(syn0); lookupTable.setSyn1(syn1); @@ -252,10 +255,11 @@ public class WordVectorSerializerTest extends BaseDL4JTest { syn1 = Nd4j.rand(DataType.FLOAT, 10, 2), syn1Neg = Nd4j.rand(DataType.FLOAT, 10, 2); - InMemoryLookupTable lookupTable = - (InMemoryLookupTable) new InMemoryLookupTable.Builder() - .useAdaGrad(false).cache(cache) - .build(); + InMemoryLookupTable lookupTable = new InMemoryLookupTable + .Builder() + .useAdaGrad(false) + .cache(cache) + .build(); lookupTable.setSyn0(syn0); lookupTable.setSyn1(syn1); @@ -267,7 +271,6 @@ public class WordVectorSerializerTest extends BaseDL4JTest { WeightLookupTable deser = null; try { WordVectorSerializer.writeLookupTable(lookupTable, file); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); deser = WordVectorSerializer.readLookupTable(file); } catch (Exception e) { log.error("",e); @@ -305,7 +308,6 @@ 
public class WordVectorSerializerTest extends BaseDL4JTest { FastText deser = null; try { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); deser = WordVectorSerializer.readWordVectors(new File(dir, "some.data")); } catch (Exception e) { log.error("",e); @@ -323,4 +325,32 @@ public class WordVectorSerializerTest extends BaseDL4JTest { assertEquals(fastText.getInputFile(), deser.getInputFile()); assertEquals(fastText.getOutputFile(), deser.getOutputFile()); } + + @Test + public void testIsHeader_withValidHeader () { + + /* Given */ + AbstractCache cache = new AbstractCache<>(); + String line = "48 100"; + + /* When */ + boolean isHeader = WordVectorSerializer.isHeader(line, cache); + + /* Then */ + assertTrue(isHeader); + } + + @Test + public void testIsHeader_notHeader () { + + /* Given */ + AbstractCache cache = new AbstractCache<>(); + String line = "your -0.0017603 0.0030831 0.00069072 0.0020581 -0.0050952 -2.2573e-05 -0.001141"; + + /* When */ + boolean isHeader = WordVectorSerializer.isHeader(line, cache); + + /* Then */ + assertFalse(isHeader); + } } diff --git a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/models/fasttext/FastTextTest.java b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/models/fasttext/FastTextTest.java index 4f0548ef5..4c89cfa1c 100644 --- a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/models/fasttext/FastTextTest.java +++ b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/models/fasttext/FastTextTest.java @@ -1,9 +1,9 @@ package org.deeplearning4j.models.fasttext; import lombok.extern.slf4j.Slf4j; +import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.models.embeddings.loader.WordVectorSerializer; import org.deeplearning4j.models.word2vec.Word2Vec; -import org.deeplearning4j.BaseDL4JTest; import 
org.deeplearning4j.text.sentenceiterator.BasicLineIterator; import org.deeplearning4j.text.sentenceiterator.SentenceIterator; import org.junit.Rule; @@ -14,13 +14,14 @@ import org.nd4j.common.primitives.Pair; import org.nd4j.common.resources.Resources; import java.io.File; +import java.io.FileNotFoundException; import java.io.IOException; - +import static org.hamcrest.CoreMatchers.hasItems; +import static org.hamcrest.MatcherAssert.assertThat; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; - @Slf4j public class FastTextTest extends BaseDL4JTest { @@ -32,7 +33,6 @@ public class FastTextTest extends BaseDL4JTest { private File cbowModelFile = Resources.asFile("models/fasttext/cbow.model.bin"); private File supervisedVectors = Resources.asFile("models/fasttext/supervised.model.vec"); - @Rule public TemporaryFolder testDir = new TemporaryFolder(); @@ -90,7 +90,7 @@ public class FastTextTest extends BaseDL4JTest { } @Test - public void tesLoadCBOWModel() throws IOException { + public void tesLoadCBOWModel() { FastText fastText = new FastText(cbowModelFile); fastText.test(cbowModelFile); @@ -99,7 +99,7 @@ public class FastTextTest extends BaseDL4JTest { assertEquals("enjoy", fastText.vocab().wordAtIndex(fastText.vocab().numWords() - 1)); double[] expected = {5.040466203354299E-4, 0.001005030469968915, 2.8882650076411664E-4, -6.413314840756357E-4, -1.78931062691845E-4, -0.0023157168179750443, -0.002215880434960127, 0.00274421414360404, -1.5344757412094623E-4, 4.6274057240225375E-4, -1.4383681991603225E-4, 3.7832374800927937E-4, 2.523412986192852E-4, 0.0018913350068032742, -0.0024741862434893847, -4.976555937901139E-4, 0.0039220210164785385, -0.001781729981303215, -6.010578363202512E-4, -0.00244093406945467, -7.98621098510921E-4, -0.0010007203090935946, -0.001640203408896923, 7.897148607298732E-4, 9.131592814810574E-4, -0.0013367272913455963, -0.0014030139427632093, -7.755287806503475E-4, -4.2878396925516427E-4, 
6.912827957421541E-4, -0.0011824817629531026, -0.0036014916840940714, 0.004353308118879795, -7.073904271237552E-5, -9.646290563978255E-4, -0.0031849315855652094, 2.3360115301329643E-4, -2.9103990527801216E-4, -0.0022990566212683916, -0.002393763978034258, -0.001034979010000825, -0.0010725988540798426, 0.0018285386031493545, -0.0013178540393710136, -1.6632364713586867E-4, -1.4665909475297667E-5, 5.445032729767263E-4, 2.999933494720608E-4, -0.0014367225812748075, -0.002345481887459755, 0.001117417006753385, -8.688368834555149E-4, -0.001830018823966384, 0.0013242220738902688, -8.880519890226424E-4, -6.888324278406799E-4, -0.0036394784692674875, 0.002179111586883664, -1.7201311129610986E-4, 0.002365073887631297, 0.002688770182430744, 0.0023955567739903927, 0.001469283364713192, 0.0011803617235273123, 5.871498142369092E-4, -7.099180947989225E-4, 7.518937345594168E-4, -8.599072461947799E-4, -6.600041524507105E-4, -0.002724145073443651, -8.365285466425121E-4, 0.0013173354091122746, 0.001083166105672717, 0.0014539906987920403, -3.1698777456767857E-4, -2.387022686889395E-4, 1.9560157670639455E-4, 0.0020277926232665777, -0.0012741144746541977, -0.0013026101514697075, -1.5212174912448972E-4, 0.0014194383984431624, 0.0012500399025157094, 0.0013362085446715355, 3.692879108712077E-4, 4.319801155361347E-5, 0.0011261265026405454, 0.0017244465416297317, 5.564604725805111E-5, 0.002170475199818611, 0.0014707016525790095, 0.001303741242736578, 0.005553730763494968, -0.0011097051901742816, -0.0013661726843565702, 0.0014100460102781653, 0.0011811562580987811, -6.622733199037611E-4, 7.860265322960913E-4, -9.811905911192298E-4}; - assertArrayEquals(expected, fastText.getWordVector("enjoy"), 1e-4); + assertArrayEquals(expected, fastText.getWordVector("enjoy"), 2e-3); } @Test @@ -111,7 +111,7 @@ public class FastTextTest extends BaseDL4JTest { assertEquals("association", fastText.vocab().wordAtIndex(fastText.vocab().numWords() - 1)); double[] expected = {-0.006423053797334433, 
0.007660661358386278, 0.006068876478821039, -0.004772625397890806, -0.007143457420170307, -0.007735592778772116, -0.005607823841273785, -0.00836215727031231, 0.0011235733982175589, 2.599214785732329E-4, 0.004131870809942484, 0.007203693501651287, 0.0016768622444942594, 0.008694255724549294, -0.0012487826170399785, -0.00393667770549655, -0.006292815785855055, 0.0049359360709786415, -3.356488887220621E-4, -0.009407570585608482, -0.0026168026961386204, -0.00978928804397583, 0.0032913016621023417, -0.0029464277904480696, -0.008649969473481178, 8.056449587456882E-4, 0.0043088337406516075, -0.008980576880276203, 0.008716211654245853, 0.0073893265798687935, -0.007388216909021139, 0.003814412746578455, -0.005518500227481127, 0.004668557550758123, 0.006603693123906851, 0.003820829326286912, 0.007174000144004822, -0.006393063813447952, -0.0019381389720365405, -0.0046371882781386375, -0.006193376146256924, -0.0036685809027403593, 7.58899434003979E-4, -0.003185075242072344, -0.008330358192324638, 3.3206873922608793E-4, -0.005389622412621975, 0.009706716984510422, 0.0037855932023376226, -0.008665262721478939, -0.0032511046156287193, 4.4134497875347733E-4, -0.008377416990697384, -0.009110655635595322, 0.0019723298028111458, 0.007486093323677778, 0.006400121841579676, 0.00902814231812954, 0.00975200068205595, 0.0060582347214221954, -0.0075621469877660275, 1.0270809434587136E-4, -0.00673140911385417, -0.007316927425563335, 0.009916870854794979, -0.0011407854035496712, -4.502215306274593E-4, -0.007612560410052538, 0.008726916275918484, -3.0280642022262327E-5, 0.005529289599508047, -0.007944817654788494, 0.005593308713287115, 0.003423960180953145, 4.1348213562741876E-4, 0.009524818509817123, -0.0025129399728029966, -0.0030074280221015215, -0.007503866218030453, -0.0028124507516622543, -0.006841592025011778, -2.9375351732596755E-4, 0.007195258513092995, -0.007775942329317331, 3.951996040996164E-4, -0.006887971889227629, 0.0032655203249305487, -0.007975360378623009, 
-4.840183464693837E-6, 0.004651934839785099, 0.0031739831902086735, 0.004644941072911024, -0.007461248897016048, 0.003057275665923953, 0.008903342299163342, 0.006857945583760738, 0.007567950990051031, 0.001506582135334611, 0.0063307867385447025, 0.005645462777465582}; - assertArrayEquals(expected, fastText.getWordVector("association"), 1e-4); + assertArrayEquals(expected, fastText.getWordVector("association"), 2e-3); String label = fastText.predict(text); assertEquals("__label__soccer", label); @@ -126,7 +126,7 @@ public class FastTextTest extends BaseDL4JTest { assertEquals("association", fastText.vocab().wordAtIndex(fastText.vocab().numWords() - 1)); double[] expected = {-0.006423053797334433, 0.007660661358386278, 0.006068876478821039, -0.004772625397890806, -0.007143457420170307, -0.007735592778772116, -0.005607823841273785, -0.00836215727031231, 0.0011235733982175589, 2.599214785732329E-4, 0.004131870809942484, 0.007203693501651287, 0.0016768622444942594, 0.008694255724549294, -0.0012487826170399785, -0.00393667770549655, -0.006292815785855055, 0.0049359360709786415, -3.356488887220621E-4, -0.009407570585608482, -0.0026168026961386204, -0.00978928804397583, 0.0032913016621023417, -0.0029464277904480696, -0.008649969473481178, 8.056449587456882E-4, 0.0043088337406516075, -0.008980576880276203, 0.008716211654245853, 0.0073893265798687935, -0.007388216909021139, 0.003814412746578455, -0.005518500227481127, 0.004668557550758123, 0.006603693123906851, 0.003820829326286912, 0.007174000144004822, -0.006393063813447952, -0.0019381389720365405, -0.0046371882781386375, -0.006193376146256924, -0.0036685809027403593, 7.58899434003979E-4, -0.003185075242072344, -0.008330358192324638, 3.3206873922608793E-4, -0.005389622412621975, 0.009706716984510422, 0.0037855932023376226, -0.008665262721478939, -0.0032511046156287193, 4.4134497875347733E-4, -0.008377416990697384, -0.009110655635595322, 0.0019723298028111458, 0.007486093323677778, 0.006400121841579676, 0.00902814231812954, 
0.00975200068205595, 0.0060582347214221954, -0.0075621469877660275, 1.0270809434587136E-4, -0.00673140911385417, -0.007316927425563335, 0.009916870854794979, -0.0011407854035496712, -4.502215306274593E-4, -0.007612560410052538, 0.008726916275918484, -3.0280642022262327E-5, 0.005529289599508047, -0.007944817654788494, 0.005593308713287115, 0.003423960180953145, 4.1348213562741876E-4, 0.009524818509817123, -0.0025129399728029966, -0.0030074280221015215, -0.007503866218030453, -0.0028124507516622543, -0.006841592025011778, -2.9375351732596755E-4, 0.007195258513092995, -0.007775942329317331, 3.951996040996164E-4, -0.006887971889227629, 0.0032655203249305487, -0.007975360378623009, -4.840183464693837E-6, 0.004651934839785099, 0.0031739831902086735, 0.004644941072911024, -0.007461248897016048, 0.003057275665923953, 0.008903342299163342, 0.006857945583760738, 0.007567950990051031, 0.001506582135334611, 0.0063307867385447025, 0.005645462777465582}; - assertArrayEquals(expected, fastText.getWordVector("association"), 1e-4); + assertArrayEquals(expected, fastText.getWordVector("association"), 2e-3); String label = fastText.predict(text); fastText.wordsNearest("test",1); @@ -140,10 +140,10 @@ public class FastTextTest extends BaseDL4JTest { Pair result = fastText.predictProbability(text); assertEquals("__label__soccer", result.getFirst()); - assertEquals(-0.6930, result.getSecond(), 1e-4); + assertEquals(-0.6930, result.getSecond(), 2e-3); assertEquals(48, fastText.vocabSize()); - assertEquals(0.0500, fastText.getLearningRate(), 1e-4); + assertEquals(0.0500, fastText.getLearningRate(), 2e-3); assertEquals(100, fastText.getDimension()); assertEquals(5, fastText.getContextWindowSize()); assertEquals(5, fastText.getEpoch()); @@ -155,7 +155,7 @@ public class FastTextTest extends BaseDL4JTest { } @Test - public void testVocabulary() throws IOException { + public void testVocabulary() { FastText fastText = new FastText(supModelFile); assertEquals(48, fastText.vocab().numWords()); 
assertEquals(48, fastText.vocabSize()); @@ -171,78 +171,73 @@ public class FastTextTest extends BaseDL4JTest { } @Test - public void testLoadIterator() { - try { - SentenceIterator iter = new BasicLineIterator(inputFile.getAbsolutePath()); - FastText fastText = - FastText.builder().supervised(true).iterator(iter).build(); - fastText.loadIterator(); - - } catch (IOException e) { - log.error("",e); - } + public void testLoadIterator() throws FileNotFoundException { + SentenceIterator iter = new BasicLineIterator(inputFile.getAbsolutePath()); + FastText + .builder() + .supervised(true) + .iterator(iter) + .build() + .loadIterator(); } @Test(expected=IllegalStateException.class) public void testState() { FastText fastText = new FastText(); - String label = fastText.predict("something"); + fastText.predict("something"); } @Test public void testPretrainedVectors() throws IOException { File output = testDir.newFile(); - FastText fastText = - FastText.builder().supervised(true). - inputFile(inputFile.getAbsolutePath()). - pretrainedVectorsFile(supervisedVectors.getAbsolutePath()). - outputFile(output.getAbsolutePath()).build(); + FastText fastText = FastText + .builder() + .supervised(true) + .inputFile(inputFile.getAbsolutePath()) + .pretrainedVectorsFile(supervisedVectors.getAbsolutePath()) + .outputFile(output.getAbsolutePath()) + .build(); + log.info("\nTraining supervised model ...\n"); fastText.fit(); } @Test public void testWordsStatistics() throws IOException { - File output = testDir.newFile(); - FastText fastText = - FastText.builder().supervised(true). - inputFile(inputFile.getAbsolutePath()). 
- outputFile(output.getAbsolutePath()).build(); + FastText fastText = FastText + .builder() + .supervised(true) + .inputFile(inputFile.getAbsolutePath()) + .outputFile(output.getAbsolutePath()) + .build(); log.info("\nTraining supervised model ...\n"); fastText.fit(); - Word2Vec word2Vec = WordVectorSerializer.readAsCsv(new File(output.getAbsolutePath() + ".vec")); + File file = new File(output.getAbsolutePath() + ".vec"); + Word2Vec word2Vec = WordVectorSerializer.readAsCsv(file); - assertEquals(48, word2Vec.getVocab().numWords()); - - System.out.println(word2Vec.wordsNearest("association", 3)); - System.out.println(word2Vec.similarity("Football", "teams")); - System.out.println(word2Vec.similarity("professional", "minutes")); - System.out.println(word2Vec.similarity("java","cpp")); + assertEquals(48, word2Vec.getVocab().numWords()); + assertEquals("", 0.1667751520872116, word2Vec.similarity("Football", "teams"), 2e-3); + assertEquals("", 0.10083991289138794, word2Vec.similarity("professional", "minutes"), 2e-3); + assertEquals("", Double.NaN, word2Vec.similarity("java","cpp"), 0.0); + assertThat(word2Vec.wordsNearest("association", 3), hasItems("Football", "Soccer", "men's")); } - @Test - public void testWordsNativeStatistics() throws IOException { - - File output = testDir.newFile(); - + public void testWordsNativeStatistics() { FastText fastText = new FastText(); fastText.loadPretrainedVectors(supervisedVectors); log.info("\nTraining supervised model ...\n"); assertEquals(48, fastText.vocab().numWords()); - - String[] result = new String[3]; - fastText.wordsNearest("association", 3).toArray(result); - assertArrayEquals(new String[]{"most","eleven","hours"}, result); - assertEquals(0.1657, fastText.similarity("Football", "teams"), 1e-4); - assertEquals(0.3661, fastText.similarity("professional", "minutes"), 1e-4); - assertEquals(Double.NaN, fastText.similarity("java","cpp"), 1e-4); + assertThat(fastText.wordsNearest("association", 3), 
hasItems("most","eleven","hours")); + assertEquals(0.1657, fastText.similarity("Football", "teams"), 2e-3); + assertEquals(0.3661, fastText.similarity("professional", "minutes"), 2e-3); + assertEquals(Double.NaN, fastText.similarity("java","cpp"), 0.0); } } diff --git a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/models/word2vec/Word2VecTestsSmall.java b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/models/word2vec/Word2VecTestsSmall.java index c9cc8f072..38b44d1ff 100644 --- a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/models/word2vec/Word2VecTestsSmall.java +++ b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/models/word2vec/Word2VecTestsSmall.java @@ -47,7 +47,9 @@ import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; import java.util.Collection; +import java.util.concurrent.Callable; +import static org.awaitility.Awaitility.await; import static org.junit.Assert.assertEquals; @@ -190,22 +192,26 @@ public class Word2VecTestsSmall extends BaseDL4JTest { .nOut(4).build()) .build(); - MultiLayerNetwork net = new MultiLayerNetwork(conf); + final MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); INDArray w0 = net.getParam("0_W"); assertEquals(w, w0); - - ByteArrayOutputStream baos = new ByteArrayOutputStream(); ModelSerializer.writeModel(net, baos, true); byte[] bytes = baos.toByteArray(); ByteArrayInputStream bais = new ByteArrayInputStream(bytes); - MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(bais, true); + final MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(bais, true); assertEquals(net.getLayerWiseConfigurations(), restored.getLayerWiseConfigurations()); - assertEquals(net.params(), restored.params()); + await() + .until(new Callable() { + @Override + public Boolean 
call() { + return net.params().equalsWithEps(restored.params(), 2e-3); + } + }); } } diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution2D.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution2D.java index 8daa947df..b2f64c894 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution2D.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution2D.java @@ -63,6 +63,9 @@ public class Deconvolution2D extends ConvolutionLayer { protected Deconvolution2D(BaseConvBuilder builder) { super(builder); initializeConstraints(builder); + if(builder instanceof Builder){ + this.cnn2dDataFormat = ((Builder) builder).format; + } } public boolean hasBias() { @@ -136,7 +139,7 @@ public class Deconvolution2D extends ConvolutionLayer { private CNN2DFormat format = CNN2DFormat.NCHW; - public Builder format(CNN2DFormat format){ + public Builder dataFormat(CNN2DFormat format){ this.format = format; return this; } diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java index b8b0c13a9..81804a31f 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java @@ -310,11 +310,21 @@ public class ConvolutionLayer extends BaseLayer index = new ThreadLocal<>(); protected long initialMemory = 100 * 1024 * 1024L; protected int queueSize = 5; - protected Double boundary = 1.0; + protected Integer boundary = Integer.MAX_VALUE; protected boolean encodingDebugMode; protected IndexedTail externalSource; @@ -101,11 +101,11 @@ public class 
EncodedGradientsAccumulator implements GradientsAccumulator, Regist } public EncodedGradientsAccumulator(int parties, ThresholdAlgorithm thresholdAlgorithm, ResidualPostProcessor residualPostProcessor, boolean encodingDebugMode) { - this(parties, new EncodingHandler(thresholdAlgorithm, residualPostProcessor, 1.0, encodingDebugMode), DEFAULT_INITIAL_MEMORY, 10, 1.0, encodingDebugMode); + this(parties, new EncodingHandler(thresholdAlgorithm, residualPostProcessor, Integer.MAX_VALUE, encodingDebugMode), DEFAULT_INITIAL_MEMORY, 10, Integer.MAX_VALUE, encodingDebugMode); } public EncodedGradientsAccumulator(int parties, @NonNull MessageHandler handler, long initialMemory, - int queueSize, Double boundary, boolean encodingDebugMode) { + int queueSize, Integer boundary, boolean encodingDebugMode) { this.parties = parties; this.handler = handler; this.initialMemory = initialMemory; @@ -551,7 +551,7 @@ public class EncodedGradientsAccumulator implements GradientsAccumulator, Regist protected long initialMemory = DEFAULT_INITIAL_MEMORY; protected int queueSize = 5; protected MessageHandler handler; - protected Double boundary = null; + protected int boundary = Integer.MAX_VALUE; protected boolean encodingDebugMode; /** @@ -598,15 +598,12 @@ public class EncodedGradientsAccumulator implements GradientsAccumulator, Regist /** * This method enables optional limit for max number of updates per message * - * Default value: 1.0 (no limit) + * Default value: Integer.MAX_VALUE (no limit) * @param boundary positive value in range 0..1 * @return */ - public Builder updatesBoundary(double boundary) { - if (boundary >= 1.0) - return this; - - if (boundary <= 0.0) + public Builder updatesBoundary(int boundary) { + if (boundary <= 0) throw new DL4JInvalidConfigException("Boundary should have positive value"); this.boundary = boundary; diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/EncodingHandler.java 
b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/EncodingHandler.java index 24a46117a..c451ecd6a 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/EncodingHandler.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/EncodingHandler.java @@ -16,6 +16,7 @@ package org.deeplearning4j.optimize.solvers.accumulation; +import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.shade.guava.util.concurrent.AtomicDouble; import lombok.NonNull; import lombok.extern.slf4j.Slf4j; @@ -24,7 +25,6 @@ import org.deeplearning4j.optimize.solvers.accumulation.encoding.ThresholdAlgori import org.deeplearning4j.optimize.solvers.accumulation.encoding.ThresholdAlgorithmReducer; import org.nd4j.linalg.api.buffer.DataBuffer; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.compression.NDArrayCompressor; import org.nd4j.linalg.exception.ND4JIllegalStateException; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.ops.transforms.Transforms; @@ -54,9 +54,8 @@ public class EncodingHandler implements MessageHandler { protected ThresholdAlgorithm initialThresholdAlgorithm; protected ResidualPostProcessor initialResidualPostProcessor; - protected Double boundary; + protected Integer boundary; protected boolean encodingDebugMode; - protected NDArrayCompressor compressor; protected AtomicInteger atomicBoundary = new AtomicInteger(-1); protected ThreadLocal thresholdAlgorithm = new ThreadLocal<>(); @@ -73,20 +72,16 @@ public class EncodingHandler implements MessageHandler { protected final AtomicLong lastThresholdLogTime = new AtomicLong(); public EncodingHandler(final ThresholdAlgorithm thresholdAlgorithm, final ResidualPostProcessor residualPostProcessor, - Double boundary, boolean encodingDebugMode){ + Integer boundary, boolean encodingDebugMode){ this.initialThresholdAlgorithm = thresholdAlgorithm; 
this.initialResidualPostProcessor = residualPostProcessor; - this.boundary = boundary; + this.boundary = boundary == null ? Integer.MAX_VALUE : boundary; this.encodingDebugMode = encodingDebugMode; } @Override public void initialize(@NonNull GradientsAccumulator accumulator) { this.accumulator = accumulator; - - compressor = Nd4j.getCompressor().getCompressor("THRESHOLD"); - if (compressor == null) - throw new ND4JIllegalStateException("Can't find Threshold compressor implementation!"); } public INDArray encodeUpdates(int iteration, int epoch, INDArray updates) { @@ -135,14 +130,13 @@ public class EncodingHandler implements MessageHandler { iterations.get().incrementAndGet(); if (boundary != null && atomicBoundary.get() < 0) - atomicBoundary.compareAndSet(-1, (int) (updates.length() * boundary)); + atomicBoundary.compareAndSet(-1, (int) (updates.length() / 16) ); INDArray encoded; if (!bitmapMode.get().get()) { //Sparse updates - encoded = Nd4j.getExecutioner().thresholdEncode(updates, currentThreshold.get().get(), - boundary == null ? null : atomicBoundary.get()); + encoded = Nd4j.getExecutioner().thresholdEncode(updates, currentThreshold.get().get(), boundary == null ? 
null : atomicBoundary.get()); // updates were TOO sparse, nothing to share here if (encoded == null) { @@ -157,17 +151,14 @@ public class EncodingHandler implements MessageHandler { } - double encLen = encoded.data().getInt(0); + double encLen = encoded.length(); // if updates are too dense - we fallback to bitmap encoding if (encLen >= (updates.length() / 16)) { log.debug("Switching back to bitmapEncoding: iteration {}, epoch {}, threshold {}, encoded length {}", iteration, epoch, currThreshold, encLen); bitmapMode.get().set(true); - DataBuffer buffer = Nd4j.getDataBufferFactory().createInt(updates.length() / 16 + 5); - encoded = Nd4j.createArrayFromShapeBuffer(buffer, updates.shapeInfoDataBuffer()); - - Nd4j.getExecutioner().bitmapEncode(updates, encoded, currentThreshold.get().get()); + encoded = Nd4j.getExecutioner().bitmapEncode(updates, currentThreshold.get().get()); applyPostProcessor(iteration, epoch, currThreshold, updates); lastSparsityRatio.set(null); @@ -186,8 +177,7 @@ public class EncodingHandler implements MessageHandler { } } else { //Dense bitmap updates - DataBuffer buffer = Nd4j.getDataBufferFactory().createInt(updates.length() / 16 + 5); - encoded = Nd4j.createArrayFromShapeBuffer(buffer, updates.shapeInfoDataBuffer()); + encoded = Nd4j.create(DataType.INT32, updates.length() / 16 + 5); long values = Nd4j.getExecutioner().bitmapEncode(updates, encoded, currentThreshold.get().get()); diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/FancyBlockingQueue.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/FancyBlockingQueue.java index 378870bdf..e530b729c 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/FancyBlockingQueue.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/FancyBlockingQueue.java @@ -18,6 +18,7 @@ package 
org.deeplearning4j.optimize.solvers.accumulation; import lombok.NonNull; import lombok.extern.slf4j.Slf4j; +import org.nd4j.common.util.ThreadUtils; import java.util.Collection; import java.util.Iterator; @@ -28,8 +29,6 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.locks.ReentrantReadWriteLock; -import org.deeplearning4j.util.ThreadUtils; - /** * This BlockingQueue implementation is suited only for symmetric gradients updates, and should NOT be used anywhere else. * diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/util/ConvolutionUtils.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/util/ConvolutionUtils.java index 11252bf19..ca02e42d2 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/util/ConvolutionUtils.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/util/ConvolutionUtils.java @@ -48,6 +48,13 @@ import java.util.Arrays; */ public class ConvolutionUtils { + public static final String NCHW_NHWC_ERROR_MSG = "Note: Convolution layers can be configured for either NCHW (channels first)" + + " or NHWC (channels last) format for input images and activations.\n" + + "Layers can be configured using .dataFormat(CNN2DFormat.NCHW/NHWC) when constructing the layer, or for the entire net using" + + " .setInputType(InputType.convolutional(height, width, depth, CNN2DForman.NCHW/NHWC)).\n" + + "ImageRecordReader and NativeImageLoader can also be configured to load image data in either NCHW or NHWC format which must match the network"; + + private static final int[] ONES = new int[]{1, 1}; diff --git a/deeplearning4j/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/ParallelWrapper.java b/deeplearning4j/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/ParallelWrapper.java 
index 50bd6f34e..8d303d391 100644 --- a/deeplearning4j/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/ParallelWrapper.java +++ b/deeplearning4j/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/ParallelWrapper.java @@ -910,7 +910,7 @@ public class ParallelWrapper implements AutoCloseable { Preconditions.checkState(thresholdAlgorithm != null, "Cannot use SHARED_GRADIENTS training mode without setting a threshold algorithm"); this.trainerContext = new SymmetricTrainerContext(); if (this.accumulator == null) { - log.info("Creating new GradientsAccumulator instance with threshold of [5e-4"); + log.info("Creating new GradientsAccumulator instance with default threshold of [5e-4]"); this.accumulator = new EncodedGradientsAccumulator(workers, thresholdAlgorithm, residualPostProcessor, false); } } diff --git a/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/networking/v1/WiredEncodingHandler.java b/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/networking/v1/WiredEncodingHandler.java index 1560d38cd..c2fc97658 100644 --- a/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/networking/v1/WiredEncodingHandler.java +++ b/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/networking/v1/WiredEncodingHandler.java @@ -45,7 +45,7 @@ public class WiredEncodingHandler extends EncodingHandler { * @param thresholdAlgorithm threshold algorithm to use * @param boundary */ - public WiredEncodingHandler(ThresholdAlgorithm thresholdAlgorithm, ResidualPostProcessor residualPostProcessor, Double boundary, boolean encodingDebugMode) { + public 
WiredEncodingHandler(ThresholdAlgorithm thresholdAlgorithm, ResidualPostProcessor residualPostProcessor, Integer boundary, boolean encodingDebugMode) { super(thresholdAlgorithm, residualPostProcessor, boundary, encodingDebugMode); } diff --git a/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/networking/v2/WiredEncodingHandler.java b/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/networking/v2/WiredEncodingHandler.java index 3f892cfe7..130526658 100644 --- a/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/networking/v2/WiredEncodingHandler.java +++ b/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/networking/v2/WiredEncodingHandler.java @@ -44,7 +44,7 @@ public class WiredEncodingHandler extends EncodingHandler { * * @param thresholdAlgorithm The threshold algorithm to use */ - public WiredEncodingHandler(ThresholdAlgorithm thresholdAlgorithm, ResidualPostProcessor residualPostProcessor, Double boundary, boolean encodingDebugMode) { + public WiredEncodingHandler(ThresholdAlgorithm thresholdAlgorithm, ResidualPostProcessor residualPostProcessor, Integer boundary, boolean encodingDebugMode) { super(thresholdAlgorithm, residualPostProcessor, boundary, encodingDebugMode); } diff --git a/docs/README.md b/docs/README.md deleted file mode 100644 index e5b024ce2..000000000 --- a/docs/README.md +++ /dev/null @@ -1,62 +0,0 @@ -# DL4J auto-generated documentation - -## Building - -Run `./gen_all_docs.sh` to generate documentation from source for all supported projects. For each documentation module, files will be put into a `doc_sources` folder where they are staged for copying to the primary docs repository. Note that the autogen docs require Python 2. 
- -To deploy a new version of documentation, first make sure to set `$DL4J_DOCS_DIR` to your local copy of -https://github.com/eclipse/deeplearning4j-docs and set `$DL4J_VERSION` to a URI-friendly version string such as `v100-RC` (note the lack of decimals). Then run `./copy-to-dl4j-docs.sh`. This puts documentation -into the right folders and you can use `git` to create a PR and update the live docs. - -The structure of this project (template files, generating code, mkdocs YAML) is closely aligned -with the [Keras documentation](keras.io) and heavily inspired by the [Keras docs repository](https://github.com/keras-team/keras/tree/master/docs). - -## File structure - -Each major module or library in Eclipse Deeplearning4j has its own folder. Inside that folder are three essential files: - -- `templates/` -- `pages.json` -- `README.md` - -Note that the folder names don't exactly match up with the modules in the `pom.xml` definitions across DL4J. This is because some of the documentation is consolidated (such as DataVec) or omitted due to its experimental status or because it is low-level in the code. - -Templates must maintain a flat file structure. This is to accommodate Jekyll collections when the docs are published. Don't worry about having similarly named files in different doc modules - the module name is prepended when the docs are generated. - -## Creating templates - -Each template has a Jekyll header at the top: - -```markdown ---- -title: Deeplearning4j Autoencoders -short_title: Autoencoders -description: Supported autoencoder configurations. -category: Models -weight: 3 ---- -``` - -All of these definitions are necessary. - -- `title` is the HTML title that appears for a Google result or at the top of the browser window. -- `short_title` is a short name for simple navigation in the user guide. -- `description` is the text that appears below the title in a search engine result. -- `category` is the high-level category in the user guide. 
-- `weight` is the ordering that the doc will appear in navigation, the larger the lower the listing. - -## Creating links - -**All links to other docs need to be relative.** This prolongs the life of the documentation and reduces maintenance. The basic structure of a link to another doc looks like: - -``` -- -``` - -So if you created a DataVec doc with the name `iterators.md` in the `datavec` module, your relative link will look like: - -``` -./datavec-iterators -``` - -Note the omission of the file extension `.md`. Jekyll automatically generates a clean URL for us to use. \ No newline at end of file diff --git a/docs/__init__.py b/docs/__init__.py deleted file mode 100644 index ebf4eab00..000000000 --- a/docs/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -################################################################################ -# Copyright (c) 2015-2019 Skymind, Inc. -# -# This program and the accompanying materials are made available under the -# terms of the Apache License, Version 2.0 which is available at -# https://www.apache.org/licenses/LICENSE-2.0. -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-# -# SPDX-License-Identifier: Apache-2.0 -################################################################################ - diff --git a/docs/arbiter/README.md b/docs/arbiter/README.md deleted file mode 100644 index 36fd1fd0c..000000000 --- a/docs/arbiter/README.md +++ /dev/null @@ -1,10 +0,0 @@ -# arbiter documentation - -To generate docs into the`datavec/doc_sources` folder, first `cd docs` then run: - -```shell -python generate_docs.py \ - --project arbiter \ - --code ../arbiter - --out_language en -``` \ No newline at end of file diff --git a/docs/arbiter/pages.json b/docs/arbiter/pages.json deleted file mode 100644 index 62005eddc..000000000 --- a/docs/arbiter/pages.json +++ /dev/null @@ -1,61 +0,0 @@ -{ - "excludes": [ - "abstract" - ], - "indices": [ - ], - "pages": [ - { - "page": "overview.md", - "class": [] - }, - { - "page": "visualization.md", - "class": [] - }, - { - "page": "parameter-spaces.md", - "class": [ - "arbiter-core/src/main/java/org/deeplearning4j/arbiter/optimize/parameter/continuous/ContinuousParameterSpace.java", - "arbiter-core/src/main/java/org/deeplearning4j/arbiter/optimize/parameter/discrete/DiscreteParameterSpace.java", - "arbiter-core/src/main/java/org/deeplearning4j/arbiter/optimize/parameter/integer/IntegerParameterSpace.java", - "arbiter-core/src/main/java/org/deeplearning4j/arbiter/optimize/parameter/BooleanSpace.java", - "arbiter-deeplearning4j/src/main/java/org/deeplearning4j/arbiter/dropout/AlphaDropoutSpace.java", - "arbiter-deeplearning4j/src/main/java/org/deeplearning4j/arbiter/dropout/GaussianDropoutSpace.java", - "arbiter-deeplearning4j/src/main/java/org/deeplearning4j/arbiter/dropout/GaussianNoiseSpace.java", - "arbiter-core/src/main/java/org/deeplearning4j/arbiter/optimize/parameter/FixedValue.java", - "arbiter-core/src/main/java/org/deeplearning4j/arbiter/optimize/parameter/math/MathOp.java", - "arbiter-core/src/main/java/org/deeplearning4j/arbiter/optimize/parameter/math/PairMathOp.java" - ] - }, - { - "page": 
"layer-spaces.md", - "class": [ - "arbiter-deeplearning4j/src/main/java/org/deeplearning4j/arbiter/layers/ActivationLayerSpace.java", - "arbiter-deeplearning4j/src/main/java/org/deeplearning4j/arbiter/layers/AutoEncoderLayerSpace.java", - "arbiter-deeplearning4j/src/main/java/org/deeplearning4j/arbiter/layers/BatchNormalizationSpace.java", - "arbiter-deeplearning4j/src/main/java/org/deeplearning4j/arbiter/layers/Bidirectional.java", - "arbiter-deeplearning4j/src/main/java/org/deeplearning4j/arbiter/layers/CenterLossOutputLayerSpace.java", - "arbiter-deeplearning4j/src/main/java/org/deeplearning4j/arbiter/layers/ConvolutionLayerSpace.java", - "arbiter-deeplearning4j/src/main/java/org/deeplearning4j/arbiter/layers/Deconvolution2DLayerSpace.java", - "arbiter-deeplearning4j/src/main/java/org/deeplearning4j/arbiter/layers/DenseLayerSpace.java", - "arbiter-deeplearning4j/src/main/java/org/deeplearning4j/arbiter/layers/DropoutLayerSpace.java", - "arbiter-deeplearning4j/src/main/java/org/deeplearning4j/arbiter/layers/EmbeddingLayerSpace.java", - "arbiter-deeplearning4j/src/main/java/org/deeplearning4j/arbiter/layers/FeedForwardLayerSpace.java", - "arbiter-deeplearning4j/src/main/java/org/deeplearning4j/arbiter/layers/GlobalPoolingLayerSpace.java", - "arbiter-deeplearning4j/src/main/java/org/deeplearning4j/arbiter/layers/GravesBidirectionalLSTMLayerSpace.java", - "arbiter-deeplearning4j/src/main/java/org/deeplearning4j/arbiter/layers/GravesLSTMLayerSpace.java", - "arbiter-deeplearning4j/src/main/java/org/deeplearning4j/arbiter/layers/LSTMLayerSpace.java", - "arbiter-deeplearning4j/src/main/java/org/deeplearning4j/arbiter/layers/LocalResponseNormalizationLayerSpace.java", - "arbiter-deeplearning4j/src/main/java/org/deeplearning4j/arbiter/layers/LossLayerSpace.java", - "arbiter-deeplearning4j/src/main/java/org/deeplearning4j/arbiter/layers/OCNNLayerSpace.java", - "arbiter-deeplearning4j/src/main/java/org/deeplearning4j/arbiter/layers/OutputLayerSpace.java", - 
"arbiter-deeplearning4j/src/main/java/org/deeplearning4j/arbiter/layers/RnnOutputLayerSpace.java", - "arbiter-deeplearning4j/src/main/java/org/deeplearning4j/arbiter/layers/SeparableConvolution2DLayerSpace.java", - "arbiter-deeplearning4j/src/main/java/org/deeplearning4j/arbiter/layers/SubsamplingLayerSpace.java", - "arbiter-deeplearning4j/src/main/java/org/deeplearning4j/arbiter/layers/VariationalAutoencoderLayerSpace.java" - ] - } - ] -} - diff --git a/docs/arbiter/templates/layer-spaces.md b/docs/arbiter/templates/layer-spaces.md deleted file mode 100644 index bc1d0891b..000000000 --- a/docs/arbiter/templates/layer-spaces.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Arbiter Layer Spaces -short_title: Layer Spaces -description: Set a search spaces for layers. -category: Arbiter -weight: 1 ---- - -## Layer Spaces - -{{autogenerated}} diff --git a/docs/arbiter/templates/overview.md b/docs/arbiter/templates/overview.md deleted file mode 100644 index eb7a8e8e4..000000000 --- a/docs/arbiter/templates/overview.md +++ /dev/null @@ -1,257 +0,0 @@ ---- -title: Arbiter Overview -short_title: Overview -description: Introduction to using Arbiter for hyperparameter optimization. -category: Arbiter -weight: 0 ---- - -## Hyperparameter Optimization - -Machine learning techniques have a set of parameters that have to be chosen before any training can begin. These parameters are referred to as hyperparameters. Some examples of hyperparameters are ‘k’ in k-nearest-neighbors and the regularization parameter in Support Vector Machines. Neural Networks, in particular, have a wide variety of hyperparameters. Some of these define the architecture of the neural network like the number of layers and their size. Other define the learning process like the learning rate and regularization. - -Traditionally these choices are made based on existing rules of thumb or after extensive trial and error, both of which are less than ideal. 
Undoubtedly the choice of these parameters can have a significant impact on the results obtained after learning. Hyperparameter optimization attempts to automate this process using software that applies search strategies. - -## Arbiter - -Arbiter is part of the DL4J Suite of Machine Learning/Deep Learning tools for the enterprise. It is dedicated to the hyperparameter optimization of neural networks created or imported into dl4j. It allows users to set up search spaces for the hyperparameters and run either grid search or random search to select the best configuration based on a given scoring metric. - -When to use Arbiter? -Arbiter can be used to find good performing models, potentially saving you time tuning your model's hyperparameters, at the expense of greater computational time. Note however that Arbiter doesn't completely automate the neural network tuning process, the user still needs to specify a search space. This search space defines the range of valid values for each hyperparameter (example: minimum and maximum allowable learning rate). If this search space is chosen poorly, Arbiter may not be able to find any good models. - -Add the following to your pom.xml to include Arbiter in your project where ${arbiter.version} is the latest release of the dl4j stack. - -```xml - - - org.deeplearning4j - arbiter-deeplearning4j - {{page.version}} - - - org.deeplearning4j - arbiter-ui_2.11 - {{page.version}} - -``` - -Arbiter also comes with a handy UI that helps visualize the results from the optimizations runs. - -As a prerequisite to using Arbiter users should be familiar with the NeuralNetworkConfiguration, MultilayerNetworkConfiguration and ComputationGraphconfiguration classes in DL4J. - -## Usage -This section will provide an overview of the important constructs necessary to use Arbiter. The sections that follow will dive into the details. 
- -At the highest level, setting up hyperparameter optimization involves setting up an OptimizationConfiguration and running it via IOptimizationRunner. - -Below is some code that demonstrates the fluent builder pattern in OptimizationConfiguration: - -```java -OptimizationConfiguration configuration = new OptimizationConfiguration.Builder() - .candidateGenerator(candidateGenerator) - .dataSource(dataSourceClass,dataSourceProperties) - .modelSaver(modelSaver) - .scoreFunction(scoreFunction) - .terminationConditions(terminationConditions) - .build(); -``` - -As indicated above setting up an optimization configuration requires: -CandidateGenerator: Proposes candidates (i.e., hyperparameter configurations) for evaluation. Candidates are generated based on some strategy. Currently random search and grid search are supported. Valid configurations for the candidates are determined by the hyperparameter space associated with the candidate generator. -DataSource: DataSource is used under the hood to provide data to the generated candidates for training and test -ModelSaver: Specifies how the results of each hyperparameter optimization run should be saved. For example, whether saving should be done to local disk, to a database, to HDFS, or simply stored in memory. -ScoreFunction: A metric that is a single number that we are seeking to minimize or maximize to determine the best candidate. Eg. Model loss or classification accuracy -TerminationCondition: Determines when hyperparameter optimization should be stopped. Eg. A given number of candidates have been evaluated, a certain amount of computation time has passed. - -The optimization configuration is then passed to an optimization runner along with a task creator. 
- -If candidates generated are MultiLayerNetworks this is set up as follows: - -```java -IOptimizationRunner runner = new LocalOptimizationRunner(configuration, new MultiLayerNetworkTaskCreator()); -``` - -Alternatively if candidates generated are ComputationGraphs this is set up as follows: - -```java -IOptimizationRunner runner = new LocalOptimizationRunner(configuration, new ComputationGraphTaskCreator()); -``` - -Currently the only option available for the runner is the LocalOptimizationRunner which is used to execute learning on a single machine (i.e, in the current JVM). In principle, other execution methods (for example, on Spark or cloud computing machines) could be implemented. - -To summarize here are the steps to set up a hyperparameter optimization run: - -1. Specify hyperparameter search space -1. Specify a candidate generator for the hyperparameter search space -1. The next section of steps can be done in any order: -1. Specify a data source -1. Specify a model saver -1. Specify a score function -1. Specify a termination condition -1. The next steps have to be done in order: -1. Use 2 to 6 above to construct an Optimization Configuration -1. Run with the Optimization Runner. - - -## Hyperparameter search space - -Arbiter’s `ParameterSpace` class defines the acceptable ranges of values a given hyperparameter may take. ParameterSpace can be a simple, like a ParameterSpace that defines a continuous range of double values (say for learning rate) or complicated with multiple nested parameter spaces within like the case of a MultiLayerSpace (which defines a search space for a MultilayerConfiguration). - - -## MultiLayerSpace and ComputationGraphSpace - -MultiLayerSpace and ComputationGraphSpace are Arbiter’s counterpart to dl4j’s MultiLayerConfiguration and ComputationGraphConfiguration. They are used to set up parameter spaces for valid hyperparameters in MultiLayerConfiguration and ComputationGraphConfiguration. 
- -In addition to these users can also set up the number of epochs or an early stopping configuration to indicate when training on each candidate neural net should stop. If both an EarlyStoppingConfiguration and the number of epochs are specified, early stopping will be used in preference. - -Setting up MultiLayerSpace or ComputationGraphSpace are fairly straightforward once the user is familiar with Integer, Continuous and Discrete parameter spaces and LayerSpaces and UpdaterSpaces. - -The only caveat to be noted here is that while it is possible to set up weightConstraints, l1Bias and l2Bias as part of the NeuralNetConfiguration these have to be setup on a per layer/layerSpace basis in MultiLayerSpace. In general all properties/hyperparameters available through the builder will take either a fixed value or a parameter space of that type. This means that pretty much every aspect of the MultiLayerConfiguration can be swept to test out a variety of architectures and initial values. - -Here is a simple example of a MultiLayerSpace: - -```java -ParameterSpace biasSpace = new DiscreteParameterSpace<>(new Boolean[]{true, false}); -ParameterSpace firstLayerSize = new IntegerParameterSpace(10,30); -ParameterSpace secondLayerSize = new MathOp<>(firstLayerSize, Op.MUL, 3); -ParameterSpace firstLayerLR = new ContinuousParameterSpace(0.01, 0.1); -ParameterSpace secondLayerLR = new MathOp<>(firstLayerLR, Op.ADD, 0.2); - -MultiLayerSpace mls = - new MultiLayerSpace.Builder().seed(12345) - .hasBias(biasSpace) - .layer(new DenseLayerSpace.Builder().nOut(firstLayerSize) - .updater(new AdamSpace(firstLayerLR)) - .build()) - .layer(new OutputLayerSpace.Builder().nOut(secondLayerSize) - .updater(new AdamSpace(secondLayerLR)) - .build()) - .setInputType(InputType.feedForward(10)) - .numEpochs(20).build(); //Data will be fit for a fixed number of epochs -``` - -Of particular note is Arbiter’s ability to vary the number of layers in the MultiLayerSpace. 
Here is a simple example demonstrating the same that also demonstrates setting up a parameter search space for a weighted loss function: - -```java -ILossFunction[] weightedLossFns = new ILossFunction[]{ - new LossMCXENT(Nd4j.create(new double[]{1, 0.1})), - new LossMCXENT(Nd4j.create(new double[]{1, 0.05})), - new LossMCXENT(Nd4j.create(new double[]{1, 0.01}))}; - -DiscreteParameterSpace weightLossFn = new DiscreteParameterSpace<>(weightedLossFns); -MultiLayerSpace mls = - new MultiLayerSpace.Builder().seed(12345) - .addLayer(new DenseLayerSpace.Builder().nIn(10).nOut(10).build(), - new IntegerParameterSpace(2, 5)) //2 to 5 identical layers - .addLayer(new OutputLayerSpace.Builder() - .iLossFunction(weightLossFn) - .nIn(10).nOut(2).build()) - .backprop(true).pretrain(false).build(); -``` - -The two to five layers created above will be identical (stacked). Currently Arbiter does not support the ability to create independent layers. - -Finally it is also possible to create a fixed number of identical layers as shown in the following example: - -```java -DiscreteParameterSpace activationSpace = new DiscreteParameterSpace(new Activation[]{Activation.IDENTITY, Activation.ELU, Activation.RELU}); -MultiLayerSpace mls = new MultiLayerSpace.Builder().updater(new Sgd(0.005)) - .addLayer(new DenseLayerSpace.Builder().activation(activationSpace).nIn(10).nOut(10).build(), - new FixedValue(3)) - .addLayer(new OutputLayerSpace.Builder().iLossFunction(new LossMCXENT()).nIn(10).nOut(2).build()) - .backprop(true).build(); -``` - -In this example with a grid search three separate architectures will be created. They will be identical in every way but in the chosen activation function in the non-output layers. Again it is to be noted that the layers created in each architecture are identical(stacked). - -Creating ComputationGraphSpace is very similar to MultiLayerSpace. However there is currently only support for fixed graph structures. 
- -Here is a simple example demonstrating setting up a ComputationGraphSpace: - -```java -ComputationGraphSpace cgs = new ComputationGraphSpace.Builder() - .updater(new SgdSpace(new ContinuousParameterSpace(0.0001, 0.1))) - .l2(new ContinuousParameterSpace(0.2, 0.5)) - .addInputs("in") - .addLayer("0",new DenseLayerSpace.Builder().nIn(10).nOut(10).activation( - new DiscreteParameterSpace<>(Activation.RELU,Activation.TANH).build(),"in") - - .addLayer("1", new OutputLayerSpace.Builder().nIn(10).nOut(10) - .activation(Activation.SOFTMAX).build(), "0") - .setOutputs("1").setInputTypes(InputType.feedForward(10)).build(); -``` - -### JSON serialization. - -MultiLayerSpace, ComputationGraphSpace and OptimizationConfiguration have `toJso`n methods as well as `fromJson` methods. You can store the JSON representation for further use. - -Specifying a candidate generator -As mentioned earlier Arbiter currently supports grid search and random search. - -Setting up a random search is straightforward and is shown below: -MultiLayerSpace mls; -... -CandidateGenerator candidateGenerator = new RandomSearchGenerator(mls); - -Setting up a grid search is also simple. With a grid search the user also gets to specify a discretization count and a mode. The discretization count determines how many values a continuous parameter is binned into. For eg. a continuous parameter in range [0,1] is converted to [0.0, 0.5, 1.0] with a discretizationCount of 3. The mode determines the manner in which the candidates are generated. Candidates can be generated in Sequential (in order) or RandomOrder. With sequential order the first hyperparameter will be changed most rapidly and consequently the last hyperparameter will be changed the least rapidly. Note that both modes will result in the same set of candidates just in varying order. 
- -Here is a simple example of how a grid search is set up with a discretization count of 4 in sequential order: - -```java -CandidateGenerator candidateGenerator = new GridSearchCandidateGenerator(mls, 4, - GridSearchCandidateGenerator.Mode.Sequential); -``` - - -## Specifying a data source - -The DataSource interface defines where data for training the different candidates come from. It is very straightforward to implement. Note that a no argument constructor is required to be defined. Depending on the needs of the user the DataSource implementation can be configured with properties, like the size of the minibatch. A simple implementation of the data source that uses the MNIST dataset is available in the example repo which is covered later in this guide. -It is important to note here that the number of epochs (as well as early stopping configurations) can be set via the MultiLayerSpace and ComputationGraphSpace builders. - - -## Specifying a model/result saver - -Arbiter currently supports saving models either saving to disk in local memory (FileModelSaver) or storing results in-memory (InMemoryResultSaver). InMemoryResultSaver is obviously not recommended for large models. - -Setting them up are trivial. FileModelSaver constructor takes a path as String. It saves config, parameters and score to: baseDir/0/, baseDir/1/, etc where index is given by OptimizationResult.getIndex(). InMemoryResultSaver requires no arguments. - -Specifying a score function -There are three main classes for score functions: EvaluationScoreFunction, ROCScoreFunction and RegressionScoreFunction. - -EvaluationScoreFunction uses a DL4J evaluation metric. Available metrics are ACCURACY, F1, PRECISION, RECALL, GMEASURE, MCC. Here is a simple example that uses accuracy: - ScoreFunction scoreFunction = new EvaluationScoreFunction(Evaluation.Metric.ACCURACY); - -ROCScoreFunction calculates AUC (area under ROC curve) or AUPRC (area under precision/recall curve) on the test set. 
Different ROC types (ROC, ROCBinary and ROCMultiClass) are supported. Here is a simple example that uses AUC: -ScoreFunction sf = new ROCScoreFunction(ROCScoreFunction.ROCType.BINARY, ROCScoreFunction.Metric.AUC)); - -RegressionScoreFunction is used for regression and supports all DL4J RegressionEvaluation metrics (MSE, MAE, RMSE, RSE, PC, R2). Here is a simple example: -ScoreFunction sf = new RegressionScoreFunction(RegressionEvaluation.Metric.MSE); - -## Specifying a termination condition - -Arbiter currently only supports two kinds of termination conditions - MaxTimeCondition and MaxCandidatesCondition. MaxTimeCondition specifies a time after which hyperparameter optimization will be terminated. MaxCandidatesCondition specifies a maximum number of candidates after which hyperparameter optimization is terminated. Termination conditions can be specified as a list. Hyperparameter optimization stops if any of the conditions are met. - -Here is a simple example where the run is terminated at fifteen minutes or after training ten candidates which ever is met first: - -```java -TerminationCondition[] terminationConditions = { - new MaxTimeCondition(15, TimeUnit.MINUTES), - new MaxCandidatesCondition(10) -}; -``` - - -## Example Arbiter Run on MNIST data - -The DL4J example repo contains a BasicHyperparameterOptimizationExample on MNIST data. Users can walk through this simple example here. This example also goes through setting up the Arbiter UI. Arbiter uses the same storage and persistence approach as DL4J's UI. More documentation on the UI can be found here. The UI can be accessed at http://localhost:9000/arbiter. - - -## Tips for hyperparameter tuning - -Please refer to the excellent section on hyperparameter optimization here from the CS231N class at Stanford. A summary of these techniques are below: -- Prefer random search over grid search. 
For a comparison of random and grid search methods, see Random Search for Hyper-parameter Optimization (Bergstra and Bengio, 2012). -- Run search from coarse to fine (Start with a coarse parameter search with one or two epochs, pick the best candidate to do a fine search on with more epochs, iterate) -- Use LogUniformDistribution for certain hyperparameter like the learning rate, l2 etc -- Be mindful of values that fall close to the borders of the parameter search space - - diff --git a/docs/arbiter/templates/parameter-spaces.md b/docs/arbiter/templates/parameter-spaces.md deleted file mode 100644 index acb5a7c6b..000000000 --- a/docs/arbiter/templates/parameter-spaces.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Arbiter Parameter Spaces -short_title: Parameter Spaces -description: Set a search spaces for parameters. -category: Arbiter -weight: 1 ---- - -## Parameter Spaces - -{{autogenerated}} diff --git a/docs/copy-to-dl4j-docs.sh b/docs/copy-to-dl4j-docs.sh deleted file mode 100755 index a920e85a0..000000000 --- a/docs/copy-to-dl4j-docs.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env bash - -################################################################################ -# Copyright (c) 2015-2018 Skymind, Inc. -# -# This program and the accompanying materials are made available under the -# terms of the Apache License, Version 2.0 which is available at -# https://www.apache.org/licenses/LICENSE-2.0. -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-# -# SPDX-License-Identifier: Apache-2.0 -################################################################################ - -# Make sure to set $DL4J_DOCS_DIR to your local copy of https://github.com/deeplearning4j/deeplearning4j-docs -SOURCE_DIR=$(pwd) - -# print the current git status -cd $DL4J_DOCS_DIR -git status - -cd $SOURCE_DIR - -# each release is its own jekyll collection located in docs/ -DOCS_DEST=$DL4J_DOCS_DIR/docs/_$DL4J_VERSION -mkdir $DOCS_DEST -echo Copying to $DOCS_DEST - -# recursively find all files in doc_sources and copy -find $SOURCE_DIR/*/doc_sources -maxdepth 1 -type f -exec cp '{}' $DOCS_DEST \; \ No newline at end of file diff --git a/docs/datavec/README.md b/docs/datavec/README.md deleted file mode 100644 index 55fb01bec..000000000 --- a/docs/datavec/README.md +++ /dev/null @@ -1,10 +0,0 @@ -# datavec documentation - -To generate docs into the`datavec/doc_sources` folder, first `cd docs` then run: - -```shell -python generate_docs.py \ - --project datavec \ - --code ../datavec - --out_language en -``` \ No newline at end of file diff --git a/docs/datavec/pages.json b/docs/datavec/pages.json deleted file mode 100644 index 2dd6b0f05..000000000 --- a/docs/datavec/pages.json +++ /dev/null @@ -1,203 +0,0 @@ -{ - "excludes": [ - "abstract" - ], - "indices": [ - ], - "pages": [ - { - "page": "overview.md", - "class": [] - }, - { - "page": "normalization.md", - "module": [ - "/../nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/dataset/api/preprocessor/" - ] - }, - { - "page": "records.md", - "class": [ - "datavec-api/src/main/java/org/datavec/api/records/impl/Record.java", - "datavec-api/src/main/java/org/datavec/api/records/impl/SequenceRecord.java" - ] - }, - { - "page": "readers.md", - "class": [ - "datavec-data/datavec-data-image/src/main/java/org/datavec/image/recordreader/ImageRecordReader.java", - "datavec-data/datavec-data-audio/src/main/java/org/datavec/audio/recordreader/NativeAudioRecordReader.java", - 
"datavec-data/datavec-data-audio/src/main/java/org/datavec/audio/recordreader/WavFileRecordReader.java", - "datavec-data/datavec-data-nlp/src/main/java/org/datavec/nlp/reader/TfidfRecordReader.java", - "datavec-api/src/main/java/org/datavec/api/records/reader/impl/FileRecordReader.java", - "datavec-api/src/main/java/org/datavec/api/records/reader/impl/LineRecordReader.java", - "datavec-api/src/main/java/org/datavec/api/records/reader/impl/ComposableRecordReader.java", - "datavec-api/src/main/java/org/datavec/api/records/reader/impl/csv/CSVRecordReader.java", - "datavec-api/src/main/java/org/datavec/api/records/reader/impl/csv/CSVRegexRecordReader.java", - "datavec-api/src/main/java/org/datavec/api/records/reader/impl/csv/CSVSequenceRecordReader.java", - "datavec-api/src/main/java/org/datavec/api/records/reader/impl/csv/CSVVariableSlidingWindowRecordReader.java", - "datavec-api/src/main/java/org/datavec/api/records/reader/impl/ConcatenatingRecordReader.java", - "datavec-api/src/main/java/org/datavec/api/records/reader/impl/transform/TransformProcessRecordReader.java", - "datavec-api/src/main/java/org/datavec/api/records/reader/impl/transform/TransformProcessSequenceRecordReader.java", - "datavec-api/src/main/java/org/datavec/api/records/reader/impl/collection/CollectionRecordReader.java", - "datavec-api/src/main/java/org/datavec/api/records/reader/impl/collection/CollectionSequenceRecordReader.java", - "datavec-api/src/main/java/org/datavec/api/records/reader/impl/collection/ListStringRecordReader.java", - "datavec-api/src/main/java/org/datavec/api/records/reader/impl/misc/LibSvmRecordReader.java", - "datavec-api/src/main/java/org/datavec/api/records/reader/impl/misc/MatlabRecordReader.java", - "datavec-api/src/main/java/org/datavec/api/records/reader/impl/misc/SVMLightRecordReader.java", - "datavec-api/src/main/java/org/datavec/api/records/reader/impl/regex/RegexLineRecordReader.java", - 
"datavec-api/src/main/java/org/datavec/api/records/reader/impl/regex/RegexSequenceRecordReader.java" - ] - }, - { - "page": "executors.md", - "class": [ - "datavec-local/src/main/java/org/datavec/local/transforms/LocalTransformExecutor.java", - "datavec-spark/src/main/java/org/datavec/spark/transform/SparkTransformExecutor.java" - ] - }, - { - "page": "schema.md", - "class": [ - "datavec-api/src/main/java/org/datavec/api/transform/schema/Schema.java", - "datavec-api/src/main/java/org/datavec/api/transform/schema/SequenceSchema.java", - "datavec-api/src/main/java/org/datavec/api/transform/schema/InferredSchema.java", - "datavec-api/src/main/java/org/datavec/api/transform/join/Join.java" - ] - }, - { - "page": "transforms.md", - "class": [ - "datavec-api/src/main/java/org/datavec/api/transform/TransformProcess.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/categorical/CategoricalToIntegerTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/categorical/CategoricalToOneHotTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/categorical/IntegerToCategoricalTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/categorical/PivotTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/categorical/StringToCategoricalTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/column/AddConstantColumnTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/column/DuplicateColumnsTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/column/RemoveAllColumnsExceptForTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/column/RemoveColumnsTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/column/RenameColumnsTransform.java", - 
"datavec-api/src/main/java/org/datavec/api/transform/transform/column/ReorderColumnsTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/doubletransform/DoubleColumnsMathOpTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/doubletransform/DoubleMathFunctionTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/doubletransform/DoubleMathOpTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/integer/IntegerColumnsMathOpTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/integer/IntegerMathOpTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/integer/IntegerToOneHotTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/integer/ReplaceEmptyIntegerWithValueTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/integer/ReplaceInvalidWithIntegerTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/longtransform/LongColumnsMathOpTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/longtransform/LongMathOpTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/nlp/TextToCharacterIndexTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/nlp/TextToTermIndexSequenceTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/sequence/SequenceDifferenceTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/sequence/SequenceMovingWindowReduceTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/sequence/SequenceOffsetTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/string/AppendStringColumnTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/string/ChangeCaseStringTransform.java", - 
"datavec-api/src/main/java/org/datavec/api/transform/transform/string/ConcatenateStringColumns.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/string/MapAllStringsExceptListTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/string/RemoveWhiteSpaceTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/string/ReplaceEmptyStringTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/string/ReplaceStringTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/string/StringListToCategoricalSetTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/string/StringListToCountsNDArrayTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/string/StringListToIndicesNDArrayTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/string/StringMapTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/time/DeriveColumnsFromTimeTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/time/StringToTimeTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/time/TimeMathOpTransform.java", - - "datavec-api/src/main/java/org/datavec/api/transform/transform/condition/ConditionalCopyValueTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/condition/ConditionalReplaceValueTransform.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/condition/ConditionalReplaceValueTransformWithDefault.java", - - "datavec-api/src/main/java/org/datavec/api/transform/transform/doubletransform/ConvertToDouble.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/integer/ConvertToInteger.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/string/ConvertToString.java", - - 
"datavec-api/src/main/java/org/datavec/api/transform/transform/doubletransform/Log2Normalizer.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/doubletransform/MinMaxNormalizer.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/doubletransform/StandardizeNormalizer.java", - "datavec-api/src/main/java/org/datavec/api/transform/transform/doubletransform/SubtractMeanNormalizer.java" - ] - }, - { - "page": "operations.md", - "class": [ - "datavec-api/src/main/java/org/datavec/api/transform/ops/AggregableCheckingOp.java", - "datavec-api/src/main/java/org/datavec/api/transform/ops/AggregableMultiOp.java", - "datavec-api/src/main/java/org/datavec/api/transform/ops/ByteWritableOp.java", - "datavec-api/src/main/java/org/datavec/api/transform/ops/DispatchOp.java", - "datavec-api/src/main/java/org/datavec/api/transform/ops/DispatchWithConditionOp.java", - "datavec-api/src/main/java/org/datavec/api/transform/ops/DoubleWritableOp.java", - "datavec-api/src/main/java/org/datavec/api/transform/ops/FloatWritableOp.java", - "datavec-api/src/main/java/org/datavec/api/transform/ops/IntWritableOp.java", - "datavec-api/src/main/java/org/datavec/api/transform/ops/LongWritableOp.java", - "datavec-api/src/main/java/org/datavec/api/transform/ops/StringWritableOp.java", - "datavec-api/src/main/java/org/datavec/api/transform/rank/CalculateSortedRank.java" - ] - }, - { - "page": "conditions.md", - "class": [ - "datavec-api/src/main/java/org/datavec/api/transform/condition/column/BooleanColumnCondition.java", - "datavec-api/src/main/java/org/datavec/api/transform/condition/column/CategoricalColumnCondition.java", - "datavec-api/src/main/java/org/datavec/api/transform/condition/column/DoubleColumnCondition.java", - "datavec-api/src/main/java/org/datavec/api/transform/condition/column/InfiniteColumnCondition.java", - "datavec-api/src/main/java/org/datavec/api/transform/condition/column/IntegerColumnCondition.java", - 
"datavec-api/src/main/java/org/datavec/api/transform/condition/column/InvalidValueColumnCondition.java", - "datavec-api/src/main/java/org/datavec/api/transform/condition/column/LongColumnCondition.java", - "datavec-api/src/main/java/org/datavec/api/transform/condition/column/NaNColumnCondition.java", - "datavec-api/src/main/java/org/datavec/api/transform/condition/column/NullWritableColumnCondition.java", - "datavec-api/src/main/java/org/datavec/api/transform/condition/column/StringColumnCondition.java", - "datavec-api/src/main/java/org/datavec/api/transform/condition/column/TimeColumnCondition.java", - "datavec-api/src/main/java/org/datavec/api/transform/condition/column/TrivialColumnCondition.java", - "datavec-api/src/main/java/org/datavec/api/transform/condition/sequence/SequenceLengthCondition.java", - "datavec-api/src/main/java/org/datavec/api/transform/condition/string/StringRegexColumnCondition.java", - "datavec-api/src/main/java/org/datavec/api/transform/condition/BooleanCondition.java", - "datavec-api/src/main/java/org/datavec/api/transform/condition/SequenceConditionMode.java" - ] - }, - { - "page": "filters.md", - "class": [ - "datavec-api/src/main/java/org/datavec/api/transform/filter/Filter.java", - "datavec-api/src/main/java/org/datavec/api/transform/filter/ConditionFilter.java", - "datavec-api/src/main/java/org/datavec/api/transform/filter/FilterInvalidValues.java", - "datavec-api/src/main/java/org/datavec/api/transform/filter/InvalidNumColumns.java" - ] - }, - { - "page": "reductions.md", - "class": [ - "datavec-api/src/main/java/org/datavec/api/transform/reduce/impl/GeographicMidpointReduction.java", - "datavec-api/src/main/java/org/datavec/api/transform/stringreduce/StringReducer.java" - ] - }, - { - "page": "serialization.md", - "class": [ - "datavec-api/src/main/java/org/datavec/api/transform/serde/JsonSerializer.java", - "datavec-api/src/main/java/org/datavec/api/transform/serde/YamlSerializer.java" - ] - }, - { - "page": "visualization.md", - 
"class": [ - "datavec-api/src/main/java/org/datavec/api/transform/ui/HtmlAnalysis.java", - "datavec-api/src/main/java/org/datavec/api/transform/ui/HtmlSequencePlotting.java" - ] - }, - { - "page": "analysis.md", - "class": [ - "datavec-spark/src/main/java/org/datavec/spark/transform/AnalyzeSpark.java", - "datavec-local/src/main/java/org/datavec/local/transforms/AnalyzeLocal.java" - ] - } - ] -} - diff --git a/docs/datavec/templates/analysis.md b/docs/datavec/templates/analysis.md deleted file mode 100644 index c343774cc..000000000 --- a/docs/datavec/templates/analysis.md +++ /dev/null @@ -1,58 +0,0 @@ ---- -title: DataVec Analysis -short_title: Analysis -description: Gather statistics on datasets. -category: DataVec -weight: 2 ---- - -## Analysis of data - -Sometimes datasets are too large or too abstract in their format to manually analyze and estimate statistics on certain columns or patterns. DataVec comes with some helper utilities for performing a data analysis, and maximums, means, minimums, and other useful metrics. - -## Using Spark for analysis - -If you have loaded your data into Apache Spark, DataVec has a special `AnalyzeSpark` class which can generate histograms, collect statistics, and return information about the quality of the data. Assuming you have already loaded your data into a Spark RDD, pass the `JavaRDD` and `Schema` to the class. - -If you are using DataVec in Scala and your data was loaded into a regular `RDD` class, you can convert it by calling `.toJavaRDD()` which returns a `JavaRDD`. If you need to convert it back, call `rdd()`. 
- -The code below demonstrates some of many analyses for a 2D dataset in Spark analysis using the RDD `javaRdd` and the schema `mySchema`: - -```java -import org.datavec.spark.transform.AnalyzeSpark; -import org.datavec.api.writable.Writable; -import org.datavec.api.transform.analysis.*; - -int maxHistogramBuckets = 10 -DataAnalysis analysis = AnalyzeSpark.analyze(mySchema, javaRdd, maxHistogramBuckets) - -DataQualityAnalysis analysis = AnalyzeSpark.analyzeQuality(mySchema, javaRdd) - -Writable max = AnalyzeSpark.max(javaRdd, "myColumn", mySchema) - -int numSamples = 5 -List sample = AnalyzeSpark.sampleFromColumn(numSamples, "myColumn", mySchema, javaRdd) -``` - -Note that if you have sequence data, there are special methods for that as well: - -```java -SequenceDataAnalysis seqAnalysis = AnalyzeSpark.analyzeSequence(mySchema, sequenceRdd) - -List uniqueSequence = AnalyzeSpark.getUniqueSequence("myColumn", seqSchema, sequenceRdd) -``` - -## Analyzing locally - -The `AnalyzeLocal` class works very similarly to its Spark counterpart and has a similar API. Instead of passing an RDD, it accepts a `RecordReader` which allows it to iterate over the dataset. - -```java -import org.datavec.local.transforms.AnalyzeLocal; - -int maxHistogramBuckets = 10 -DataAnalysis analysis = AnalyzeLocal.analyze(mySchema, csvRecordReader, maxHistogramBuckets) -``` - -## Utilities - -{{autogenerated}} \ No newline at end of file diff --git a/docs/datavec/templates/conditions.md b/docs/datavec/templates/conditions.md deleted file mode 100644 index 884a574bf..000000000 --- a/docs/datavec/templates/conditions.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: DataVec Conditions -short_title: Conditions -description: Rules for triggering operations and transformations. 
-category: DataVec -weight: 3 ---- - -## Available conditions - -{{autogenerated}} \ No newline at end of file diff --git a/docs/datavec/templates/executors.md b/docs/datavec/templates/executors.md deleted file mode 100644 index 5ecdf73c0..000000000 --- a/docs/datavec/templates/executors.md +++ /dev/null @@ -1,43 +0,0 @@ ---- -title: DataVec Executors -short_title: Executors -description: Execute ETL and vectorization in a local instance. -category: DataVec -weight: 3 ---- - -## Local or remote execution? - -Because datasets are commonly large by nature, you can decide on an execution mechanism that best suits your needs. For example, if you are vectorizing a large training dataset, you can process it in a distributed Spark cluster. However, if you need to do real-time inference, DataVec also provides a local executor that doesn't require any additional setup. - -## Executing a transform process - -Once you've created your `TransformProcess` using your `Schema`, and you've either loaded your dataset into a Apache Spark `JavaRDD` or have a `RecordReader` that load your dataset, you can execute a transform. 
- -Locally this looks like: - -```java -import org.datavec.local.transforms.LocalTransformExecutor; - -List> transformed = LocalTransformExecutor.execute(recordReader, transformProcess) - -List>> transformedSeq = LocalTransformExecutor.executeToSequence(sequenceReader, transformProcess) - -List> joined = LocalTransformExecutor.executeJoin(join, leftReader, rightReader) -``` - -When using Spark this looks like: - -```java -import org.datavec.spark.transforms.SparkTransformExecutor; - -JavaRDD> transformed = SparkTransformExecutor.execute(inputRdd, transformProcess) - -JavaRDD>> transformedSeq = SparkTransformExecutor.executeToSequence(inputSequenceRdd, transformProcess) - -JavaRDD> joined = SparkTransformExecutor.executeJoin(join, leftRdd, rightRdd) -``` - -## Available executors - -{{autogenerated}} \ No newline at end of file diff --git a/docs/datavec/templates/filters.md b/docs/datavec/templates/filters.md deleted file mode 100644 index 2a7fde81e..000000000 --- a/docs/datavec/templates/filters.md +++ /dev/null @@ -1,23 +0,0 @@ ---- -title: DataVec Filters -short_title: Filters -description: Selection of data using conditions. -category: DataVec -weight: 3 ---- - -## Using filters - -Filters are a part of transforms and gives a DSL for you to keep parts of your dataset. Filters can be one-liners for single conditions or include complex boolean logic. - -```java -TransformProcess tp = new TransformProcess.Builder(inputDataSchema) - .filter(new ConditionFilter(new CategoricalColumnCondition("MerchantCountryCode", ConditionOp.NotInSet, new HashSet<>(Arrays.asList("USA","CAN"))))) - .build(); -``` - -You can also write your own filters by implementing the `Filter` interface, though it is much more often that you may want to create a custom condition instead. 
- -## Available filters - -{{autogenerated}} \ No newline at end of file diff --git a/docs/datavec/templates/normalization.md b/docs/datavec/templates/normalization.md deleted file mode 100644 index af3c6f0a9..000000000 --- a/docs/datavec/templates/normalization.md +++ /dev/null @@ -1,15 +0,0 @@ ---- -title: DataVec Normalization -short_title: Normalization -description: Preparing data in the right shape and range for learning. -category: DataVec -weight: 5 ---- - -## Why normalize? - -Neural networks work best when the data they’re fed is normalized, constrained to a range between -1 and 1. There are several reasons for that. One is that nets are trained using gradient descent, and their activation functions usually having an active range somewhere between -1 and 1. Even when using an activation function that doesn’t saturate quickly, it is still good practice to constrain your values to this range to improve performance. - -## Available preprocessors - -{{autogenerated}} \ No newline at end of file diff --git a/docs/datavec/templates/operations.md b/docs/datavec/templates/operations.md deleted file mode 100644 index 027f66849..000000000 --- a/docs/datavec/templates/operations.md +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: DataVec Operations -short_title: Operations -description: Implementations for advanced transformation. -category: DataVec -weight: 3 ---- - -## Usage - -Operations, such as a `Function`, help execute transforms and load data into DataVec. The concept of operations is low-level, meaning that most of the time you will not need to worry about them. - -## Loading data into Spark - -If you're using Apache Spark, functions will iterate over the dataset and load it into a Spark `RDD` and convert the raw data format into a `Writable`. 
- -```java -import org.datavec.api.writable.Writable; -import org.datavec.api.records.reader.impl.csv.CSVRecordReader; -import org.datavec.spark.transform.misc.StringToWritablesFunction; - -SparkConf conf = new SparkConf(); -JavaSparkContext sc = new JavaSparkContext(conf) - -String customerInfoPath = new ClassPathResource("CustomerInfo.csv").getFile().getPath(); -JavaRDD> customerInfo = sc.textFile(customerInfoPath).map(new StringToWritablesFunction(rr)); -``` - -The above code loads a CSV file into a 2D java RDD. Once your RDD is loaded, you can transform it, perform joins and use reducers to wrangle the data any way you want. - -## Available ops - -{{autogenerated}} \ No newline at end of file diff --git a/docs/datavec/templates/overview.md b/docs/datavec/templates/overview.md deleted file mode 100644 index 28eab3071..000000000 --- a/docs/datavec/templates/overview.md +++ /dev/null @@ -1,121 +0,0 @@ ---- -title: DataVec Overview -short_title: Overview -description: Overview of the vectorization and ETL library for DL4J. -category: DataVec -weight: 0 ---- - -## DataVec: A Vectorization and ETL Library - -DataVec solves one of the most important obstacles to effective machine or deep learning: getting data into a format that neural nets can understand. Nets understand vectors. Vectorization is the first problem many data scientists will have to solve to start training their algorithms on data. Datavec should be used for 99% of your data transformations, if you are not sure if this applies to you, please consult the [gitter](https://gitter.im/deeplearning4j/deeplearning4j). Datavec supports most data formats you could want out of the box, but you may also implement your own custom record reader as well. - -If your data is in CSV (Comma Seperated Values) format stored in flat files that must be converted to numeric and ingested, or your data is a directory structure of labelled images then DataVec is the tool to help you organize that data for use in DeepLearning4J. 
- - -Please **read this entire page**, particularly the section [Reading Records](#record) below, before working with DataVec. - - - -## Introductory Video - -This video describes the conversion of image data to a vector. - - - -## Key Aspects -- [DataVec](https://github.com/eclipse/deeplearning4j/tree/master/datavec) uses an input/output format system (similar in some ways to how Hadoop MapReduce uses InputFormat to determine InputSplits and RecordReaders, DataVec also provides RecordReaders to Serialize Data) -- Designed to support all major types of input data (text, CSV, audio, image and video) with these specific input formats -- Uses an output format system to specify an implementation-neutral type of vector format (SVMLight, etc.) -- Can be extended for specialized input formats (such as exotic image formats); i.e. You can write your own custom input format and let the rest of the codebase handle the transformation pipeline -- Makes vectorization a first-class citizen -- Built in Transformation tools to convert and normalize data -- Please see the [DataVec Javadoc](/api/{{page.version}}/) here - -There's a brief tutorial below. - -## A Few Examples - - * Convert the CSV-based UCI Iris dataset into svmLight open vector text format - * Convert the MNIST dataset from raw binary files to the svmLight text format. - * Convert raw text into the Metronome vector format - * Convert raw text into TF-IDF based vectors in a text vector format {svmLight, metronome} - * Convert raw text into the word2vec in a text vector format {svmLight, metronome} - -## Targeted Vectorization Engines - - * Any CSV to vectors with a scriptable transform language - * MNIST to vectors - * Text to vectors - * TF-IDF - * Bag of Words - * word2vec - -## CSV Transformation Engine - -If data is numeric and appropriately formatted then CSVRecordReader may be satisfactory. 
If however your data has non-numeric fields such as strings representing boolean (T/F) or strings for labels then a Schema Transformation will be required. DataVec uses apache [Spark](http://spark.apache.org/) to perform transform operations. *note you do not need to know the internals of Spark to be succesful with DataVec Transform - -## Schema Transformation Video - -A video tutorial of a simple DataVec transform along with code is available below. - - -## Example Java Code - -Our [examples](https://github.com/eclipse/deeplearning4j-examples) include a collection of DataVec examples. - - - - -## Reading Records, Iterating Over Data - -The following code shows how to work with one example, raw images, transforming them into a format that will work well with DL4J and ND4J: - -``` java -// Instantiating RecordReader. Specify height, width and channels of images. -// Note that for grayscale output, channels = 1, whereas for RGB images, channels = 3 -RecordReader recordReader = new ImageRecordReader(28, 28, 3); - -// Point to data path. -recordReader.initialize(new FileSplit(new File(labeledPath))); -``` - -The RecordReader is a class in DataVec that helps convert the byte-oriented input into data that's oriented toward a record; i.e. a collection of elements that are fixed in number and indexed with a unique ID. Converting data to records is the process of vectorization. The record itself is a vector, each element of which is a feature. - -The [ImageRecordReader](https://github.com/eclipse/deeplearning4j/tree/master/datavec/blob/a64389c08396bb39626201beeabb7c4d5f9288f9/datavec-data/datavec-data-image/src/main/java/org/datavec/image/recordreader/ImageRecordReader.java) is a subclass of the RecordReader and is built to automatically take in 28 x 28 pixel images. Thus, LFW images are scaled to 28 pixels x 28 pixels. 
You can change dimensions to match your custom images by changing the parameters fed to the ImageRecordReader, as long as you make sure to adjust the `nIn` hyperparameter, which will be equal to the product of image height x image width. - -Other parameters shown above include `true`, which instructs the reader to append a label to the record, and `labels`, which is the array of supervised values (e.g. targets) used to validate neural net model results. Here are all the RecordReader extensions that come pre-built with DataVec (you can find them by right-clicking on `RecordReader` in IntelliJ, clicking `Go To` in the drop-down menu, and selection `Implementations`): - -![Alt text](/images/guide/recordreader_extensions.png) - -The DataSetIterator is a Deeplearning4J class that traverses the elements of a list. Iterators pass through the data list, accesses each item sequentially, keeps track of how far it has progressed by pointing to its current element, and modifies itself to point to the next element with each new step in the traversal. - -``` java -// DataVec to DL4J -DataSetIterator iter = new RecordReaderDataSetIterator(recordReader, 784, labels.size()); -``` - -The DataSetIterator iterates through input datasets, fetching one or more new examples with each iteration, and loading those examples into a DataSet object that neural nets can work with. Note that ImageRecordReader produces image data with 4 dimensions that matches DL4J's expected activations layout. Thus, each 28x28 RGB image is represented as a 4d array, with dimensions [minibatch, channels, height, width] = [1, 3, 28, 28]. Note that the constructor line above also specifies the number of labels possible. -Note also that ImageRecordReader does not normalize the image data, thus each pixel/channel value will be in the range 0 to 255 (and generally should be normalized separately - for example using ND4J's ImagePreProcessingScaler or another normalizer. 
- -`RecordReaderDataSetIterator` can take as parameters the specific recordReader you want (for images, sound, etc.) and the batch size. For supervised learning, it will also take a label index and the number of possible labels that can be applied to the input (for LFW, the number of labels is 5,749). - -## Execution - -Runs as both a local serial process and a MapReduce (MR engine on the roadmap) scale-out process with no code changes. - -## Targetted Vector Formats -* svmLight -* libsvm -* Metronome - -## Built-In General Functionality -* Understands how to take general text and convert it into vectors with stock techniques such as kernel hashing and TF-IDF diff --git a/docs/datavec/templates/readers.md b/docs/datavec/templates/readers.md deleted file mode 100644 index 95336316a..000000000 --- a/docs/datavec/templates/readers.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -title: DataVec Readers -short_title: Readers -description: Read individual records from different formats. -category: DataVec -weight: 2 ---- - -## Why readers? - -Readers iterate records from a dataset in storage and load the data into DataVec. The usefulness of readers beyond individual entries in a dataset includes: what if you wanted to train a text generator on a corpus? Or programmatically compose two entries together to form a new record? Reader implementations are useful for complex file types or distributed storage mechanisms. - -Readers return `Writable` classes that describe each column in a `Record`. These classes are used to convert each record to a tensor/ND-Array format. - -## Usage - -Each reader implementation extends `BaseRecordReader` and provides a simple API for selecting the next record in a dataset, acting similarly to iterators. - -Useful methods include: - -- `next`: Return a batch of `Writable`. -- `nextRecord`: Return a single `Record`, optionally with `RecordMetaData`. -- `reset`: Reset the underlying iterator. 
-- `hasNext`: Iterator method to determine if another record is available. - -## Listeners - -You can hook a custom `RecordListener` to a record reader for debugging or visualization purposes. Pass your custom listener to the `addListener` base method immediately after initializing your class. - -## Types of readers - -{{autogenerated}} \ No newline at end of file diff --git a/docs/datavec/templates/records.md b/docs/datavec/templates/records.md deleted file mode 100644 index 5e2933de2..000000000 --- a/docs/datavec/templates/records.md +++ /dev/null @@ -1,19 +0,0 @@ ---- -title: DataVec Records -short_title: Records -description: How to use data records in DataVec. -category: DataVec -weight: 1 ---- - -## What is a record? - -In the DataVec world a Record represents a single entry in a dataset. DataVec differentiates types of records to make data manipulation easier with built-in APIs. Sequences and 2D records are distinguishable. - -## Using records - -Most of the time you do not need to interact with the record classes directly, unless you are manually iterating records for the purpose of forwarding through a neural network. - -## Types of records - -{{autogenerated}} \ No newline at end of file diff --git a/docs/datavec/templates/reductions.md b/docs/datavec/templates/reductions.md deleted file mode 100644 index 660aabe56..000000000 --- a/docs/datavec/templates/reductions.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: DataVec Reductions -short_title: Reductions -description: Operations for reducing complexity in data. -category: DataVec -weight: 1 ---- - -## Available reductions - -{{autogenerated}} \ No newline at end of file diff --git a/docs/datavec/templates/schema.md b/docs/datavec/templates/schema.md deleted file mode 100644 index b5c9e8f1f..000000000 --- a/docs/datavec/templates/schema.md +++ /dev/null @@ -1,60 +0,0 @@ ---- -title: DataVec Schema -short_title: Schema -description: Schemas for datasets and transformation. 
-category: DataVec -weight: 1 ---- - -## Why use schemas? - -The unfortunate reality is that data is *dirty*. When trying to vecotrize a dataset for deep learning, it is quite rare to find files that have zero errors. Schema is important for maintaining the meaning of the data before using it for something like training a neural network. - -## Using schemas - -Schemas are primarily used for programming transformations. Before you can properly execute a `TransformProcess` you will need to pass the schema of the data being transformed. - -An example of a schema for merchant records may look like: - -```java -Schema inputDataSchema = new Schema.Builder() - .addColumnsString("DateTimeString", "CustomerID", "MerchantID") - .addColumnInteger("NumItemsInTransaction") - .addColumnCategorical("MerchantCountryCode", Arrays.asList("USA","CAN","FR","MX")) - .addColumnDouble("TransactionAmountUSD",0.0,null,false,false) //$0.0 or more, no maximum limit, no NaN and no Infinite values - .addColumnCategorical("FraudLabel", Arrays.asList("Fraud","Legit")) - .build(); -``` - -## Joining schemas - -If you have two different datasets that you want to merge together, DataVec provides a `Join` class with different join strategies such as `Inner` or `RightOuter`. 
- -```java -Schema customerInfoSchema = new Schema.Builder() - .addColumnLong("customerID") - .addColumnString("customerName") - .addColumnCategorical("customerCountry", Arrays.asList("USA","France","Japan","UK")) - .build(); - -Schema customerPurchasesSchema = new Schema.Builder() - .addColumnLong("customerID") - .addColumnTime("purchaseTimestamp", DateTimeZone.UTC) - .addColumnLong("productID") - .addColumnInteger("purchaseQty") - .addColumnDouble("unitPriceUSD") - .build(); - -Join join = new Join.Builder(Join.JoinType.Inner) - .setJoinColumns("customerID") - .setSchemas(customerInfoSchema, customerPurchasesSchema) - .build(); -``` - -Once you've defined your join and you've loaded the data into DataVec, you must use an `Executor` to complete the join. - -## Classes and utilities - -DataVec comes with a few `Schema` classes and helper utilities for 2D and sequence types of data. - -{{autogenerated}} \ No newline at end of file diff --git a/docs/datavec/templates/serialization.md b/docs/datavec/templates/serialization.md deleted file mode 100644 index 49d8734f4..000000000 --- a/docs/datavec/templates/serialization.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -title: DataVec Serialization -short_title: Serialization -description: Data wrangling and mapping from one schema to another. -category: DataVec -weight: 1 ---- - -## Serializing transforms - -DataVec comes with the ability to serialize transforms, which allows them to be more portable when they're needed for production environments. A `TransformProcess` is serialzied to a human-readable format such as JSON and can be saved as a file. - -## Serialization - -The code below shows how you can serialize the transform process `tp`. - -```java -String serializedTransformString = tp.toJson() -``` - -## Deserialization - -When you want to reinstantiate the transform process, call the static `from` method. 
- -```java -TransformProcess tp = TransformProcess.fromJson(serializedTransformString) -``` - - -## Available serializers - -{{autogenerated}} \ No newline at end of file diff --git a/docs/datavec/templates/transforms.md b/docs/datavec/templates/transforms.md deleted file mode 100644 index 1414e786a..000000000 --- a/docs/datavec/templates/transforms.md +++ /dev/null @@ -1,64 +0,0 @@ ---- -title: DataVec Transforms -short_title: Transforms -description: Data wrangling and mapping from one schema to another. -category: DataVec -weight: 1 ---- - -## Data wrangling - -One of the key tools in DataVec is transformations. DataVec helps the user map a dataset from one schema to another, and provides a list of operations to convert types, format data, and convert a 2D dataset to sequence data. - -## Building a transform process - -A transform process requires a `Schema` to successfully transform data. Both schema and transform process classes come with a helper `Builder` class which are useful for organizing code and avoiding complex constructors. - -When both are combined together they look like the sample code below. Note how `inputDataSchema` is passed into the `Builder` constructor. Your transform process will fail to compile without it. 
- -```java -import org.datavec.api.transform.TransformProcess; - -TransformProcess tp = new TransformProcess.Builder(inputDataSchema) - .removeColumns("CustomerID","MerchantID") - .filter(new ConditionFilter(new CategoricalColumnCondition("MerchantCountryCode", ConditionOp.NotInSet, new HashSet<>(Arrays.asList("USA","CAN"))))) - .conditionalReplaceValueTransform( - "TransactionAmountUSD", //Column to operate on - new DoubleWritable(0.0), //New value to use, when the condition is satisfied - new DoubleColumnCondition("TransactionAmountUSD",ConditionOp.LessThan, 0.0)) //Condition: amount < 0.0 - .stringToTimeTransform("DateTimeString","YYYY-MM-DD HH:mm:ss.SSS", DateTimeZone.UTC) - .renameColumn("DateTimeString", "DateTime") - .transform(new DeriveColumnsFromTimeTransform.Builder("DateTime").addIntegerDerivedColumn("HourOfDay", DateTimeFieldType.hourOfDay()).build()) - .removeColumns("DateTime") - .build(); -``` - -## Executing a transformation - -Different "backends" for executors are available. Using the `tp` transform process above, here's how you can execute it locally using plain DataVec. - -```java -import org.datavec.local.transforms.LocalTransformExecutor; - -List> processedData = LocalTransformExecutor.execute(originalData, tp); -``` - -## Debugging - -Each operation in a transform process represents a "step" in schema changes. Sometimes, the resulting transformation is not the intended result. 
You can debug this by printing each step in the transform `tp` with the following: - -```java -//Now, print the schema after each time step: -int numActions = tp.getActionList().size(); - -for(int i=0; i()) - .windowSize(5).iterate(iter).tokenizerFactory(t).build(); - - vec.fit(); - - assertNotEquals(vec.lookupTable().vector("UNK"), vec.lookupTable().vector("negative")); - assertNotEquals(vec.lookupTable().vector("UNK"),vec.lookupTable().vector("positive")); - assertNotEquals(vec.lookupTable().vector("UNK"),vec.lookupTable().vector("neutral"));} -``` - -### Further Reading - -* [Distributed Representations of Sentences and Documents](https://cs.stanford.edu/~quocle/paragraph_vector.pdf) -* [Word2vec: A Tutorial](./word2vec) \ No newline at end of file diff --git a/docs/deeplearning4j-nlp/templates/overview.md b/docs/deeplearning4j-nlp/templates/overview.md deleted file mode 100644 index 0ad46f458..000000000 --- a/docs/deeplearning4j-nlp/templates/overview.md +++ /dev/null @@ -1,43 +0,0 @@ ---- -title: Deeplearning4j's NLP Functionality -short_title: Overview -description: Overview of language processing in DL4J -category: Language Processing -weight: 0 ---- - -## Deeplearning4j's NLP Functionality - -Although not designed to be comparable to tools such as Stanford CoreNLP or NLTK, deepLearning4J does include some core text processing tools that are described here. - -Deeplearning4j's NLP relies on [ClearTK](https://cleartk.github.io/cleartk/), an open-source machine learning and natural language processing framework for the Apache [Unstructured Information Management Architecture](https://uima.apache.org/), or UIMA. UIMA enables us to perform language identification, language-specific segmentation, sentence boundary detection and entity detection (proper nouns: persons, corporations, places and things). - -### SentenceIterator - -There are several steps involved in processing natural language. 
The first is to iterate over your corpus to create a list of documents, which can be as short as a tweet, or as long as a newspaper article. This is performed by a SentenceIterator, which will appear like this: - - - -The SentenceIterator encapsulates a corpus or text, organizing it, say, as one Tweet per line. It is responsible for feeding text piece by piece into your natural language processor. The SentenceIterator is not analogous to a similarly named class, the DatasetIterator, which creates a dataset for training a neural net. Instead it creates a collection of strings by segmenting a corpus. - -### Tokenizer - -A Tokenizer further segments the text at the level of single words, also alternatively as n-grams. ClearTK contains the underlying tokenizers, such as parts of speech (PoS) and parse trees, which allow for both dependency and constituency parsing, like that employed by a recursive neural tensor network (RNTN). - -A Tokenizer is created and wrapped by a [TokenizerFactory](https://github.com/eclipse/deeplearning4j/blob/6f027fd5075e3e76a38123ae5e28c00c17db4361/deeplearning4j-scaleout/deeplearning4j-nlp/src/main/java/org/deeplearning4j/text/tokenization/tokenizerfactory/UimaTokenizerFactory.java). The default tokens are words separated by spaces. The tokenization process also involves some machine learning to differentiate between ambibuous symbols like . which end sentences and also abbreviate words such as Mr. and vs. - -Both Tokenizers and SentenceIterators work with Preprocessors to deal with anomalies in messy text like Unicode, and to render such text, say, as lowercase characters uniformly. - - - - - -### Vocab - -Each document has to be tokenized to create a vocab, the set of words that matter for that document or corpus. Those words are stored in the vocab cache, which contains statistics about a subset of words counted in the document, the words that "matter". 
The line separating significant and insignifant words is mobile, but the basic idea of distinguishing between the two groups is that words occurring only once (or less than, say, five times) are hard to learn and their presence represents unhelpful noise. - -The vocab cache stores metadata for methods such as Word2vec and Bag of Words, which treat words in radically different ways. Word2vec creates representations of words, or neural word embeddings, in the form of vectors that are hundreds of coefficients long. Those coefficients help neural nets predict the likelihood of a word appearing in any given context; for example, after another word. Here's Word2vec, configured: - - - -Once you obtain word vectors, you can feed them into a deep net for classification, prediction, sentiment analysis and the like. \ No newline at end of file diff --git a/docs/deeplearning4j-nlp/templates/sentence-iterator.md b/docs/deeplearning4j-nlp/templates/sentence-iterator.md deleted file mode 100644 index fdb2bf130..000000000 --- a/docs/deeplearning4j-nlp/templates/sentence-iterator.md +++ /dev/null @@ -1,56 +0,0 @@ ---- -title: Sentence Iteration -short_title: Sentence Iteration -description: Iteration of words, documents, and sentences for language processing in DL4J. -category: Language Processing -weight: 10 ---- - -## Sentence iterator - -A [sentence iterator](./doc/org/deeplearning4j/word2vec/sentenceiterator/SentenceIterator.html) is used in both [Word2vec](./word2vec.html) and [Bag of Words](./bagofwords-tf-idf.html). - -It feeds bits of text into a neural network in the form of vectors, and also covers the concept of documents in text processing. - -In natural-language processing, a document or sentence is typically used to encapsulate a context which an algorithm should learn. - -A few examples include analyzing Tweets and full-blown news articles. 
The purpose of the [sentence iterator](./doc/org/deeplearning4j/word2vec/sentenceiterator/SentenceIterator.html) is to divide text into processable bits. Note the sentence iterator is input agnostic. So bits of text (a document) can come from a file system, the Twitter API or Hadoop. - -Depending on how input is processed, the output of a sentence iterator will then be passed to a [tokenizer](./org/deeplearning4j/word2vec/tokenizer/Tokenizer.html) for the processing of individual tokens, which are usually words, but could also be ngrams, skipgrams or other units. The tokenizer is created on a per-sentence basis by a [tokenizer factory](./doc/org/deeplearning4j/word2vec/tokenizer/TokenizerFactory.html). The tokenizer factory is what is passed into a text-processing vectorizer. - -Some typical examples are below: - - SentenceIterator iter = new LineSentenceIterator(new File("your file")); - -This assumes that each line in a file is a sentence. - -You can also do list of strings as sentence as follows: - - Collection sentences = ...; - SentenceIterator iter = new CollectionSentenceIterator(sentences); - -This will assume that each string is a sentence (document). Remember this could be a list of Tweets or articles -- both are applicable. - -You can iterate over files as follows: - - SentenceIterator iter = new FileSentenceIterator(new File("your dir or file")); - -This will parse the files line by line and return individual sentences on each one. - -For anything complex, we recommend an actual machine-learning level pipeline, represented by the [UimaSentenceIterator](./doc/org/deeplearning4j/text/sentenceiterator/UimaSentenceIterator.html). - -The UimaSentenceIterator is capable of tokenization, part-of-speech tagging and lemmatization, among other things. The UimaSentenceIterator iterates over a set of files and can segment sentences. You can customize its behavior based on the AnalysisEngine passed into it. 
- -The AnalysisEngine is the [UIMA](http://uima.apache.org/) concept of a text-processing pipeline. DeepLearning4j comes with standard analysis engines for all of these common tasks, allowing you to customize which text is being passed in and how you define sentences. The AnalysisEngines are thread-safe versions of the [opennlp](http://opennlp.apache.org/) pipelines. We also include [cleartk](http://cleartk.googlecode.com/)-based pipelines for handling common tasks. - -For those using UIMA or curious about it, this employs the cleartk type system for tokens, sentences, and other annotations within the type system. - -Here's how to create a UimaSentenceItrator. - - SentenceIterator iter = UimaSentenceIterator.create("path/to/your/text/documents"); - -You can also instantiate directly: - - SentenceIterator iter = new UimaSentenceIterator(path,AnalysisEngineFactory.createEngine(AnalysisEngineFactory.createEngineDescription(TokenizerAnnotator.getDescription(), SentenceAnnotator.getDescription()))); - -For those familiar with Uima, this uses Uimafit extensively to create analysis engines. You can also create custom sentence iterators by extending SentenceIterator. \ No newline at end of file diff --git a/docs/deeplearning4j-nlp/templates/tokenization.md b/docs/deeplearning4j-nlp/templates/tokenization.md deleted file mode 100644 index d12e069c6..000000000 --- a/docs/deeplearning4j-nlp/templates/tokenization.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -title: Tokenization -short_title: Tokenization -description: Breaking text into individual words for language processing in DL4J. -category: Language Processing -weight: 10 ---- - -## What is Tokenization? - -Tokenization is the process of breaking text down into individual words. Word windows are also composed of tokens. [Word2Vec](./word2vec.html) can output text windows that comprise training examples for input into neural nets, as seen here. 
- -## Example - -Here's an example of tokenization done with DL4J tools: - - //tokenization with lemmatization,part of speech taggin,sentence segmentation - TokenizerFactory tokenizerFactory = new UimaTokenizerFactory(); - Tokenizer tokenizer = tokenizerFactory.tokenize("mystring"); - - //iterate over the tokens - while(tokenizer.hasMoreTokens()) { - String token = tokenizer.nextToken(); - } - - //get the whole list of tokens - List tokens = tokenizer.getTokens(); - -The above snippet creates a tokenizer capable of stemming. - -In Word2Vec, that's the recommended a way of creating a vocabulary, because it averts various vocabulary quirks, such as the singular and plural of the same noun being counted as two different words. \ No newline at end of file diff --git a/docs/deeplearning4j-nlp/templates/vocabulary-cache.md b/docs/deeplearning4j-nlp/templates/vocabulary-cache.md deleted file mode 100644 index 719b4592a..000000000 --- a/docs/deeplearning4j-nlp/templates/vocabulary-cache.md +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: Vocabulary Cache -short_title: Vocab Cache -description: Mechanism for handling general NLP tasks in DL4J. -category: Language Processing -weight: 10 ---- - -# How the Vocab Cache Works - -The vocabulary cache, or vocab cache, is a mechanism for handling general-purpose natural-language tasks in Deeplearning4j, including normal TF-IDF, word vectors and certain information-retrieval techniques. The goal of the vocab cache is to be a one-stop shop for text vectorization, encapsulating techniques common to bag of words and word vectors, among others. - -Vocab cache handles storage of tokens, word-count frequencies, inverse-document frequencies and document occurrences via an inverted index. The InMemoryLookupCache is the reference implementation. - -In order to use a vocab cache as you iterate over text and index tokens, you need to figure out if the tokens should be included in the vocab. 
The criterion is usually if tokens occur with more than a certain pre-configured frequency in the corpus. Below that frequency, an individual token isn't a vocab word, and it remains just a token. - -We track tokens as well. In order to track tokens, do the following: - - addToken(new VocabWord(1.0,"myword")); - -When you want to add a vocab word, do the following: - - addWordToIndex(0, Word2Vec.UNK); - putVocabWord(Word2Vec.UNK); - -Adding the word to the index sets the index. Then you declare it as a vocab word. (Declaring it as a vocab word will pull the word from the index.) \ No newline at end of file diff --git a/docs/deeplearning4j-nlp/templates/word2vec.md b/docs/deeplearning4j-nlp/templates/word2vec.md deleted file mode 100644 index df188dc2f..000000000 --- a/docs/deeplearning4j-nlp/templates/word2vec.md +++ /dev/null @@ -1,495 +0,0 @@ ---- -title: Word2Vec in Deeplearning4j -short_title: Word2Vec -description: Neural word embeddings for NLP in DL4J. -category: Language Processing -weight: 2 ---- - -## Word2Vec, Doc2vec & GloVe: Neural Word Embeddings for Natural Language Processing - -Contents - -* Introduction -* Neural Word Embeddings -* Amusing Word2vec Results -* **Just Give Me the Code** -* Anatomy of Word2Vec -* Setup, Load and Train -* A Code Example -* Troubleshooting & Tuning Word2Vec -* Word2vec Use Cases -* Foreign Languages -* GloVe (Global Vectors) & Doc2Vec - -## Introduction to Word2Vec - -Word2vec is a two-layer neural net that processes text. Its input is a text corpus and its output is a set of vectors: feature vectors for words in that corpus. While Word2vec is not a [deep neural network](https://skymind.ai/wiki/neural-network), it turns text into a numerical form that deep nets can understand. [Deeplearning4j](./deeplearning4j-quickstart) implements a distributed form of Word2vec for Java and Scala, which works on Spark with GPUs. - -Word2vec's applications extend beyond parsing sentences in the wild. 
It can be applied just as well to genes, code, likes, playlists, social media graphs and other verbal or symbolic series in which patterns may be discerned. - -Why? Because words are simply discrete states like the other data mentioned above, and we are simply looking for the transitional probabilities between those states: the likelihood that they will co-occur. So gene2vec, like2vec and follower2vec are all possible. With that in mind, the tutorial below will help you understand how to create neural embeddings for any group of discrete and co-occurring states. - -The purpose and usefulness of Word2vec is to group the vectors of similar words together in vectorspace. That is, it detects similarities mathematically. Word2vec creates vectors that are distributed numerical representations of word features, features such as the context of individual words. It does so without human intervention. - -Given enough data, usage and contexts, Word2vec can make highly accurate guesses about a word’s meaning based on past appearances. Those guesses can be used to establish a word's association with other words (e.g. "man" is to "boy" what "woman" is to "girl"), or cluster documents and classify them by topic. Those clusters can form the basis of search, [sentiment analysis](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/recurrent/word2vecsentiment/Word2VecSentimentRNN.java) and recommendations in such diverse fields as scientific research, legal discovery, e-commerce and customer relationship management. - -The output of the Word2vec neural net is a vocabulary in which each item has a vector attached to it, which can be fed into a deep-learning net or simply queried to detect relationships between words. - -Measuring [cosine similarity](https://skymind.ai/wiki/glossary#cosine), no similarity is expressed as a 90 degree angle, while total similarity of 1 is a 0 degree angle, complete overlap; i.e. 
Sweden equals Sweden, while Norway has a cosine distance of 0.760124 from Sweden, the highest of any other country. - -Here's a list of words associated with "Sweden" using Word2vec, in order of proximity: - -![Cosine Distance](/images/guide/sweden_cosine_distance.png) - -The nations of Scandinavia and several wealthy, northern European, Germanic countries are among the top nine. - -## Neural Word Embeddings - -The vectors we use to represent words are called *neural word embeddings*, and representations are strange. One thing describes another, even though those two things are radically different. As Elvis Costello said: "Writing about music is like dancing about architecture." Word2vec "vectorizes" about words, and by doing so it makes natural language computer-readable -- we can start to perform powerful mathematical operations on words to detect their similarities. - -So a neural word embedding represents a word with numbers. It's a simple, yet unlikely, translation. - -Word2vec is similar to an autoencoder, encoding each word in a vector, but rather than training against the input words through [reconstruction](.https://skymind.ai/wiki/variational-autoencoder) word2vec trains words against other words that neighbor them in the input corpus. - -It does so in one of two ways, either using context to predict a target word (a method known as continuous bag of words, or CBOW), or using a word to predict a target context, which is called skip-gram. We use the latter method because it produces more accurate results on large datasets. - -![word2vec diagram](/images/guide/word2vec_diagrams.png) - -When the feature vector assigned to a word cannot be used to accurately predict that word's context, the components of the vector are adjusted. Each word's context in the corpus is the *teacher* sending error signals back to adjust the feature vector. The vectors of words judged similar by their context are nudged closer together by adjusting the numbers in the vector. 
- -Just as Van Gogh's painting of sunflowers is a two-dimensional mixture of oil on canvas that *represents* vegetable matter in a three-dimensional space in Paris in the late 1880s, so 500 numbers arranged in a vector can represent a word or group of words. - -Those numbers locate each word as a point in 500-dimensional vectorspace. Spaces of more than three dimensions are difficult to visualize. (Geoff Hinton, teaching people to imagine 13-dimensional space, suggests that students first picture 3-dimensional space and then say to themselves: "Thirteen, thirteen, thirteen." :) - -A well trained set of word vectors will place similar words close to each other in that space. The words *oak*, *elm* and *birch* might cluster in one corner, while *war*, *conflict* and *strife* huddle together in another. - -Similar things and ideas are shown to be "close". Their relative meanings have been translated to measurable distances. Qualities become quantities, and algorithms can do their work. But similarity is just the basis of many associations that Word2vec can learn. For example, it can gauge relations between words of one language, and map them to another. - -![word2vec translation](/images/guide/word2vec_translation.png) - -These vectors are the basis of a more comprehensive geometry of words. As shown in the graph, capital cities such as Rome, Paris, Berlin and Beijing cluster near each other, and they will each have similar distances in vectorspace to their countries; i.e. Rome - Italy = Beijing - China. If you only knew that Rome was the capital of Italy, and were wondering about the capital of China, then the equation Rome -Italy + China would return Beijing. No kidding. - -![capitals output](/images/guide/countries_capitals.png) - -## Amusing Word2Vec Results - -Let's look at some other associations Word2vec can produce. 
- -Instead of the pluses, minus and equals signs, we'll give you the results in the notation of logical analogies, where `:` means "is to" and `::` means "as"; e.g. "Rome is to Italy as Beijing is to China" = `Rome:Italy::Beijing:China`. In the last spot, rather than supplying the "answer", we'll give you the list of words that a Word2vec model proposes, when given the first three elements: - - king:queen::man:[woman, Attempted abduction, teenager, girl] - //Weird, but you can kind of see it - - China:Taiwan::Russia:[Ukraine, Moscow, Moldova, Armenia] - //Two large countries and their small, estranged neighbors - - house:roof::castle:[dome, bell_tower, spire, crenellations, turrets] - - knee:leg::elbow:[forearm, arm, ulna_bone] - - New York Times:Sulzberger::Fox:[Murdoch, Chernin, Bancroft, Ailes] - //The Sulzberger-Ochs family owns and runs the NYT. - //The Murdoch family owns News Corp., which owns Fox News. - //Peter Chernin was News Corp.'s COO for 13 yrs. - //Roger Ailes is president of Fox News. - //The Bancroft family sold the Wall St. Journal to News Corp. - - love:indifference::fear:[apathy, callousness, timidity, helplessness, inaction] - //the poetry of this single array is simply amazing... - - Donald Trump:Republican::Barack Obama:[Democratic, GOP, Democrats, McCain] - //It's interesting to note that, just as Obama and McCain were rivals, - //so too, Word2vec thinks Trump has a rivalry with the idea Republican. - - monkey:human::dinosaur:[fossil, fossilized, Ice_Age_mammals, fossilization] - //Humans are fossilized monkeys? Humans are what's left - //over from monkeys? Humans are the species that beat monkeys - //just as Ice Age mammals beat dinosaurs? Plausible. - - building:architect::software:[programmer, SecurityCenter, WinPcap] - -This model was trained on the Google News vocab, which you can [import](#import) and play with. Contemplate, for a moment, that the Word2vec algorithm has never been taught a single rule of English syntax. 
It knows nothing about the world, and is unassociated with any rules-based symbolic logic or knowledge graph. And yet it learns more, in a flexible and automated fashion, than most knowledge graphs will learn after a years of human labor. It comes to the Google News documents as a blank slate, and by the end of training, it can compute complex analogies that mean something to humans. - -You can also query a Word2vec model for other assocations. Not everything has to be two analogies that mirror each other. ([We explain how below....](#eval)) - -* Geopolitics: *Iraq - Violence = Jordan* -* Distinction: *Human - Animal = Ethics* -* *President - Power = Prime Minister* -* *Library - Books = Hall* -* Analogy: *Stock Market ≈ Thermometer* - -By building a sense of one word's proximity to other similar words, which do not necessarily contain the same letters, we have moved beyond hard tokens to a smoother and more general sense of meaning. - -# Just Give Me the Code - -## Anatomy of Word2vec in DL4J - -Here are Deeplearning4j's natural-language processing components: - -* **SentenceIterator/DocumentIterator**: Used to iterate over a dataset. A SentenceIterator returns strings and a DocumentIterator works with inputstreams. -* **Tokenizer/TokenizerFactory**: Used in tokenizing the text. In NLP terms, a sentence is represented as a series of tokens. A TokenizerFactory creates an instance of a tokenizer for a "sentence." -* **VocabCache**: Used for tracking metadata including word counts, document occurrences, the set of tokens (not vocab in this case, but rather tokens that have occurred), vocab (the features included in both [bag of words](./bagofwords-tf-idf.html) as well as the word vector lookup table) -* **Inverted Index**: Stores metadata about where words occurred. Can be used for understanding the dataset. A Lucene index with the Lucene implementation[1] is automatically created. 
- -While Word2vec refers to a family of related algorithms, this implementation uses [Negative Sampling](https://skymind.ai/wiki/glossary#skipgram). - -## Word2Vec Setup - -Create a new project in IntelliJ using Maven. If you don't know how to do that, see our [Quickstart page](./deeplearning4j-quickstart). Then specify these properties and dependencies in the POM.xml file in your project's root directory (You can [check Maven](https://search.maven.org/#search%7Cga%7C1%7Cnd4j) for the most recent versions -- please use those...). - - - -### Loading Data - -Now create and name a new class in Java. After that, you'll take the raw sentences in your .txt file, traverse them with your iterator, and subject them to some sort of preprocessing, such as converting all words to lowercase. - -``` java - String filePath = new ClassPathResource("raw_sentences.txt").getFile().getAbsolutePath(); - - log.info("Load & Vectorize Sentences...."); - // Strip white space before and after for each line - SentenceIterator iter = new BasicLineIterator(filePath); -``` - -If you want to load a text file besides the sentences provided in our example, you'd do this: - -``` java - log.info("Load data...."); - SentenceIterator iter = new LineSentenceIterator(new File("/Users/cvn/Desktop/file.txt")); - iter.setPreProcessor(new SentencePreProcessor() { - @Override - public String preProcess(String sentence) { - return sentence.toLowerCase(); - } - }); -``` - -That is, get rid of the `ClassPathResource` and feed the absolute path of your `.txt` file into the `LineSentenceIterator`. - -``` java -SentenceIterator iter = new LineSentenceIterator(new File("/your/absolute/file/path/here.txt")); -``` - -In bash, you can find the absolute file path of any directory by typing `pwd` in your command line from within that same directory. To that path, you'll add the file name and *voila*. 
- -### Tokenizing the Data - -Word2vec needs to be fed words rather than whole sentences, so the next step is to tokenize the data. To tokenize a text is to break it up into its atomic units, creating a new token each time you hit a white space, for example. - -``` java - // Split on white spaces in the line to get words - TokenizerFactory t = new DefaultTokenizerFactory(); - t.setTokenPreProcessor(new CommonPreprocessor()); -``` - -That should give you one word per line. - -### Training the Model - -Now that the data is ready, you can configure the Word2vec neural net and feed in the tokens. - -``` java - log.info("Building model...."); - Word2Vec vec = new Word2Vec.Builder() - .minWordFrequency(5) - .layerSize(100) - .seed(42) - .windowSize(5) - .iterate(iter) - .tokenizerFactory(t) - .build(); - - log.info("Fitting Word2Vec model...."); - vec.fit(); -``` - -This configuration accepts a number of hyperparameters. A few require some explanation: - -* *batchSize* is the amount of words you process at a time. -* *minWordFrequency* is the minimum number of times a word must appear in the corpus. Here, if it appears less than 5 times, it is not learned. Words must appear in multiple contexts to learn useful features about them. In very large corpora, it's reasonable to raise the minimum. -* *useAdaGrad* - Adagrad creates a different gradient for each feature. Here we are not concerned with that. -* *layerSize* specifies the number of features in the word vector. This is equal to the number of dimensions in the featurespace. Words represented by 500 features become points in a 500-dimensional space. -* *learningRate* is the step size for each update of the coefficients, as words are repositioned in the feature space. -* *minLearningRate* is the floor on the learning rate. Learning rate decays as the number of words you train on decreases. If learning rate shrinks too much, the net's learning is no longer efficient. This keeps the coefficients moving. 
-* *iterate* tells the net what batch of the dataset it's training on. -* *tokenizer* feeds it the words from the current batch. -* *vec.fit()* tells the configured net to begin training. - -An example for [uptraining your previously trained word vectors is here](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/word2vec/Word2VecUptrainingExample.java). - -### Evaluating the Model, Using Word2vec - -The next step is to evaluate the quality of your feature vectors. - -``` java - // Write word vectors - WordVectorSerializer.writeWordVectors(vec, "pathToWriteto.txt"); - - log.info("Closest Words:"); - Collection lst = vec.wordsNearest("day", 10); - System.out.println(lst); - UiServer server = UiServer.getInstance(); - System.out.println("Started on port " + server.getPort()); - - //output: [night, week, year, game, season, during, office, until, -] -``` - -The line `vec.similarity("word1","word2")` will return the cosine similarity of the two words you enter. The closer it is to 1, the more similar the net perceives those words to be (see the Sweden-Norway example above). For example: - -``` java - double cosSim = vec.similarity("day", "night"); - System.out.println(cosSim); - //output: 0.7704452276229858 -``` - -With `vec.wordsNearest("word1", numWordsNearest)`, the words printed to the screen allow you to eyeball whether the net has clustered semantically similar words. You can set the number of nearest words you want with the second parameter of wordsNearest. For example: - -``` java - Collection lst3 = vec.wordsNearest("man", 10); - System.out.println(lst3); - //output: [director, company, program, former, university, family, group, such, general] -``` - -### Visualizing the Model - -We rely on [TSNE](https://lvdmaaten.github.io/tsne/) to reduce the dimensionality of word feature vectors and project words into a two or three-dimensional space. 
The full [DL4J/ND4J example for TSNE is here](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/tsne/TSNEStandardExample.java). - -``` java - Nd4j.setDataType(DataBuffer.Type.DOUBLE); - List cacheList = new ArrayList<>(); //cacheList is a dynamic array of strings used to hold all words - - //STEP 2: Turn text input into a list of words - log.info("Load & Vectorize data...."); - File wordFile = new ClassPathResource("words.txt").getFile(); //Open the file - //Get the data of all unique word vectors - Pair vectors = WordVectorSerializer.loadTxt(wordFile); - VocabCache cache = vectors.getSecond(); - INDArray weights = vectors.getFirst().getSyn0(); //seperate weights of unique words into their own list - - for(int i = 0; i < cache.numWords(); i++) //seperate strings of words into their own list - cacheList.add(cache.wordAtIndex(i)); - - //STEP 3: build a dual-tree tsne to use later - log.info("Build model...."); - BarnesHutTsne tsne = new BarnesHutTsne.Builder() - .setMaxIter(iterations).theta(0.5) - .normalize(false) - .learningRate(500) - .useAdaGrad(false) -// .usePca(false) - .build(); - - //STEP 4: establish the tsne values and save them to a file - log.info("Store TSNE Coordinates for Plotting...."); - String outputFile = "target/archive-tmp/tsne-standard-coords.csv"; - (new File(outputFile)).getParentFile().mkdirs(); - - tsne.fit(weights); - tsne.saveAsFile(cacheList, outputFile); -``` - -### Saving, Reloading & Using the Model - -You'll want to save the model. The normal way to save models in Deeplearning4j is via the serialization utils (Java serialization is akin to Python pickling, converting an object into a *series* of bytes). - -``` java - log.info("Save vectors...."); - WordVectorSerializer.writeWord2VecModel(vec, "pathToSaveModel.txt"); -``` - -This will save the vectors to a file called `pathToSaveModel.txt` that will appear in the root of the directory where Word2vec is trained. 
The output in the file should have one word per line, followed by a series of numbers that together are its vector representation. - -To keep working with the vectors, simply call methods on `vec` like this: - -``` java -Collection kingList = vec.wordsNearest(Arrays.asList("king", "woman"), Arrays.asList("queen"), 10); -``` - -The classic example of Word2vec's arithmetic of words is "king - queen = man - woman" and its logical extension "king - queen + woman = man". - -The example above will output the 10 nearest words to the vector `king - queen + woman`, which should include `man`. The first parameter for wordsNearest has to include the "positive" words `king` and `woman`, which have a + sign associated with them; the second parameter includes the "negative" word `queen`, which is associated with the minus sign (positive and negative here have no emotional connotation); the third is the length of the list of nearest words you would like to see. Remember to add this to the top of the file: `import java.util.Arrays;`. - -Any number of combinations is possible, but they will only return sensible results if the words you query occurred with enough frequency in the corpus. Obviously, the ability to return similar words (or documents) is at the foundation of both search and recommendation engines. - -You can reload the vectors into memory like this: - -``` java - Word2Vec word2Vec = WordVectorSerializer.readWord2VecModel("pathToSaveModel.txt"); -``` - -You can then use Word2vec as a lookup table: - -``` java - WeightLookupTable weightLookupTable = word2Vec.lookupTable(); - Iterator vectors = weightLookupTable.vectors(); - INDArray wordVectorMatrix = word2Vec.getWordVectorMatrix("myword"); - double[] wordVector = word2Vec.getWordVector("myword"); -``` - -If the word isn't in the vocabulary, Word2vec returns zeros. 
- -### Importing Word2vec Models - -The [Google News Corpus model](https://dl4jdata.blob.core.windows.net/resources/wordvectors/GoogleNews-vectors-negative300.bin.gz) we use to test the accuracy of our trained nets is hosted on S3. Users whose current hardware takes a long time to train on large corpora can simply download it to explore a Word2vec model without the prelude. - -If you trained with the [C vectors](https://docs.google.com/file/d/0B7XkCwpI5KDYaDBDQm1tZGNDRHc/edit) or Gensim, this line will import the model. - -``` java - File gModel = new File("/Developer/Vector Models/GoogleNews-vectors-negative300.bin.gz"); - Word2Vec vec = WordVectorSerializer.readWord2VecModel(gModel); -``` - -Remember to add `import java.io.File;` to your imported packages. - -With large models, you may run into trouble with your heap space. The Google model may take as much as 10G of RAM, and the JVM only launches with 256 MB of RAM, so you have to adjust your heap space. You can do that either with a `bash_profile` file (see our [Troubleshooting section](./deeplearning4j-troubleshooting-training)), or through IntelliJ itself: - -``` java - //Click: - IntelliJ Preferences > Compiler > Command Line Options - //Then paste: - -Xms1024m - -Xmx10g - -XX:MaxPermSize=2g -``` - -### N-grams & Skip-grams - -Words are read into the vector one at a time, *and scanned back and forth within a certain range*. Those ranges are n-grams, and an n-gram is a contiguous sequence of *n* items from a given linguistic sequence; it is the nth version of unigram, bigram, trigram, four-gram or five-gram. A skip-gram simply drops items from the n-gram. - -The skip-gram representation popularized by Mikolov and used in the DL4J implementation has proven to be more accurate than other models, such as continuous bag of words, due to the more generalizable contexts generated. - -This n-gram is then fed into a neural network to learn the significance of a given word vector; i.e.
significance is defined as its usefulness as an indicator of certain larger meanings, or labels. - -### A Working Example - -**Please note** : The code below may be outdated. For updated examples, please see our [dl4j-examples repository on Github](https://github.com/eclipse/deeplearning4j-examples/tree/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp). - -Now that you have a basic idea of how to set up Word2Vec, here's [one example](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/word2vec/Word2VecRawTextExample.java) of how it can be used with DL4J's API: - - - -After following the instructions in the [Quickstart](./deeplearning4j-quickstart), you can open this example in IntelliJ and hit run to see it work. If you query the Word2vec model with a word that isn't contained in the training corpus, it will return null. - -### Troubleshooting & Tuning Word2Vec - -*Q: I get a lot of stack traces like this* - -``` java - java.lang.StackOverflowError: null - at java.lang.ref.Reference.(Reference.java:254) ~[na:1.8.0_11] - at java.lang.ref.WeakReference.(WeakReference.java:69) ~[na:1.8.0_11] - at java.io.ObjectStreamClass$WeakClassKey.(ObjectStreamClass.java:2306) [na:1.8.0_11] - at java.io.ObjectStreamClass.lookup(ObjectStreamClass.java:322) ~[na:1.8.0_11] - at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1134) ~[na:1.8.0_11] - at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1548) ~[na:1.8.0_11] -``` - -*A:* Look inside the directory where you started your Word2vec application. This can, for example, be an IntelliJ project home directory or the directory where you typed Java at the command line.
It should have some directories that look like: - -``` - ehcache_auto_created2810726831714447871diskstore - ehcache_auto_created4727787669919058795diskstore - ehcache_auto_created3883187579728988119diskstore - ehcache_auto_created9101229611634051478diskstore -``` - -You can shut down your Word2vec application and try to delete them. - -*Q: Not all of the words from my raw text data are appearing in my Word2vec object…* - -*A:* Try to raise the layer size via **.layerSize()** on your Word2Vec object like so - -``` java - Word2Vec vec = new Word2Vec.Builder().layerSize(300).windowSize(5) - .iterate(iter).tokenizerFactory(t).build(); -``` - -*Q: How do I load my data? Why does training take forever?* - -*A:* If all of your sentences have been loaded as *one* sentence, Word2vec training could take a very long time. That's because Word2vec is a sentence-level algorithm, so sentence boundaries are very important, because co-occurrence statistics are gathered sentence by sentence. (For GloVe, sentence boundaries don't matter, because it's looking at corpus-wide co-occurrence.) For many corpora, average sentence length is six words. That means that with a window size of 5 you have, say, 30 (random number here) rounds of skip-gram calculations. If you forget to specify your sentence boundaries, you may load a "sentence" that's 10,000 words long. In that case, Word2vec would attempt a full skip-gram cycle for the whole 10,000-word "sentence". In DL4J's implementation, a line is assumed to be a sentence. You need to plug in your own SentenceIterator and Tokenizer. By asking you to specify how your sentences end, DL4J remains language-agnostic. UimaSentenceIterator is one way to do that. It uses OpenNLP for sentence boundary detection.
- - -*Q: Why is there such a difference in performance when feeding whole documents as one "sentence" vs splitting into Sentences?* - -*A:* If an average sentence contains 6 words, and window size is 5, the maximum theoretical number of 10 skipgram rounds will be achieved on 0 words. The sentence isn't long enough to have a full window set with words. A rough maximum of 5 sg rounds is available there for all words in such a sentence. - -But if your "sentence" is 1,000,000 words long, you'll have 10 skipgram rounds for every word in this sentence, excluding the first five and last five. So, you'll have to spend WAY more time building the model + co-occurrence statistics will be shifted due to the absence of sentence boundaries. - -*Q: How does Word2Vec Use Memory?* - -*A:* The major memory consumer in w2v is the weights matrix. The math is simple there: NumberOfWords x NumberOfDimensions x 2 x DataType memory footprint. - -So, if you build a w2v model for 100k words using floats, and 100 dimensions, your memory footprint will be 100k x 100 x 2 x 4 (float size) = 80MB RAM just for the matrix + some space for strings, variables, threads etc. - -If you load a pre-built model, it uses roughly half as much RAM as during build time, so it's 40MB RAM. - -And the most popular model used so far is the Google News model. There's 3M words, and vector size 300. That gives us 3.6GB only to load the model. And you have to add 3M of strings, that do not have constant size in java. So, usually that's something around 4-6GB for the loaded model depending on jvm version/supplier, gc state and phase of the moon. - - -*Q: I did everything you said and the results still don't look right.* - -*A:* Make sure you're not hitting normalization issues. Some tasks, like wordsNearest(), use normalized weights by default, and others require non-normalized weights. Pay attention to this difference.
- - - -### Use Cases - -Google Scholar keeps a running tally of the papers citing [Deeplearning4j's implementation of Word2vec here](https://scholar.google.com/scholar?hl=en&q=deeplearning4j+word2vec&btnG=&as_sdt=1%2C5&as_sdtp=). - -Kenny Helsens, a data scientist based in Belgium, [applied Deeplearning4j's implementation of Word2vec](http://thinkdata.be/2015/06/10/word2vec-on-raw-omim-database/) to the NCBI's Online Mendelian Inheritance In Man (OMIM) database. He then looked for the words most similar to alk, a known oncogene of non-small cell lung carcinoma, and Word2vec returned: "nonsmall, carcinomas, carcinoma, mapdkd." From there, he established analogies between other cancer phenotypes and their genotypes. This is just one example of the associations Word2vec can learn on a large corpus. The potential for discovering new aspects of important diseases has only just begun, and outside of medicine, the opportunities are equally diverse. - -Andreas Klintberg trained Deeplearning4j's implementation of Word2vec on Swedish, and wrote a [thorough walkthrough on Medium](https://medium.com/@klintcho/training-a-word2vec-model-for-swedish-e14b15be6cb). - -Word2Vec is especially useful in preparing text-based data for information retrieval and QA systems, which DL4J implements with [deep autoencoders](./deeplearning4j-nn-autoencoders). - -Marketers might seek to establish relationships among products to build a recommendation engine. Investigators might analyze a social graph to surface members of a single group, or other relations they might have to location or financial sponsorship. - -### Google's Word2vec Patent - -Word2vec is [a method of computing vector representations of words](https://arxiv.org/pdf/1301.3781.pdf) introduced by a team of researchers at Google led by Tomas Mikolov. Google [hosts an open-source version of Word2vec](https://code.google.com/p/word2vec/) released under an Apache 2.0 license. 
In 2014, Mikolov left Google for Facebook, and in May 2015, [Google was granted a patent for the method](http://patft.uspto.gov/netacgi/nph-Parser?Sect1=PTO2&Sect2=HITOFF&p=1&u=%2Fnetahtml%2FPTO%2Fsearch-bool.html&r=1&f=G&l=50&co1=AND&d=PTXT&s1=9037464&OS=9037464&RS=9037464), which does not abrogate the Apache license under which it has been released. - -### Foreign Languages - -While words in all languages may be converted into vectors with Word2vec, and those vectors learned with Deeplearning4j, NLP preprocessing can be very language specific, and requires tools beyond our libraries. The [Stanford Natural Language Processing Group](http://nlp.stanford.edu/software/) has a number of Java-based tools for tokenization, part-of-speech tagging and named-entity recognition for languages such as [Mandarin Chinese](http://nlp.stanford.edu/projects/chinese-nlp.shtml), Arabic, French, German and Spanish. For Japanese, NLP tools like [Kuromoji](http://www.atilika.org/) are useful. Other foreign-language resources, including [text corpora, are available here](http://www-nlp.stanford.edu/links/statnlp.html). - -### GloVe: Global Vectors - -Loading and saving GloVe models to word2vec can be done like so: - -``` java - WordVectors wordVectors = WordVectorSerializer.loadTxtVectors(new File("glove.6B.50d.txt")); -``` - -### Sequence Vectors - -Deeplearning4j has a class called [SequenceVectors](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/sequencevectors/SequenceVectors.java), which is one level of abstraction above word vectors, and which allows you to extract features from any sequence, including social media profiles, transactions, proteins, etc. If data can be described as a sequence, it can be learned via skip-gram and hierarchical softmax with the SequenceVectors class.
This is compatible with the [DeepWalk algorithm](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-graph/src/main/java/org/deeplearning4j/graph/models/deepwalk/DeepWalk.java), also implemented in Deeplearning4j. - -### Word2Vec Features on Deeplearning4j - -* Weights update after model serialization/deserialization was added. That is, you can update model state with, say, 200GB of new text by calling `loadFullModel`, adding `TokenizerFactory` and `SentenceIterator` to it, and calling `fit()` on the restored model. -* Option for multiple datasources for vocab construction was added. -* Epochs and Iterations can be specified separately, although they are both typically "1". -* Word2Vec.Builder has this option: `hugeModelExpected`. If set to `true`, the vocab will be periodically truncated during the build. -* While `minWordFrequency` is useful for ignoring rare words in the corpus, any number of words can be excluded to customize. -* Two new WordVectorSerializer methods have been introduced: `writeFullModel` and `loadFullModel`. These save and load a full model state. -* A decent workstation should be able to handle a vocab with a few million words. Deeplearning4j's Word2vec implementation can model a few terabytes of data on a single machine. Roughly, the math is: `vectorSize * 4 * 3 * vocab.size()`.
- -### Doc2vec & Other NLP Resources - -* [DL4J Example of Text Classification With Word2vec & RNNs](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/recurrent/word2vecsentiment/Word2VecSentimentRNN.java) -* [DL4J Example of Text Classification With Paragraph Vectors](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/paragraphvectors/ParagraphVectorsClassifierExample.java) -* [Doc2vec, or Paragraph Vectors, With Deeplearning4j](./deeplearning4j-nlp-doc2vec) -* [Thought Vectors, Natural Language Processing & the Future of AI](https://skymind.ai/wiki/thought-vectors) -* [Quora: How Does Word2vec Work?](http://www.quora.com/How-does-word2vec-work) -* [Quora: What Are Some Interesting Word2Vec Results?](http://www.quora.com/Word2vec/What-are-some-interesting-Word2Vec-results/answer/Omer-Levy) -* [Word2Vec: an introduction](http://www.folgertkarsdorp.nl/word2vec-an-introduction/); Folgert Karsdorp -* [Mikolov's Original Word2vec Code @Google](https://code.google.com/p/word2vec/) -* [word2vec Explained: Deriving Mikolov et al.’s Negative-Sampling Word-Embedding Method](https://arxiv.org/pdf/1402.3722v1.pdf); Yoav Goldberg and Omer Levy -* [Advances in Pre-Training Distributed Word Representations - by Mikolov et al](https://arxiv.org/abs/1712.09405) - - -### Word2Vec in Literature - - It's like numbers are language, like all the letters in the language are turned into numbers, and so it's something that everyone understands the same way. You lose the sounds of the letters and whether they click or pop or touch the palate, or go ooh or aah, and anything that can be misread or con you with its music or the pictures it puts in your mind, all of that is gone, along with the accent, and you have a new understanding entirely, a language of numbers, and everything becomes as clear to everyone as the writing on the wall. 
So as I say there comes a certain time for the reading of the numbers. - -- E.L. Doctorow, Billy Bathgate diff --git a/docs/deeplearning4j-nn/README.md b/docs/deeplearning4j-nn/README.md deleted file mode 100644 index 5de5ef9e5..000000000 --- a/docs/deeplearning4j-nn/README.md +++ /dev/null @@ -1,10 +0,0 @@ -# deeplearning4j-nn documentation - -To generate docs into the `deeplearning4j-nn/doc_sources` folder, first `cd docs` then run: - -```shell -python generate_docs.py \ - --project deeplearning4j-nn \ - --code ../deeplearning4j - --out_language en -``` diff --git a/docs/deeplearning4j-nn/pages.json b/docs/deeplearning4j-nn/pages.json deleted file mode 100644 index 44e7ac00c..000000000 --- a/docs/deeplearning4j-nn/pages.json +++ /dev/null @@ -1,187 +0,0 @@ -{ - "excludes": [ - "abstract" - ], - "indices": [ - ], - "pages": [ - { - "page": "evaluation.md", - "module": [ - "/deeplearning4j-nn/src/main/java/org/deeplearning4j/eval/" - ] - }, - { - "page": "model-persistence.md", - "class": [ - "deeplearning4j-nn/src/main/java/org/deeplearning4j/util/ModelSerializer.java" - ] - }, - { - "page": "visualization.md", - "class": [] - }, - { - "page": "tsne-visualization.md", - "class": [] - }, - { - "page": "transfer-learning.md", - "module": [ - "/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/transferlearning/" - ] - }, - { - "page": "listeners.md", - "module": [ - "/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/listeners/" - ] - }, - { - "page": "iterators.md", - "module": [ - "/deeplearning4j-data/deeplearning4j-datasets/src/main/java/org/deeplearning4j/datasets/iterator/impl/", - "/deeplearning4j-data/deeplearning4j-datavec-iterators/src/main/java/org/deeplearning4j/datasets/datavec/", - "/deeplearning4j-data/deeplearning4j-utility-iterators/src/main/java/org/deeplearning4j/datasets/iterator/" - ], - "class": [ - "deeplearning4j-nn/src/main/java/org/deeplearning4j/datasets/iterator/impl/MultiDataSetIteratorAdapter.java" - ] - }, - { - "page": 
"layers.md", - "class": [ - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/OutputLayer.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DropoutLayer.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LossLayer.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingLayer.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingSequenceLayer.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GlobalPoolingLayer.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocalResponseNormalization.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected1D.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected2D.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/NoParamLayer.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Pooling1D.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Pooling2D.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling3DLayer.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling1D.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling2D.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling3D.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding1DLayer.java", - 
"deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPaddingLayer.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding3DLayer.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/ElementWiseMultiplicationLayer.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/RepeatVector.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/util/MaskLayer.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/util/MaskZeroLayer.java" - ] - }, - { - "page": "autoencoders.md", - "class": [ - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/BernoulliReconstructionDistribution.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/CompositeReconstructionDistribution.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/ExponentialReconstructionDistribution.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/GaussianReconstructionDistribution.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/LossFunctionWrapper.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/ReconstructionDistribution.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AutoEncoder.java" - ] - }, - { - "page": "convolutional.md", - "class": [ - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1D.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution2D.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution3D.java", - 
"deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution2D.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping1D.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping2D.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping3D.java" - ] - }, - { - "page": "recurrent.md", - "class": [ - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LSTM.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnOutputLayer.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnLossLayer.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/Bidirectional.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/LastTimeStep.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/SimpleRnn.java" - ] - }, - { - "page": "custom-layer.md", - "class": [ - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/FeedForwardLayer.java" - ] - }, - { - "page": "vertices.md", - "class": [ - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ElementWiseVertex.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/InputVertex.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/L2NormalizeVertex.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/L2Vertex.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/MergeVertex.java", - 
"deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/PoolHelperVertex.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ReshapeVertex.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ScaleVertex.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ShiftVertex.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/StackVertex.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/SubsetVertex.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/UnstackVertex.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/DuplicateToTimeSeriesVertex.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/LastTimeStepVertex.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/ReverseTimeSeriesVertex.java" - ] - }, - { - "page": "early-stopping.md", - "class": [ - "deeplearning4j-nn/src/main/java/org/deeplearning4j/earlystopping/EarlyStoppingConfiguration.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/earlystopping/EarlyStoppingModelSaver.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/earlystopping/EarlyStoppingResult.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/AutoencoderScoreCalculator.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/ClassificationScoreCalculator.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/DataSetLossCalculator.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/DataSetLossCalculatorCG.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/ROCScoreCalculator.java", - 
"deeplearning4j-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/RegressionScoreCalculator.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/VAEReconErrorScoreCalculator.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/VAEReconProbScoreCalculator.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/earlystopping/termination/ScoreImprovementEpochTerminationCondition.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/earlystopping/termination/BestScoreEpochTerminationCondition.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/earlystopping/termination/EpochTerminationCondition.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/earlystopping/termination/InvalidScoreIterationTerminationCondition.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/earlystopping/termination/IterationTerminationCondition.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/earlystopping/termination/MaxEpochsTerminationCondition.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/earlystopping/termination/MaxScoreIterationTerminationCondition.java", - "deeplearning4j-nn/src/main/java/org/deeplearning4j/earlystopping/termination/MaxTimeIterationTerminationCondition.java" - ] - }, - { - "page": "computationgraph.md", - "class": [ - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java" - ] - }, - { - "page": "multilayernetwork.md", - "class": [ - "deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java" - ] - } - ] -} - diff --git a/docs/deeplearning4j-nn/templates/autoencoders.md b/docs/deeplearning4j-nn/templates/autoencoders.md deleted file mode 100644 index bb0b48734..000000000 --- a/docs/deeplearning4j-nn/templates/autoencoders.md +++ /dev/null @@ -1,19 +0,0 @@ ---- -title: Deeplearning4j Autoencoders -short_title: Autoencoders -description: Supported autoencoder configurations. 
-category: Models -weight: 3 ---- - -## What are autoencoders? - -Autoencoders are neural networks for unsupervised learning. Eclipse Deeplearning4j supports certain autoencoder layers such as variational autoencoders. - -## Where's Restricted Boltzmann Machine? - -RBMs are no longer supported as of version 0.9.x. They are no longer best-in-class for most machine learning problems. - -## Supported layers - -{{autogenerated}} \ No newline at end of file diff --git a/docs/deeplearning4j-nn/templates/computationgraph.md b/docs/deeplearning4j-nn/templates/computationgraph.md deleted file mode 100644 index f4ff7f03d..000000000 --- a/docs/deeplearning4j-nn/templates/computationgraph.md +++ /dev/null @@ -1,258 +0,0 @@ ---- -title: Complex Architectures with Computation Graph -short_title: Computation Graph -description: How to build complex networks with DL4J computation graph. -category: Models -weight: 3 ---- - -## Building Complex Network Architectures with Computation Graph - -This page describes how to build more complicated networks, using DL4J's Computation Graph functionality. 
- -**Contents** - -* [Overview of the Computation Graph](#overview) -* [Computation Graph: Some Example Use Cases](#usecases) -* [Configuring a ComputationGraph network](#config) - * [Types of Graph Vertices](#vertextypes) - * [Example 1: Recurrent Network with Skip Connections](#rnnskip) - * [Example 2: Multiple Inputs and Merge Vertex](#multiin) - * [Example 3: Multi-Task Learning](#multitask) - * [Automatically Adding PreProcessors and Calculating nIns](#preprocessors) -* [Training Data for ComputationGraph](#data) - * [RecordReaderMultiDataSetIterator Example 1: Regression Data](#rrmdsi1) - * [RecordReaderMultiDataSetIterator Example 2: Classification and Multi-Task Learning](#rrmdsi2) - - -## Overview of Computation Graph - -DL4J has two types of networks comprised of multiple layers: - -- The [MultiLayerNetwork](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java), which is essentially a stack of neural network layers (with a single input layer and single output layer), and -- The [ComputationGraph](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java), which allows for greater freedom in network architectures - - -Specifically, the ComputationGraph allows for networks to be built with the following features: - -- Multiple network input arrays -- Multiple network outputs (including mixed classification/regression architectures) -- Layers connected to other layers using a directed acyclic graph connection structure (instead of just a stack of layers) - -As a general rule, when building networks with a single input layer, a single output layer, and an input->a->b->c->output type connection structure: MultiLayerNetwork is usually the preferred network. 
However, everything that MultiLayerNetwork can do, ComputationGraph can do as well - though the configuration may be a little more complicated. - -

    -GET STARTED WITH DEEP LEARNING -

    - -## Computation Graph: Some Example Use Cases - -Examples of some architectures that can be built using ComputationGraph include: - -- Multi-task learning architectures -- Recurrent neural networks with skip connections -- [GoogLeNet](https://arxiv.org/abs/1409.4842), a complex type of convolutional neural network for image classification -- [Image caption generation](https://arxiv.org/abs/1411.4555) -- [Convolutional networks for sentence classification](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/convolution/sentenceclassification/CnnSentenceClassificationExample.java) -- [Residual learning convolutional neural networks](https://arxiv.org/abs/1512.03385) - - -## Configuring a Computation Graph - -### Types of Graph Vertices - -The basic idea is that in the ComputationGraph, the core building block is the [GraphVertex](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/GraphVertex.java), instead of layers. Layers (or, more accurately the [LayerVertex](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/LayerVertex.java) objects), are but one type of vertex in the graph. Other types of vertices include: - -- Input Vertices -- Element-wise operation vertices -- Merge vertices -- Subset vertices -- Preprocessor vertices - -These types of graph vertices are described briefly below. - -**LayerVertex**: Layer vertices (graph vertices with neural network layers) are added using the ```.addLayer(String,Layer,String...)``` method. The first argument is the label for the layer, and the last arguments are the inputs to that layer.
-If you need to manually add an [InputPreProcessor](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor) (usually this is unnecessary - see next section) you can use the ```.addLayer(String,Layer,InputPreProcessor,String...)``` method. - -**InputVertex**: Input vertices are specified by the ```addInputs(String...)``` method in your configuration. The strings used as inputs can be arbitrary - they are user-defined labels, and can be referenced later in the configuration. The number of strings provided define the number of inputs; the order of the input also defines the order of the corresponding INDArrays in the fit methods (or the DataSet/MultiDataSet objects). - -**ElementWiseVertex**: Element-wise operation vertices do for example an element-wise addition or subtraction of the activations out of one or more other vertices. Thus, the activations used as input for the ElementWiseVertex must all be the same size, and the output size of the elementwise vertex is the same as the inputs. - -**MergeVertex**: The MergeVertex concatenates/merges the input activations. For example, if a MergeVertex has 2 inputs of size 5 and 10 respectively, then output size will be 5+10=15 activations. For convolutional network activations, examples are merged along the depth: so suppose the activations from one layer have 4 features and the other has 5 features (both with (4 or 5) x width x height activations), then the output will have (4+5) x width x height activations. - -**SubsetVertex**: The subset vertex allows you to get only part of the activations out of another vertex. For example, to get the first 5 activations out of another vertex with label "layer1", you can use ```.addVertex("subset1", new SubsetVertex(0,4), "layer1")```: this means that the 0th through 4th (inclusive) activations out of the "layer1" vertex will be used as output from the subset vertex. 
 - -**PreProcessorVertex**: Occasionally, you might want to use the functionality of an [InputPreProcessor](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor) without that preprocessor being associated with a layer. The PreProcessorVertex allows you to do this. - -Finally, it is also possible to define custom graph vertices by implementing both a [configuration](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/graph/GraphVertex.java) and [implementation](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/GraphVertex.java) class for your custom GraphVertex. - - -### Example 1: Recurrent Network with Skip Connections - -Suppose we wish to build the following recurrent neural network architecture: -![RNN with Skip connections](/images/guide/lstm_skip_connection.png) - -For the sake of this example, let's assume our input data is of size 5. Our configuration would be as follows: - -```java -ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() - .updater(new Sgd(0.01)) - .graphBuilder() - .addInputs("input") //can use any label for this - .addLayer("L1", new GravesLSTM.Builder().nIn(5).nOut(5).build(), "input") - .addLayer("L2",new RnnOutputLayer.Builder().nIn(5+5).nOut(5).build(), "input", "L1") - .setOutputs("L2") //We need to specify the network outputs and their order - .build(); - -ComputationGraph net = new ComputationGraph(conf); -net.init(); -``` - -Note that in the .addLayer(...) methods, the first string ("L1", "L2") is the name of that layer, and the strings at the end (["input"], ["input","L1"]) are the inputs to that layer. 
 - - -### Example 2: Multiple Inputs and Merge Vertex - -Consider the following architecture: - -![Computation Graph with Merge Vertex](/images/guide/compgraph_merge.png) - -Here, the merge vertex takes the activations out of layers L1 and L2, and merges (concatenates) them: thus if layers L1 and L2 both have 4 output activations (.nOut(4)) then the output size of the merge vertex is 4+4=8 activations. - -To build the above network, we use the following configuration: - -```java -ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() - .updater(new Sgd(0.01)) - .graphBuilder() - .addInputs("input1", "input2") - .addLayer("L1", new DenseLayer.Builder().nIn(3).nOut(4).build(), "input1") - .addLayer("L2", new DenseLayer.Builder().nIn(3).nOut(4).build(), "input2") - .addVertex("merge", new MergeVertex(), "L1", "L2") - .addLayer("out", new OutputLayer.Builder().nIn(4+4).nOut(3).build(), "merge") - .setOutputs("out") - .build(); -``` - -### Example 3: Multi-Task Learning - -In multi-task learning, a neural network is used to make multiple independent predictions. -Consider for example a simple network used for both classification and regression simultaneously. In this case, we have two output layers, "out1" for classification, and "out2" for regression. 
- -![Computation Graph for MultiTask Learning](/images/guide/compgraph_multitask.png) - -In this case, the network configuration is: - -```java -ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() - .updater(new Sgd(0.01)) - .graphBuilder() - .addInputs("input") - .addLayer("L1", new DenseLayer.Builder().nIn(3).nOut(4).build(), "input") - .addLayer("out1", new OutputLayer.Builder() - .lossFunction(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) - .nIn(4).nOut(3).build(), "L1") - .addLayer("out2", new OutputLayer.Builder() - .lossFunction(LossFunctions.LossFunction.MSE) - .nIn(4).nOut(2).build(), "L1") - .setOutputs("out1","out2") - .build(); -``` - -### Automatically Adding PreProcessors and Calculating nIns - -One feature of the ComputationGraphConfiguration is that you can specify the types of input to the network, using the ```.setInputTypes(InputType...)``` method in the configuration. - -The setInputType method has two effects: - -1. It will automatically add any [InputPreProcessor](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor)s as required. InputPreProcessors are necessary to handle the interaction between for example fully connected (dense) and convolutional layers, or recurrent and fully connected layers. -2. It will automatically calculate the number of inputs (.nIn(x) config) to a layer. Thus, if you are using the ```setInputTypes(InputType...)``` functionality, it is not necessary to manually specify the .nIn(x) options in your configuration. This can simplify building some architectures (such as convolutional networks with fully connected layers). If the .nIn(x) is specified for a layer, the network will not override this when using the InputType functionality. 
- - -For example, if your network has 2 inputs, one being a convolutional input and the other being a feed-forward input, you would use ```.setInputTypes(InputType.convolutional(depth,width,height), InputType.feedForward(feedForwardInputSize))``` - - -## Training Data for ComputationGraph - -There are two types of data that can be used with the ComputationGraph. - -### DataSet and the DataSetIterator - -The DataSet class was originally designed for use with the MultiLayerNetwork, however can also be used with ComputationGraph - but only if that computation graph has a single input and output array. For computation graph architectures with more than one input array, or more than one output array, DataSet and DataSetIterator cannot be used (instead, use MultiDataSet/MultiDataSetIterator). - -A DataSet object is basically a pair of INDArrays that hold your training data. In the case of RNNs, it may also include masking arrays (see [this](http://deeplearning4j.org/usingrnns) for more details). A DataSetIterator is essentially an iterator over DataSet objects. - -### MultiDataSet and the MultiDataSetIterator - -MultiDataSet is multiple input and/or multiple output version of DataSet. It may also include multiple mask arrays (for each input/output array) in the case of recurrent neural networks. As a general rule, you should use DataSet/DataSetIterator, unless you are dealing with multiple inputs and/or multiple outputs. 
 - -There are currently two ways to use a MultiDataSetIterator: - -- By implementing the [MultiDataSetIterator](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/dataset/api/iterator/MultiDataSetIterator.java) interface directly -- By using the [RecordReaderMultiDataSetIterator](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-data/deeplearning4j-datavec-iterators/src/main/java/org/deeplearning4j/datasets/datavec/RecordReaderMultiDataSetIterator.java) in conjunction with DataVec record readers - - -The RecordReaderMultiDataSetIterator provides a number of options for loading data. In particular, the RecordReaderMultiDataSetIterator provides the following functionality: - -- Multiple DataVec RecordReaders may be used simultaneously -- The record readers need not be the same modality: for example, you can use an image record reader with a CSV record reader -- It is possible to use a subset of the columns in a RecordReader for different purposes - for example, the first 10 columns in a CSV could be your input, and the last 5 could be your output -- It is possible to convert single columns from a class index to a one-hot representation - - -Some basic examples on how to use the RecordReaderMultiDataSetIterator follow. You might also find [these unit tests](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/datasets/datavec/RecordReaderMultiDataSetIteratorTest.java) to be useful. - -### RecordReaderMultiDataSetIterator Example 1: Regression Data - -Suppose we have a CSV file with 5 columns, and we want to use the first 3 as our input, and the last 2 columns as our output (for regression). 
We can build a MultiDataSetIterator to do this as follows: - -```java -int numLinesToSkip = 0; -String fileDelimiter = ","; -RecordReader rr = new CSVRecordReader(numLinesToSkip,fileDelimiter); -String csvPath = "/path/to/my/file.csv"; -rr.initialize(new FileSplit(new File(csvPath))); - -int batchSize = 4; -MultiDataSetIterator iterator = new RecordReaderMultiDataSetIterator.Builder(batchSize) - .addReader("myReader",rr) - .addInput("myReader",0,2) //Input: columns 0 to 2 inclusive - .addOutput("myReader",3,4) //Output: columns 3 to 4 inclusive - .build(); -``` - - -### RecordReaderMultiDataSetIterator Example 2: Classification and Multi-Task Learning - -Suppose we have two separate CSV files, one for our inputs, and one for our outputs. Further suppose we are building a multi-task learning architecture, whereby we have two outputs - one for regression and one for classification. -For this example, let's assume the data is as follows: - -- Input file: myInput.csv, and we want to use all columns as input (without modification) -- Output file: myOutput.csv. - - Network output 1 - regression: columns 0 to 3 - - Network output 2 - classification: column 4 is the class index for classification, with 3 classes. Thus column 4 contains integer values [0,1,2] only, and we want to convert these indexes to a one-hot representation for classification. 
- -In this case, we can build our iterator as follows: - -```java -int numLinesToSkip = 0; -String fileDelimiter = ","; - -RecordReader featuresReader = new CSVRecordReader(numLinesToSkip,fileDelimiter); -String featuresCsvPath = "/path/to/my/myInput.csv"; -featuresReader.initialize(new FileSplit(new File(featuresCsvPath))); - -RecordReader labelsReader = new CSVRecordReader(numLinesToSkip,fileDelimiter); -String labelsCsvPath = "/path/to/my/myOutput.csv"; -labelsReader.initialize(new FileSplit(new File(labelsCsvPath))); - -int batchSize = 4; -int numClasses = 3; -MultiDataSetIterator iterator = new RecordReaderMultiDataSetIterator.Builder(batchSize) - .addReader("csvInput", featuresReader) - .addReader("csvLabels", labelsReader) - .addInput("csvInput") //Input: all columns from input reader - .addOutput("csvLabels", 0, 3) //Output 1: columns 0 to 3 inclusive - .addOutputOneHot("csvLabels", 4, numClasses) //Output 2: column 4 -> convert to one-hot for classification - .build(); -``` \ No newline at end of file diff --git a/docs/deeplearning4j-nn/templates/convolutional.md b/docs/deeplearning4j-nn/templates/convolutional.md deleted file mode 100644 index d09dda571..000000000 --- a/docs/deeplearning4j-nn/templates/convolutional.md +++ /dev/null @@ -1,15 +0,0 @@ ---- -title: Supported Convolutional Layers -short_title: Convolutional -description: Supported convolutional layers. -category: Models -weight: 3 ---- - -## What is a convolutional neural network? - -Each layer in a neural network configuration represents a unit of hidden units. When layers are stacked together, they represent a *deep neural network*. 
 - -## Available layers - -{{autogenerated}} \ No newline at end of file diff --git a/docs/deeplearning4j-nn/templates/custom-layer.md b/docs/deeplearning4j-nn/templates/custom-layer.md deleted file mode 100644 index 7bf059a83..000000000 --- a/docs/deeplearning4j-nn/templates/custom-layer.md +++ /dev/null @@ -1,52 +0,0 @@ ---- -title: Custom Layers -short_title: Custom Layers -description: Extend DL4J functionality for custom layers. -category: Models -weight: 10 ---- - -## Writing Your Custom Layer - -There are two components to adding a custom layer: - -1. Adding the layer configuration class: extends org.deeplearning4j.nn.conf.layers.Layer -2. Adding the layer implementation class: implements org.deeplearning4j.nn.api.Layer - -The configuration layer ((1) above) class handles the settings. It's the one you would -use when constructing a MultiLayerNetwork or ComputationGraph. You can add custom -settings here, and use them in your layer. - -The implementation layer ((2) above) class has parameters, and handles network forward -pass, backpropagation, etc. It is created from the org.deeplearning4j.nn.conf.layers.Layer.instantiate(...) -method. In other words: the instantiate method is how we go from the configuration -to the implementation; MultiLayerNetwork or ComputationGraph will call this method -when initializing the network. - -Examples of these are CustomLayer (the configuration class) and CustomLayerImpl (the -implementation class). Both of these classes have extensive comments regarding -their methods. - -You'll note that in Deeplearning4j there are two DenseLayer classes, two GravesLSTM classes, -etc: the reason is because one is for the configuration, one is for the implementation. -We have not followed this "same name" pattern here to hopefully avoid confusion. - -## Testing Your Custom Layer - -Once you have added a custom layer, it is necessary to run some tests to ensure -it is correct. - -These tests should at a minimum include the following: - -1. 
Tests to ensure that the JSON configuration (to/from JSON) works correctly - This is necessary for networks with your custom layer to function with both - model serialization (saving) and Spark training. -2. Gradient checks to ensure that the implementation is correct. - -## Example - -A full custom layer example is available in our [examples repository](https://github.com/eclipse/deeplearning4j-examples/tree/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/misc/customlayers). - -## API - -{{autogenerated}} \ No newline at end of file diff --git a/docs/deeplearning4j-nn/templates/early-stopping.md b/docs/deeplearning4j-nn/templates/early-stopping.md deleted file mode 100644 index df2708c2f..000000000 --- a/docs/deeplearning4j-nn/templates/early-stopping.md +++ /dev/null @@ -1,83 +0,0 @@ ---- -title: Early Stopping -short_title: Early Stopping -description: Terminate a training session given certain conditions. -category: Tuning & Training -weight: 10 ---- - -## What is early stopping? - -When training neural networks, numerous decisions need to be made regarding the settings (hyperparameters) used, in order to obtain good performance. One such hyperparameter is the number of training epochs: that is, how many full passes of the data set (epochs) should be used? If we use too few epochs, we might underfit (i.e., not learn everything we can from the training data); if we use too many epochs, we might overfit (i.e., fit the 'noise' in the training data, and not the signal). - -Early stopping attempts to remove the need to manually set this value. It can also be considered a type of regularization method (like L1/L2 weight decay and dropout) in that it can stop the network from overfitting. 
 - -The idea behind early stopping is relatively simple: - -* Split data into training and test sets -* At the end of each epoch (or, every N epochs): - * evaluate the network performance on the test set - * if the network outperforms the previous best model: save a copy of the network at the current epoch -* Take as our final model the model that has the best test set performance - - -This is shown graphically below: - -![Early Stopping](/images/guide/earlystopping.png) - -The best model is the one saved at the time of the vertical dotted line - i.e., the model with the best accuracy on the test set. - - -Using DL4J's early stopping functionality requires you to provide a number of configuration options: - -* A score calculator, such as the *DataSetLossCalculator*([JavaDoc](https://deeplearning4j.org/api/{{page.version}}/org/deeplearning4j/earlystopping/scorecalc/DataSetLossCalculator.html), [Source Code](https://github.com/eclipse/deeplearning4j/blob/c152293ef8d1094c281f5375ded61ff5f8eb6587/deeplearning4j-core/src/main/java/org/deeplearning4j/earlystopping/scorecalc/DataSetLossCalculator.java)) for a Multi Layer Network, or *DataSetLossCalculatorCG* ([JavaDoc](https://deeplearning4j.org/api/{{page.version}}/org/deeplearning4j/earlystopping/scorecalc/DataSetLossCalculatorCG.html), [Source Code](https://github.com/eclipse/deeplearning4j/blob/c152293ef8d1094c281f5375ded61ff5f8eb6587/deeplearning4j-core/src/main/java/org/deeplearning4j/earlystopping/scorecalc/DataSetLossCalculatorCG.java)) for a Computation Graph. This is used to calculate the score at every epoch (for example: the loss function value on a test set, or the accuracy on the test set) -* How frequently we want to calculate the score function (default: every epoch) -* One or more termination conditions, which tell the training process when to stop. 
There are two classes of termination conditions: - * Epoch termination conditions: evaluated every N epochs - * Iteration termination conditions: evaluated once per minibatch -* A model saver, that defines how models are saved - -An example, with an epoch termination condition of maximum of 30 epochs, a maximum of 20 minutes training time, calculating the score every epoch, and saving the intermediate results to disk: - -```java - -MultiLayerConfiguration myNetworkConfiguration = ...; -DataSetIterator myTrainData = ...; -DataSetIterator myTestData = ...; - -EarlyStoppingConfiguration esConf = new EarlyStoppingConfiguration.Builder() - .epochTerminationConditions(new MaxEpochsTerminationCondition(30)) - .iterationTerminationConditions(new MaxTimeIterationTerminationCondition(20, TimeUnit.MINUTES)) - .scoreCalculator(new DataSetLossCalculator(myTestData, true)) - .evaluateEveryNEpochs(1) - .modelSaver(new LocalFileModelSaver(directory)) - .build(); - -EarlyStoppingTrainer trainer = new EarlyStoppingTrainer(esConf,myNetworkConfiguration,myTrainData); - -//Conduct early stopping training: -EarlyStoppingResult result = trainer.fit(); - -//Print out the results: -System.out.println("Termination reason: " + result.getTerminationReason()); -System.out.println("Termination details: " + result.getTerminationDetails()); -System.out.println("Total epochs: " + result.getTotalEpochs()); -System.out.println("Best epoch number: " + result.getBestModelEpoch()); -System.out.println("Score at best epoch: " + result.getBestModelScore()); - -//Get the best model: -MultiLayerNetwork bestModel = result.getBestModel(); - -``` - -You can also implement your own iteration and epoch termination conditions. - -## Early Stopping w/ Parallel Wrapper - -The early stopping implementation described above will only work with a single device. However, `EarlyStoppingParallelTrainer` provides similar functionality as early stopping and allows you to optimize for either multiple CPUs or GPUs. 
`EarlyStoppingParallelTrainer` wraps your model in a `ParallelWrapper` class and performs localized distributed training. - -Note that `EarlyStoppingParallelTrainer` doesn't support all of the functionality of its single-device counterpart. It is not UI-compatible and may not work with complex iteration listeners. This is due to how the model is distributed and copied in the background. - -## API - -{{autogenerated}} \ No newline at end of file diff --git a/docs/deeplearning4j-nn/templates/evaluation.md b/docs/deeplearning4j-nn/templates/evaluation.md deleted file mode 100644 index 85ed12474..000000000 --- a/docs/deeplearning4j-nn/templates/evaluation.md +++ /dev/null @@ -1,212 +0,0 @@ ---- -title: Evaluation Classes for Neural Networks -short_title: Evaluation -description: Tools and classes for evaluating neural network performance -category: Tuning & Training -weight: 3 ---- - - -## Why evaluate? - -When training or deploying a Neural Network it is useful to know the accuracy of your model. In DL4J the Evaluation Class and variants of the Evaluation Class are available to evaluate your model's performance. - - -### Evaluation for Classification - -The Evaluation class is used to evaluate the performance for binary and multi-class classifiers (including time series classifiers). This section covers basic usage of the Evaluation Class. - -Given a dataset in the form of a DataSetIterator, the easiest way to perform evaluation is to use the built-in evaluate methods on MultiLayerNetwork and ComputationGraph: -``` -DataSetIterator myTestData = ... -Evaluation eval = model.evaluate(myTestData); -``` - -However, evaluation can be performed on individual minibatches also. Here is an example taken from our dataexamples/CSVExample in the [Examples](https://github.com/eclipse/deeplearning4j-examples) project. - -The CSV example has CSV data for 3 classes of flowers and builds a simple feed forward neural network to classify the flowers based on 4 measurements. 
 - -``` -Evaluation eval = new Evaluation(3); -INDArray output = model.output(testData.getFeatures()); -eval.eval(testData.getLabels(), output); -log.info(eval.stats()); -``` - -The first line creates an Evaluation object with 3 classes. -The second line gets the model's output (predictions) for our test dataset. -The third line uses the eval method to compare the labels array from the testdata with the labels generated from the model. -The fourth line logs the evaluation data to the console. - -The output. - -``` -Examples labeled as 0 classified by model as 0: 24 times -Examples labeled as 1 classified by model as 1: 11 times -Examples labeled as 1 classified by model as 2: 1 times -Examples labeled as 2 classified by model as 2: 17 times - - -==========================Scores======================================== - # of classes: 3 - Accuracy: 0.9811 - Precision: 0.9815 - Recall: 0.9722 - F1 Score: 0.9760 -Precision, recall & F1: macro-averaged (equally weighted avg. of 3 classes) -======================================================================== -``` - -By default the .stats() method displays the confusion matrix entries (one per line), Accuracy, Precision, Recall and F1 Score. Additionally the Evaluation Class can also calculate and return the following values: - -* Confusion Matrix -* False Positive/Negative Rate -* True Positive/Negative -* Class Counts -* F-beta, G-measure, Matthews Correlation Coefficient and more, see [Evaluation JavaDoc](https://deeplearning4j.org/api/latest/org/deeplearning4j/eval/Evaluation.html) - -Display the Confusion Matrix. - -``` -System.out.println(eval.confusionToString()); -``` - -Displays - -``` -Predicted: 0 1 2 -Actual: -0 0 | 16 0 0 -1 1 | 0 19 0 -2 2 | 0 0 18 -``` - -Additionally the confusion matrix can be accessed directly, converted to csv or html using. 
 - -``` -eval.getConfusionMatrix() ; -eval.getConfusionMatrix().toHTML(); -eval.getConfusionMatrix().toCSV(); -``` - - -### Evaluation for Regression - -To Evaluate a network performing regression use the RegressionEvaluation Class. - -As with the Evaluation class, RegressionEvaluation on a DataSetIterator can be performed as follows: -``` -DataSetIterator myTestData = ... -RegressionEvaluation eval = model.evaluateRegression(myTestData); -``` - -Here is a code snippet with single column, in this case the neural network was predicting the age of shellfish based on measurements. - -``` -RegressionEvaluation eval = new RegressionEvaluation(1); -``` - -Print the statistics for the Evaluation. - -``` -System.out.println(eval.stats()); -``` - -Returns - -``` -Column MSE MAE RMSE RSE R^2 -col_0 7.98925e+00 2.00648e+00 2.82653e+00 5.01481e-01 7.25783e-01 -``` - -Columns are Mean Squared Error, Mean Absolute Error, Root Mean Squared Error, Relative Squared Error, and R^2 Coefficient of Determination - -See [RegressionEvaluation JavaDoc](https://deeplearning4j.org/api/{{page.version}}/org/deeplearning4j/eval/RegressionEvaluation.html) - -### Performing Multiple Evaluations Simultaneously - -When performing multiple types of evaluations (for example, Evaluation and ROC on the same network and dataset) it is more efficient to do this in one pass of the dataset, as follows: - -``` -DataSetIterator testData = ... -Evaluation eval = new Evaluation(); -ROC roc = new ROC(); -model.doEvaluation(testData, eval, roc); -``` - -### Evaluation of Time Series - -Time series evaluation is very similar to the above evaluation approaches. Evaluation in DL4J is performed on all (non-masked) time steps separately - for example, a time series of length 10 will contribute 10 predictions/labels to an Evaluation object. -One difference with time series is the (optional) presence of mask arrays, which are used to mark some time steps as missing or not present. 
See [Using RNNs - Masking](./deeplearning4j-nn-recurrent) for more details on masking. - -For most users, it is simply sufficient to use the ```MultiLayerNetwork.evaluate(DataSetIterator)``` or ```MultiLayerNetwork.evaluateRegression(DataSetIterator)``` and similar methods. These methods will properly handle masking, if mask arrays are present. - - -### Evaluation for Binary Classifiers - -The EvaluationBinary is used for evaluating networks with binary classification outputs - these networks usually have Sigmoid activation functions and XENT loss functions. The typical classification metrics, such as accuracy, precision, recall, F1 score, etc. are calculated for each output. - -``` -EvaluationBinary eval = new EvaluationBinary(int size) -``` - -See [EvaluationBinary JavaDoc](https://deeplearning4j.org/api/{{page.version}}/org/deeplearning4j/eval/EvaluationBinary.html) - - -### ROC - -ROC (Receiver Operating Characteristic) is another commonly used evaluation metric for the evaluation of classifiers. Three ROC variants exist in DL4J: - -- ROC - for single binary label (as a single column probability, or 2 column 'softmax' probability distribution). -- ROCBinary - for multiple binary labels -- ROCMultiClass - for evaluation of non-binary classifiers, using a "one vs. all" approach - -These classes have the ability to calculate the area under ROC curve (AUROC) and area under Precision-Recall curve (AUPRC), via the ```calculateAUC()``` and ```calculateAUPRC()``` methods. Furthermore, the ROC and Precision-Recall curves can be obtained using ```getRocCurve()``` and ```getPrecisionRecallCurve()```. - -The ROC and Precision-Recall curves can be exported to HTML for viewing using: ```EvaluationTools.exportRocChartsToHtmlFile(ROC, File)```, which will export a HTML file with both ROC and P-R curves, that can be viewed in a browser. 
 - - -Note that all three support two modes of operation/calculation -- Thresholded (approximate AUROC/AUPRC calculation, no memory issues) -- Exact (exact AUROC/AUPRC calculation, but can require large amount of memory with very large datasets - i.e., datasets with many millions of examples) - -The number of bins can be set using the constructors. Exact can be set using the default constructor ```new ROC()``` or explicitly using ```new ROC(0)``` - -See the [ROC JavaDoc](https://deeplearning4j.org/api/{{page.version}}/org/deeplearning4j/eval/ROC.html) for details on evaluating binary classifiers. - -### Evaluating Classifier Calibration - -Deeplearning4j also has the EvaluationCalibration class, which is designed to analyze the calibration of a classifier. It provides a number of tools for this purpose: - - - Counts of the number of labels and predictions for each class - - Reliability diagram (or reliability curve) - - Residual plot (histogram) - - Histograms of probabilities, including probabilities for each class separately - - Evaluation of a classifier using EvaluationCalibration is performed in a similar manner to the other evaluation classes. - The various plots/histograms can be exported to HTML for viewing using ```EvaluationTools.exportevaluationCalibrationToHtmlFile(EvaluationCalibration, File)```. - -### Distributed Evaluation for Spark Networks - -SparkDl4jMultiLayer and SparkComputationGraph both have similar methods for evaluation: -``` -Evaluation eval = SparkDl4jMultiLayer.evaluate(JavaRDD); - -//Multiple evaluations in one pass: -SparkDl4jMultiLayer.doEvaluation(JavaRDD, IEvaluation...); -``` - - -### Evaluation for Multi-task Networks - -A multi-task network is a network that is trained to produce multiple outputs. For example a network given audio samples can be trained to both predict the language spoken and the gender of the speaker. Multi-task configuration is briefly described [here](./deeplearning4j-nn-computationgraph). 
- -Evaluation Classes useful for Multi-Task Network - -See [ROCMultiClass JavaDoc](https://deeplearning4j.org/api/{{page.version}}/org/deeplearning4j/eval/ROCMultiClass.html) - -See [ROCBinary JavaDoc](https://deeplearning4j.org/api/{{page.version}}/org/deeplearning4j/eval/ROCBinary.html) - -## Available evaluations - -{{autogenerated}} diff --git a/docs/deeplearning4j-nn/templates/iterators.md b/docs/deeplearning4j-nn/templates/iterators.md deleted file mode 100644 index 6e577579c..000000000 --- a/docs/deeplearning4j-nn/templates/iterators.md +++ /dev/null @@ -1,44 +0,0 @@ ---- -title: Deeplearning4j Iterators -short_title: Iterators -description: Data iteration tools for loading into neural networks. -category: Models -weight: 5 ---- - -## What is an iterator? - -A dataset iterator allows for easy loading of data into neural networks and help organize batching, conversion, and masking. The iterators included in Eclipse Deeplearning4j help with either user-provided data, or automatic loading of common benchmarking datasets such as MNIST and IRIS. 
 - -## Usage - -For most use cases, initializing an iterator and passing a reference to a `MultiLayerNetwork` or `ComputationGraph` `fit()` method is all you need to begin a task for training: - -```java -MultiLayerNetwork model = new MultiLayerNetwork(conf); -model.init(); - -// pass an MNIST data iterator that automatically fetches data -DataSetIterator mnistTrain = new MnistDataSetIterator(batchSize, true, rngSeed); -model.fit(mnistTrain); -``` - -Many other methods also accept iterators for tasks such as evaluation: - -```java -// passing directly to the neural network -DataSetIterator mnistTest = new MnistDataSetIterator(batchSize, false, rngSeed); -model.eval(mnistTest); - -// using an evaluation class -Evaluation eval = new Evaluation(10); //create an evaluation object with 10 possible classes -while(mnistTest.hasNext()){ - DataSet next = mnistTest.next(); - INDArray output = model.output(next.getFeatureMatrix()); //get the networks prediction - eval.eval(next.getLabels(), output); //check the prediction against the true class -} -``` - -## Available iterators - -{{autogenerated}} \ No newline at end of file diff --git a/docs/deeplearning4j-nn/templates/layers.md b/docs/deeplearning4j-nn/templates/layers.md deleted file mode 100644 index 9a5163c3b..000000000 --- a/docs/deeplearning4j-nn/templates/layers.md +++ /dev/null @@ -1,23 +0,0 @@ ---- -title: Supported Layers -short_title: Layers -description: Supported neural network layers. -category: Models -weight: 3 ---- - -## What are layers? - -Each layer in a neural network configuration represents a unit of hidden units. When layers are stacked together, they represent a *deep neural network*. - -## Using layers - -All layers available in Eclipse Deeplearning4j can be used either in a `MultiLayerNetwork` or `ComputationGraph`. When configuring a neural network, you pass the layer configuration and the network will instantiate the layer for you. - -## Layers vs. 
vertices - -If you are configuring complex networks such as InceptionV4, you will need to use the `ComputationGraph` API and join different branches together using vertices. Check the vertices for more information. - -## General layers - -{{autogenerated}} \ No newline at end of file diff --git a/docs/deeplearning4j-nn/templates/listeners.md b/docs/deeplearning4j-nn/templates/listeners.md deleted file mode 100644 index 6e85dd52f..000000000 --- a/docs/deeplearning4j-nn/templates/listeners.md +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: Deeplearning4j Listeners -short_title: Listeners -description: Adding hooks and listeners on DL4J models. -category: Models -weight: 5 ---- - -## What are listeners? - -Listeners allow users to "hook" into certain events in Eclipse Deeplearning4j. This allows you to collect or print information useful for tasks like training. For example, a `ScoreIterationListener` allows you to print training scores from the output layer of a neural network. - -## Usage - -To add one or more listeners to a `MultiLayerNetwork` or `ComputationGraph`, use the `addListener` method: - -```java -MultiLayerNetwork model = new MultiLayerNetwork(conf); -model.init(); -//print the score with every 1 iteration -model.setListeners(new ScoreIterationListener(1)); -``` - -## Available listeners - -{{autogenerated}} \ No newline at end of file diff --git a/docs/deeplearning4j-nn/templates/model-persistence.md b/docs/deeplearning4j-nn/templates/model-persistence.md deleted file mode 100644 index 82f87f1ff..000000000 --- a/docs/deeplearning4j-nn/templates/model-persistence.md +++ /dev/null @@ -1,28 +0,0 @@ ---- -title: Deeplearning4j Model Persistence -short_title: Model Persistence -description: Saving and loading of neural networks. -category: Models -weight: 10 ---- - -## Saving and Loading a Neural Network - -The `ModelSerializer` is a class which handles loading and saving models. There are two methods for saving models shown in the examples through the link. 
The first example saves a normal multilayer network, the second one saves a [computation graph](https://deeplearning4j.org/docs/latest/deeplearning4j-nn-computationgraph). - -Here is a [basic example](https://github.com/eclipse/deeplearning4j-examples/tree/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/misc/modelsaving) with code to save a computation graph using the `ModelSerializer` class, as well as an example of using ModelSerializer to save a neural net built using MultiLayer configuration. - -### RNG Seed - -If your model uses probabilities (i.e. DropOut/DropConnect), it may make sense to save it separately, and apply it after model is restored; i.e: - -```bash - Nd4j.getRandom().setSeed(12345); - ModelSerializer.restoreMultiLayerNetwork(modelFile); -``` - -This will guarantee equal results between sessions/JVMs. - -## Model serializer - -{{autogenerated}} \ No newline at end of file diff --git a/docs/deeplearning4j-nn/templates/multilayernetwork.md b/docs/deeplearning4j-nn/templates/multilayernetwork.md deleted file mode 100644 index 5ab87b4cd..000000000 --- a/docs/deeplearning4j-nn/templates/multilayernetwork.md +++ /dev/null @@ -1,81 +0,0 @@ ---- -title: Multilayer Network -short_title: Multilayer Network -description: Simple and sequential network configuration. -category: Models -weight: 3 ---- - -## Why use MultiLayerNetwork? - -The `MultiLayerNetwork` class is the simplest network configuration API available in Eclipse Deeplearning4j. This class is useful for beginners or users who do not need a complex and branched network graph. - -You will not want to use `MultiLayerNetwork` configuration if you are creating complex loss functions, using graph vertices, or doing advanced training such as a triplet network. This includes popular complex networks such as InceptionV4. - -## Usage - -The example below shows how to build a simple linear classifier using `DenseLayer` (a basic multiperceptron layer). 
- -```java -MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .seed(seed) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .learningRate(learningRate) - .updater(Updater.NESTEROVS).momentum(0.9) - .list() - .layer(0, new DenseLayer.Builder().nIn(numInputs).nOut(numHiddenNodes) - .weightInit(WeightInit.XAVIER) - .activation("relu") - .build()) - .layer(1, new OutputLayer.Builder(LossFunction.NEGATIVELOGLIKELIHOOD) - .weightInit(WeightInit.XAVIER) - .activation("softmax").weightInit(WeightInit.XAVIER) - .nIn(numHiddenNodes).nOut(numOutputs).build()) - .pretrain(false).backprop(true).build(); -``` - -You can also create convolutional configurations: - -```java -MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder() - .seed(seed) - .regularization(true).l2(0.0005) - .learningRate(0.01)//.biasLearningRate(0.02) - //.learningRateDecayPolicy(LearningRatePolicy.Inverse).lrPolicyDecayRate(0.001).lrPolicyPower(0.75) - .weightInit(WeightInit.XAVIER) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .updater(Updater.NESTEROVS).momentum(0.9) - .list() - .layer(0, new ConvolutionLayer.Builder(5, 5) - //nIn and nOut specify depth. 
nIn here is the nChannels and nOut is the number of filters to be applied - .nIn(nChannels) - .stride(1, 1) - .nOut(20) - .activation("identity") - .build()) - .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX) - .kernelSize(2,2) - .stride(2,2) - .build()) - .layer(2, new ConvolutionLayer.Builder(5, 5) - //Note that nIn need not be specified in later layers - .stride(1, 1) - .nOut(50) - .activation("identity") - .build()) - .layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX) - .kernelSize(2,2) - .stride(2,2) - .build()) - .layer(4, new DenseLayer.Builder().activation("relu") - .nOut(500).build()) - .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) - .nOut(outputNum) - .activation("softmax") - .build()) - .backprop(true).pretrain(false); -``` - -## API - -{{autogenerated}} diff --git a/docs/deeplearning4j-nn/templates/recurrent.md b/docs/deeplearning4j-nn/templates/recurrent.md deleted file mode 100644 index fe07ebddb..000000000 --- a/docs/deeplearning4j-nn/templates/recurrent.md +++ /dev/null @@ -1,355 +0,0 @@ ---- -title: Recurrent Neural Networks in DL4J -short_title: RNN -description: Recurrent Neural Network implementations in DL4J. -category: Models -weight: 10 ---- - -## Recurrent Neural Networks in DL4J - -This document outlines the specifics training features and the practicalities of how to use them in DeepLearning4J. This document assumes some familiarity with recurrent neural networks and their use - it is not an introduction to recurrent neural networks, and assumes some familiarity with their both their use and terminology. 
- -**Contents** - -* [The Basics: Data and Network Configuration](#basics) -* [RNN Training Features](#trainingfeatures) - * [Truncated Back Propagation Through Time](#tbptt) - * [Masking: One-to-Many, Many-to-One, and Sequence Classification](#masking) - * [Masking and Sequence Classification After Training](#testtimemasking) - * [Combining RNN Layers with Other Layer Types](#otherlayertypes) -* [Test Time: Prediction One Step at a Time](#rnntimestep) -* [Importing Time Series Data](#data) -* [Examples](#examples) - -## The Basics: Data and Network Configuration -DL4J currently supports the following types of recurrent neural network -* GravesLSTM (Long Short-Term Memory) -* BidirectionalGravesLSTM -* BaseRecurrent - -Java documentation for each is available, [GravesLSTM](https://deeplearning4j.org/api/{{page.version}}/org/deeplearning4j/nn/conf/layers/GravesLSTM.html), - [BidirectionalGravesLSTM](https://deeplearning4j.org/api/{{page.version}}/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.html), [BaseRecurrent](https://deeplearning4j.org/api/latest/org/deeplearning4j/nn/conf/layers/BaseRecurrentLayer.html) - -#### Data for RNNs -Consider for the moment a standard feed-forward network (a multi-layer perceptron or 'DenseLayer' in DL4J). These networks expect input and output data that is two-dimensional: that is, data with "shape" [numExamples,inputSize]. This means that the data into a feed-forward network has ‘numExamples’ rows/examples, where each row consists of ‘inputSize’ columns. A single example would have shape [1,inputSize], though in practice we generally use multiple examples for computational and optimization efficiency. Similarly, output data for a standard feed-forward network is also two dimensional, with shape [numExamples,outputSize]. - -Conversely, data for RNNs are time series. Thus, they have 3 dimensions: one additional dimension for time. 
Input data thus has shape [numExamples,inputSize,timeSeriesLength], and output data has shape [numExamples,outputSize,timeSeriesLength]. This means that the data in our INDArray is laid out such that the value at position (i,j,k) is the jth value at the kth time step of the ith example in the minibatch. This data layout is shown below. - -When importing time series data using the class CSVSequenceRecordReader each line in the data files represents one time step with the earliest time series observation in the first row (or first row after header if present) and the most recent observation in the last row of the csv. Each feature time series is a separate column of the of the csv file. For example if you have five features in time series, each with 120 observations, and a training & test set of size 53 then there will be 106 input csv files(53 input, 53 labels). The 53 input csv files will each have five columns and 120 rows. The label csv files will have one column (the label) and one row. - -![Data: Feed Forward vs. RNN](/images/guide/rnn_data.png) - -#### RnnOutputLayer - -RnnOutputLayer is a type of layer used as the final layer with many recurrent neural network systems (for both regression and classification tasks). RnnOutputLayer handles things like score calculation, and error calculation (of prediction vs. actual) given a loss function etc. Functionally, it is very similar to the 'standard' OutputLayer class (which is used with feed-forward networks); however it both outputs (and expects as labels/targets) 3d time series data sets. 
- -Configuration for the RnnOutputLayer follows the same design other layers: for example, to set the third layer in a MultiLayerNetwork to a RnnOutputLayer for classification: - - .layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT).activation(Activation.SOFTMAX) - .weightInit(WeightInit.XAVIER).nIn(prevLayerSize).nOut(nOut).build()) - -Use of RnnOutputLayer in practice can be seen in the examples, linked at the end of this document. - -## RNN Training Features - -### Truncated Back Propagation Through Time -Training neural networks (including RNNs) can be quite computationally demanding. For recurrent neural networks, this is especially the case when we are dealing with long sequences - i.e., training data with many time steps. - -Truncated backpropagation through time (BPTT) was developed in order to reduce the computational complexity of each parameter update in a recurrent neural network. In summary, it allows us to train networks faster (by performing more frequent parameter updates), for a given amount of computational power. It is recommended to use truncated BPTT when your input sequences are long (typically, more than a few hundred time steps). - -Consider what happens when training a recurrent neural network with a time series of length 12 time steps. Here, we need to do a forward pass of 12 steps, calculate the error (based on predicted vs. actual), and do a backward pass of 12 time steps: - -![Standard Backprop Training](/images/guide/rnn_tbptt_1.png) - -For 12 time steps, in the image above, this is not a problem. Consider, however, that instead the input time series was 10,000 or more time steps. In this case, standard backpropagation through time would require 10,000 time steps for each of the forward and backward passes for each and every parameter update. This is of course very computationally demanding. - -In practice, truncated BPTT splits the forward and backward passes into a set of smaller forward/backward pass operations. 
The specific length of these forward/backward pass segments is a parameter set by the user. For example, if we use truncated BPTT of length 4 time steps, learning looks like the following: - -![Truncated BPTT](/images/guide/rnn_tbptt_2.png) - -Note that the overall complexity for truncated BPTT and standard BPTT are approximately the same - both do the same number of time step during forward/backward pass. Using this method however, we get 3 parameter updates instead of one for approximately the same amount of effort. However, the cost is not exactly the same there is a small amount of overhead per parameter update. - -The downside of truncated BPTT is that the length of the dependencies learned in truncated BPTT can be shorter than in full BPTT. This is easy to see: consider the images above, with a TBPTT length of 4. Suppose that at time step 10, the network needs to store some information from time step 0 in order to make an accurate prediction. In standard BPTT, this is ok: the gradients can flow backwards all the way along the unrolled network, from time 10 to time 0. In truncated BPTT, this is problematic: the gradients from time step 10 simply don't flow back far enough to cause the required parameter updates that would store the required information. This tradeoff is usually worth it, and (as long as the truncated BPTT lengths are set appropriately), truncated BPTT works well in practice. - -Using truncated BPTT in DL4J is quite simple: just add the following code to your network configuration (at the end, before the final .build() in your network configuration) - - .backpropType(BackpropType.TruncatedBPTT) - .tBPTTLength(100) - -The above code snippet will cause any network training (i.e., calls to MultiLayerNetwork.fit() methods) to use truncated BPTT with segments of length 100 steps. - -Some things of note: - -* By default (if a backprop type is not manually specified), DL4J will use BackpropType.Standard (i.e., full BPTT). 
-* The tBPTTLength configuration parameter set the length of the truncated BPTT passes. Typically, this is somewhere on the order of 50 to 200 time steps, though depends on the application and data. -* The truncated BPTT lengths is typically a fraction of the total time series length (i.e., 200 vs. sequence length 1000), but variable length time series in the same minibatch is OK when using TBPTT (for example, a minibatch with two sequences - one of length 100 and another of length 1000 - with a TBPTT length of 200 - will work correctly) - -### Masking: One-to-Many, Many-to-One, and Sequence Classification - -DL4J supports a number of related training features for RNNs, based on the idea of padding and masking. Padding and masking allows us to support training situations including one-to-many, many-to-one, as also support variable length time series (in the same mini-batch). - -Suppose we want to train a recurrent neural network with inputs or outputs that don't occur at every time step. Examples of this (for a single example) are shown in the image below. DL4J supports training networks for all of these situations: - -![RNN Training Types](/images/guide/rnn_masking_1.png) - -Without masking and padding, we are restricted to the many-to-many case (above, left): that is, (a) All examples are of the same length, and (b) Examples have both inputs and outputs at all time steps. - -The idea behind padding is simple. Consider two time series of lengths 50 and 100 time steps, in the same mini-batch. The training data is a rectangular array; thus, we pad (i.e., add zeros to) the shorter time series (for both input and output), such that the input and output are both the same length (in this example: 100 time steps). - -Of course, if this was all we did, it would cause problems during training. Thus, in addition to padding, we use a masking mechanism. 
The idea behind masking is simple: we have two additional arrays that record whether an input or output is actually present for a given time step and example, or whether the input/output is just padding. - -Recall that with RNNs, our minibatch data has 3 dimensions, with shape [miniBatchSize,inputSize,timeSeriesLength] and [miniBatchSize,outputSize,timeSeriesLength] for the input and output respectively. The padding arrays are then 2 dimensional, with shape [miniBatchSize,timeSeriesLength] for both the input and output, with values of 0 ('absent') or 1 ('present') for each time series and example. The masking arrays for the input and output are stored in separate arrays. - -For a single example, the input and output masking arrays are shown below: - -![RNN Training Types](/images/guide/rnn_masking_2.png) - -For the “Masking not required” cases, we could equivalently use a masking array of all 1s, which will give the same result as not having a mask array at all. Also note that it is possible to use zero, one or two masking arrays when learning RNNs - for example, the many-to-one case could have a masking array for the output only. - -In practice: these padding arrays are generally created during the data import stage (for example, by the SequenceRecordReaderDatasetIterator – discussed later), and are contained within the DataSet object. If a DataSet contains masking arrays, the MultiLayerNetwork fit will automatically use them during training. If they are absent, no masking functionality is used. - -#### Evaluation and Scoring with Masking - -Mask arrays are also important when doing scoring and evaluation (i.e., when evaluating the accuracy of a RNN classifier). Consider for example the many-to-one case: there is only a single output for each example, and any evaluation should take this into account. 
- -Evaluation using the (output) mask arrays can be used during evaluation by passing it to the following method: - - Evaluation.evalTimeSeries(INDArray labels, INDArray predicted, INDArray outputMask) - -where labels are the actual output (3d time series), predicted is the network predictions (3d time series, same shape as labels), and outputMask is the 2d mask array for the output. Note that the input mask array is not required for evaluation. - -Score calculation will also make use of the mask arrays, via the MultiLayerNetwork.score(DataSet) method. Again, if the DataSet contains an output masking array, it will automatically be used when calculating the score (loss function - mean squared error, negative log likelihood etc) for the network. - -#### Masking and Sequence Classification After Training - -Sequence classification is one common use of masking. The idea is that although we have a sequence (time series) as input, we only want to provide a single label for the entire sequence (rather than one label at each time step in the sequence). - -However, RNNs by design output sequences, of the same length of the input sequence. For sequence classification, masking allows us to train the network with this single label at the final time step - we essentially tell the network that there isn't *actually* label data anywhere except for the last time step. - -Now, suppose we've trained our network, and want to get the last time step for predictions, from the time series output array. How do we do that? - - -To get the last time step, there are two cases to be aware of. 
First, when we have a single example, we don't actually need to use the mask arrays: we can just get the last time step in the output array: - -``` - INDArray timeSeriesFeatures = ...; - INDArray timeSeriesOutput = myNetwork.output(timeSeriesFeatures); - int timeSeriesLength = timeSeriesOutput.size(2); //Size of time dimension - INDArray lastTimeStepProbabilities = timeSeriesOutput.get(NDArrayIndex.point(0), NDArrayIndex.all(), NDArrayIndex.point(timeSeriesLength-1)); -``` - -Assuming classification (same process for regression, however) the last line above gives us probabilities at the last time step - i.e., the class probabilities for our sequence classification. - - -The slightly more complex case is when we have multiple examples in the one minibatch (features array), where the lengths of each example differ. (If all are the same length: we can use the same process as above). - -In this 'variable length' case, we need to get the last time step *for each example separately*. If we have the time series lengths for each example from our data pipeline, it becomes straightforward: we just iterate over examples, replacing the ```timeSeriesLength``` in the above code with the length of that example. - -If we don't have the lengths of the time series directly, we need to extract them from the mask array. 
- -If we have a labels mask array (which is a one-hot vector, like [0,0,0,1,0] for each time series): - -``` - INDArray labelsMaskArray = ...; - INDArray lastTimeStepIndices = Nd4j.argMax(labelMaskArray,1); -``` - -Alternatively, if we have only the features mask: One quick and dirty approach is to use this: - -``` - INDArray featuresMaskArray = ...; - int longestTimeSeries = featuresMaskArray.size(1); - INDArray linspace = Nd4j.linspace(1,longestTimeSeries,longestTimeSeries); - INDArray temp = featuresMaskArray.mulColumnVector(linspace); - INDArray lastTimeStepIndices = Nd4j.argMax(temp,1); -``` -To understand what is happening here, note that originally we have a features mask like [1,1,1,1,0], from which we want to get the last non-zero element. So we map [1,1,1,1,0] -> [1,2,3,4,0], and then get the largest element (which is the last time step). - - -In either case, we can then do the following: - -``` - int numExamples = timeSeriesFeatures.size(0); - for( int i=0; iCombining RNN Layers with Other Layer Types - -RNN layers in DL4J can be combined with other layer types. For example, it is possible to combine DenseLayer and LSTM layers in the same network; or combine Convolutional (CNN) layers and LSTM layers for video. - -Of course, the DenseLayer and Convolutional layers do not handle time series data - they expect a different type of input. To deal with this, we need to use the layer preprocessor functionality: for example, the CnnToRnnPreProcessor and FeedForwardToRnnPreprocessor classes. See [here](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor) for all preprocessors. Fortunately, in most situations, the DL4J configuration system will automatically add these preprocessors as required. However, the preprocessors can be added manually (overriding the automatic addition of preprocessors, for each layer). 
- -For example, to manually add a preprocessor between layers 1 and 2, add the following to your network configuration: `.inputPreProcessor(2, new RnnToFeedForwardPreProcessor())`. - -## Test Time: Predictions One Step at a Time -As with other types of neural networks, predictions can be generated for RNNs using the `MultiLayerNetwork.output()` and `MultiLayerNetwork.feedForward()` methods. These methods can be useful in many circumstances; however, they have the limitation that we can only generate predictions for time series, starting from scratch each and every time. - -Consider for example the case where we want to generate predictions in a real-time system, where these predictions are based on a very large amount of history. It this case, it is impractical to use the output/feedForward methods, as they conduct the full forward pass over the entire data history, each time they are called. If we wish to make a prediction for a single time step, at every time step, these methods can be both (a) very costly, and (b) wasteful, as they do the same calculations over and over. - -For these situations, MultiLayerNetwork provides four methods of note: - -* `rnnTimeStep(INDArray)` -* `rnnClearPreviousState()` -* `rnnGetPreviousState(int layer)` -* `rnnSetPreviousState(int layer, Map state)` - -The rnnTimeStep() method is designed to allow forward pass (predictions) to be conducted efficiently, one or more steps at a time. Unlike the output/feedForward methods, the rnnTimeStep method keeps track of the internal state of the RNN layers when it is called. It is important to note that output for the rnnTimeStep and the output/feedForward methods should be identical (for each time step), whether we make these predictions all at once (output/feedForward) or whether these predictions are generated one or more steps at a time (rnnTimeStep). Thus, the only difference should be the computational cost. - -In summary, the MultiLayerNetwork.rnnTimeStep() method does two things: - -1. 
Generate output/predictions (forward pass), using the previous stored state (if any) -2. Update the stored state, storing the activations for the last time step (ready to be used next time rnnTimeStep is called) - -For example, suppose we want to use a RNN to predict the weather, one hour in advance (based on the weather at say the previous 100 hours as input). -If we were to use the output method, at each hour we would need to feed in the full 100 hours of data to predict the weather for hour 101. Then to predict the weather for hour 102, we would need to feed in the full 100 (or 101) hours of data; and so on for hours 103+. - -Alternatively, we could use the rnnTimeStep method. Of course, if we want to use the full 100 hours of history before we make our first prediction, we still need to do the full forward pass: - -![RNN Time Step](/images/guide/rnn_timestep_1.png) - -For the first time we call rnnTimeStep, the only practical difference between the two approaches is that the activations/state of the last time step are stored - this is shown in orange. However, the next time we use the rnnTimeStep method, this stored state will be used to make the next predictions: - -![RNN Time Step](/images/guide/rnn_timestep_2.png) - -There are a number of important differences here: - -1. In the second image (second call of rnnTimeStep) the input data consists of a single time step, instead of the full history of data -2. The forward pass is thus a single time step (as compared to the hundreds – or more) -3. After the rnnTimeStep method returns, the internal state will automatically be updated. Thus, predictions for time 103 could be made in the same way as for time 102. And so on. - -However, if you want to start making predictions for a new (entirely separate) time series: it is necessary (and important) to manually clear the stored state, using the `MultiLayerNetwork.rnnClearPreviousState()` method. 
This will reset the internal state of all recurrent layers in the network. - -If you need to store or set the internal state of the RNN for use in predictions, you can use the rnnGetPreviousState and rnnSetPreviousState methods, for each layer individually. This can be useful for example during serialization (network saving/loading), as the internal network state from the rnnTimeStep method is *not* saved by default, and must be saved and loaded separately. Note that these get/set state methods return and accept a map, keyed by the type of activation. For example, in the LSTM model, it is necessary to store both the output activations, and the memory cell state. - -Some other points of note: - -- We can use the rnnTimeStep method for multiple independent examples/predictions simultaneously. In the weather example above, we might for example want to make predicts for multiple locations using the same neural network. This works in the same way as training and the forward pass / output methods: multiple rows (dimension 0 in the input data) are used for multiple examples. -- If no history/stored state is set (i.e., initially, or after a call to rnnClearPreviousState), a default initialization (zeros) is used. This is the same approach as during training. -- The rnnTimeStep can be used for an arbitrary number of time steps simultaneously – not just one time step. However, it is important to note: - - For a single time step prediction: the data is 2 dimensional, with shape [numExamples,nIn]; in this case, the output is also 2 dimensional, with shape [numExamples,nOut] - - For multiple time step predictions: the data is 3 dimensional, with shape [numExamples,nIn,numTimeSteps]; the output will have shape [numExamples,nOut,numTimeSteps]. Again, the final time step activations are stored as before. 
-- It is not possible to change the number of examples between calls of rnnTimeStep (in other words, if the first use of rnnTimeStep is for say 3 examples, all subsequent calls must be with 3 examples). After resetting the internal state (using rnnClearPreviousState()), any number of examples can be used for the next call of rnnTimeStep. -- The rnnTimeStep method makes no changes to the parameters; it is used after training the network has been completed only. -- The rnnTimeStep method works with networks containing single and stacked/multiple RNN layers, as well as with networks that combine other layer types (such as Convolutional or Dense layers). -- The RnnOutputLayer layer type does not have any internal state, as it does not have any recurrent connections. - -## Importing Time Series Data - -Data import for RNNs is complicated by the fact that we have multiple different types of data we could want to use for RNNs: one-to-many, many-to-one, variable length time series, etc. This section will describe the currently implemented data import mechanisms for DL4J. - -The methods described here utilize the SequenceRecordReaderDataSetIterator class, in conjunction with the CSVSequenceRecordReader class from DataVec. This approach currently allows you to load delimited (tab, comma, etc) data from files, where each time series is in a separate file. -This method also supports: - -* Variable length time series input -* One-to-many and many-to-one data loading (where input and labels are in different files) -* Label conversion from an index to a one-hot representation for classification (i.e., '2' to [0,0,1,0]) -* Skipping a fixed/specified number of rows at the start of the data files (i.e., comment or header rows) - -Note that in all cases, each line in the data files represents one time step. 
- -(In addition to the examples below, you might find [these unit tests](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/datasets/datavec/RecordReaderDataSetiteratorTest.java) to be of some use.) - -#### Example 1: Time Series of Same Length, Input and Labels in Separate Files - -Suppose we have 10 time series in our training data, represented by 20 files: 10 files for the input of each time series, and 10 files for the output/labels. For now, assume these 20 files all contain the same number of time steps (i.e., same number of rows). - -To use the [SequenceRecordReaderDataSetIterator](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-data/deeplearning4j-datavec-iterators/src/main/java/org/deeplearning4j/datasets/datavec/SequenceRecordReaderDataSetIterator.java) and [CSVSequenceRecordReader](https://github.com/eclipse/deeplearning4j/blob/master/datavec/datavec-api/src/main/java/org/datavec/api/records/reader/impl/csv/CSVSequenceRecordReader.java) approaches, we first create two CSVSequenceRecordReader objects, one for input and one for labels: - - SequenceRecordReader featureReader = new CSVSequenceRecordReader(1, ","); - SequenceRecordReader labelReader = new CSVSequenceRecordReader(1, ","); - -This particular constructor takes the number of lines to skip (1 row skipped here), and the delimiter (comma character used here). - -Second, we need to initialize these two readers, by telling them where to get the data from. We do this with an InputSplit object. -Suppose that our time series are numbered, with file names "myInput_0.csv", "myInput_1.csv", ..., "myLabels_0.csv", etc. 
One approach is to use the [NumberedFileInputSplit](https://github.com/eclipse/deeplearning4j/blob/master/datavec/datavec-api/src/main/java/org/datavec/api/split/NumberedFileInputSplit.java): - - featureReader.initialize(new NumberedFileInputSplit("/path/to/data/myInput_%d.csv", 0, 9)); - labelReader.initialize(new NumberedFileInputSplit(/path/to/data/myLabels_%d.csv", 0, 9)); - -In this particular approach, the "%d" is replaced by the corresponding number, and the numbers 0 to 9 (both inclusive) are used. - -Finally, we can create our SequenceRecordReaderdataSetIterator: - - DataSetIterator iter = new SequenceRecordReaderDataSetIterator(featureReader, labelReader, miniBatchSize, numPossibleLabels, regression); - -This DataSetIterator can then be passed to MultiLayerNetwork.fit() to train the network. - -The miniBatchSize argument specifies the number of examples (time series) in each minibatch. For example, with 10 files total, miniBatchSize of 5 would give us two data sets with 2 minibatches (DataSet objects) with 5 time series in each. - -Note that: - -* For classification problems: numPossibleLabels is the number of classes in your data set. Use regression = false. - * Labels data: one value per line, as a class index - * Label data will be converted to a one-hot representation automatically -* For regression problems: numPossibleLabels is not used (set it to anything) and use regression = true. - * The number of values in the input and labels can be anything (unlike classification: can have an arbitrary number of outputs) - * No processing of the labels is done when regression = true - -#### Example 2: Time Series of Same Length, Input and Labels in Same File - -Following on from the last example, suppose that instead of a separate files for our input data and labels, we have both in the same file. However, each time series is still in a separate file. 
- -As of DL4J 0.4-rc3.8, this approach has the restriction of a single column for the output (either a class index, or a single real-valued regression output) - -In this case, we create and initialize a single reader. Again, we are skipping one header row, and specifying the format as comma delimited, and assuming our data files are named "myData_0.csv", ..., "myData_9.csv": - - SequenceRecordReader reader = new CSVSequenceRecordReader(1, ","); - reader.initialize(new NumberedFileInputSplit("/path/to/data/myData_%d.csv", 0, 9)); - DataSetIterator iterClassification = new SequenceRecordReaderDataSetIterator(reader, miniBatchSize, numPossibleLabels, labelIndex, false); - -`miniBatchSize` and `numPossibleLabels` are the same as the previous example. Here, `labelIndex` specifies which column the labels are in. For example, if the labels are in the fifth column, use labelIndex = 4 (i.e., columns are indexed 0 to numColumns-1). - -For regression on a single output value, we use: - - DataSetIterator iterRegression = new SequenceRecordReaderDataSetIterator(reader, miniBatchSize, -1, labelIndex, true); - -Again, the numPossibleLabels argument is not used for regression. - -#### Example 3: Time Series of Different Lengths (Many-to-Many) - -Following on from the previous two examples, suppose that for each example individually, the input and labels are of the same length, but these lengths differ between time series. - -We can use the same approach (CSVSequenceRecordReader and SequenceRecordReaderDataSetIterator), though with a different constructor: - - DataSetIterator variableLengthIter = new SequenceRecordReaderDataSetIterator(featureReader, labelReader, miniBatchSize, numPossibleLabels, regression, SequenceRecordReaderDataSetIterator.AlignmentMode.ALIGN_END); - -The argument here are the same as in the previous example, with the exception of the AlignmentMode.ALIGN_END addition. 
This alignment mode input tells the SequenceRecordReaderDataSetIterator to expect two things: - -1. That the time series may be of different lengths -2. To align the input and labels - for each example individually - such that their last values occur at the same time step. - -Note that if the features and labels are always of the same length (as is the assumption in example 3), then the two alignment modes (AlignmentMode.ALIGN_END and AlignmentMode.ALIGN_START) will give identical outputs. The alignment mode option is explained in the next section. - -Also note: that variable length time series always start at time zero in the data arrays: padding, if required, will be added after the time series has ended. - -Unlike examples 1 and 2 above, the DataSet objects produced by the above variableLengthIter instance will also include input and masking arrays, as described earlier in this document. - -#### Example 4: Many-to-One and One-to-Many Data -We can also use the AlignmentMode functionality in example 3 to implement a many-to-one RNN sequence classifier. Here, let us assume: - -* Input and labels are in separate delimited files -* The labels files contain a single row (time step) (either a class index for classification, or one or more numbers for regression) -* The input lengths may (optionally) differ between examples - -In fact, the same approach as in example 3 can do this: - - DataSetIterator variableLengthIter = new SequenceRecordReaderDataSetIterator(featureReader, labelReader, miniBatchSize, numPossibleLabels, regression, SequenceRecordReaderDataSetIterator.AlignmentMode.ALIGN_END); - -Alignment modes are relatively straightforward. They specify whether to pad the start or the end of the shorter time series. 
The diagram below shows how this works, along with the masking arrays (as discussed earlier in this document): - -![Sequence Alignment](/images/guide/rnn_seq_alignment.png) - -The one-to-many case (similar to the last case above, but with only one input) is done by using AlignmentMode.ALIGN_START. - -Note that in the case of training data that contains time series of different lengths, the labels and inputs will be aligned for each example individually, and then the shorter time series will be padded as required: - -![Sequence Alignment](/images/guide/rnn_seq_alignment_2.png) - -## Available layers - -{{autogenerated}} \ No newline at end of file diff --git a/docs/deeplearning4j-nn/templates/transfer-learning.md b/docs/deeplearning4j-nn/templates/transfer-learning.md deleted file mode 100644 index c44ce4124..000000000 --- a/docs/deeplearning4j-nn/templates/transfer-learning.md +++ /dev/null @@ -1,160 +0,0 @@ ---- -title: Neural Network Transfer Learning -short_title: Transfer Learning -description: -category: Tuning & Training -weight: 5 ---- - -## DL4J’s Transfer Learning API - -The DL4J transfer learning API enables users to: - -* Modify the architecture of an existing model -* Fine tune learning configurations of an existing model. -* Hold parameters of a specified layer constant during training, also referred to as “frozen" - -Holding certain layers frozen on a network and training is effectively the same as training on a transformed version of the input, the transformed version being the intermediate outputs at the boundary of the frozen layers. This is the process of “feature extraction” from the input data and will be referred to as “featurizing” in this document. - - -## The transfer learning helper - -The forward pass to “featurize” the input data on large, pertained networks can be time consuming. DL4J also provides a TransferLearningHelper class with the following capabilities. 
- -* Featurize an input dataset to save for future use -* Fit the model with frozen layers with a featurized dataset -* Output from the model with frozen layers given a featurized input. - -When running multiple epochs users will save on computation time since the expensive forward pass on the frozen layers/vertices will only have to be conducted once. - - -## Show me the code - -This example will use VGG16 to classify images belonging to five categories of flowers. The dataset will automatically download from http://download.tensorflow.org/example_images/flower_photos.tgz - -#### I. Import a zoo model - -As of 0.9.0 (0.8.1-SNAPSHOT) Deeplearning4j has a new native model zoo. Read about the [deeplearning4j-zoo](/model-zoo) module for more information on using pretrained models. Here, we load a pretrained VGG-16 model initialized with weights trained on ImageNet: - -``` -ZooModel zooModel = new VGG16(); -ComputationGraph pretrainedNet = (ComputationGraph) zooModel.initPretrained(PretrainedType.IMAGENET); -``` - - -#### II. Set up a fine-tune configuration - -``` -FineTuneConfiguration fineTuneConf = new FineTuneConfiguration.Builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .updater(new Nesterovs(5e-5)) - .seed(seed) - .build(); -``` - -#### III. Build new models based on VGG16 - -##### A.Modifying only the last layer, keeping other frozen - -The final layer of VGG16 does a softmax regression on the 1000 classes in ImageNet. We modify the very last layer to give predictions for five classes keeping the other layers frozen. 
- -``` -ComputationGraph vgg16Transfer = new TransferLearning.GraphBuilder(pretrainedNet) - .fineTuneConfiguration(fineTuneConf) - .setFeatureExtractor("fc2") - .removeVertexKeepConnections("predictions") - .addLayer("predictions", - new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) - .nIn(4096).nOut(numClasses) - .weightInit(WeightInit.XAVIER) - .activation(Activation.SOFTMAX).build(), "fc2") - .build(); -``` -After a mere thirty iterations, which in this case is exposure to 450 images, the model attains an accuracy > 75% on the test dataset. This is rather remarkable considering the complexity of training an image classifier from scratch. - -##### B. Attach new layers to the bottleneck (block5_pool) - -Here we hold all but the last three dense layers frozen and attach new dense layers onto it. Note that the primary intent here is to demonstrate the use of the API, secondary to what might give better results. - -``` -ComputationGraph vgg16Transfer = new TransferLearning.GraphBuilder(pretrainedNet) - .fineTuneConfiguration(fineTuneConf) - .setFeatureExtractor("block5_pool") - .nOutReplace("fc2",1024, WeightInit.XAVIER) - .removeVertexAndConnections("predictions") - .addLayer("fc3",new DenseLayer.Builder() - .activation(Activation.RELU) - .nIn(1024).nOut(256).build(),"fc2") - .addLayer("newpredictions",new OutputLayer - .Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) - .activation(Activation.SOFTMAX) - .nIn(256).nOut(numClasses).build(),"fc3") - .setOutputs("newpredictions") - .build(); -``` - -##### C. Fine tune layers from a previously saved model - -Say we have saved off our model from (B) and now want to allow “block_5” layers to train. - -``` -ComputationGraph vgg16FineTune = new TransferLearning.GraphBuilder(vgg16Transfer) - .fineTuneConfiguration(fineTuneConf) - .setFeatureExtractor("block4_pool") - .build(); -``` - -#### IV. Saving “featurized” datasets and training with them. - -We use the transfer learning helper API. 
Note this freezes the layers of the model passed in. - -Here is how you obtain the featurized version of the dataset at the specified layer “fc2”. - -``` -TransferLearningHelper transferLearningHelper = - new TransferLearningHelper(pretrainedNet, "fc2"); -while(trainIter.hasNext()) { - DataSet currentFeaturized = transferLearningHelper.featurize(trainIter.next()); - saveToDisk(currentFeaturized,trainDataSaved,true); - trainDataSaved++; -} -``` - -Here is how you can fit with a featurized dataset. vgg16Transfer is a model setup in (A) of section III. - -``` -TransferLearningHelper transferLearningHelper = - new TransferLearningHelper(vgg16Transfer); -while (trainIter.hasNext()) { - transferLearningHelper.fitFeaturized(trainIter.next()); -} -``` - -## Notes - -* The TransferLearning builder returns a new instance of a dl4j model. - -Keep in mind this is a second model that leaves the original one untouched. For large pretrained networks, take into consideration memory requirements and adjust your JVM heap space accordingly. - -* The trained model helper imports models from Keras without enforcing a training configuration. - -Therefore the last layer (as seen when printing the summary) is a dense layer and not an output layer with a loss function. Therefore to modify nOut of an output layer we delete the layer vertex, keeping its connections and add back in a new output layer with the same name, a different nOut, the suitable loss function etc. - -* Changing nOuts at a layer/vertex will modify nIn of the layers/vertices it fans into. - -When changing nOut users can specify a weight initialization scheme or a distribution for the layer as well as a separate weight initialization scheme or distribution for the layers it fans out to. - -* Frozen layer configurations are not saved when writing the model to disk. - -In other words, a model with frozen layers when serialized and read back in will not have any frozen layers. 
To continue training holding specific layers constant the user is expected to go through the transfer learning helper or the transfer learning API. There are two ways to “freeze” layers in a dl4j model. - - - On a copy: With the transfer learning API which will return a new model with the relevant frozen layers - - In place: With the transfer learning helper API which will apply the frozen layers to the given model. - -* FineTune configurations will selectively update learning parameters. - -For eg, if a learning rate is specified this learning rate will apply to all unfrozen/trainable layers in the model. However, newly added layers can override this learning rate by specifying their own learning rates in the layer builder. - -## Utilities - -{{autogenerated}} \ No newline at end of file diff --git a/docs/deeplearning4j-nn/templates/tsne-visualization.md b/docs/deeplearning4j-nn/templates/tsne-visualization.md deleted file mode 100644 index 9a55b1a74..000000000 --- a/docs/deeplearning4j-nn/templates/tsne-visualization.md +++ /dev/null @@ -1,72 +0,0 @@ ---- -title: t-SNE's Data Visualization -short_title: t-SNE Visualization -description: Data visualizaiton with t-SNE with higher dimensional data. -category: Tuning & Training -weight: 10 ---- - -## t-SNE's Data Visualization - -[t-Distributed Stochastic Neighbor Embedding](https://en.wikipedia.org/wiki/T-distributed_stochastic_neighbor_embedding) (t-SNE) is a data-visualization tool created by Laurens van der Maaten at Delft University of Technology. - -While it can be used for any data, t-SNE (pronounced Tee-Snee) is only really meaningful with labeled data, which clarify how the input is clustering. Below, you can see the kind of graphic you can generate in DL4J with t-SNE working on MNIST data. - -![Alt text](/images/guide/tsne.png) - -Look closely and you can see the numerals clustered near their likes, alongside the dots. - -Here's how t-SNE appears in Deeplearning4j code. 
- -```java -public class TSNEStandardExample { - - private static Logger log = LoggerFactory.getLogger(TSNEStandardExample.class); - - public static void main(String[] args) throws Exception { - //STEP 1: Initialization - int iterations = 100; - //create an n-dimensional array of doubles - DataTypeUtil.setDTypeForContext(DataBuffer.Type.DOUBLE); - List cacheList = new ArrayList<>(); //cacheList is a dynamic array of strings used to hold all words - - //STEP 2: Turn text input into a list of words - log.info("Load & Vectorize data...."); - File wordFile = new ClassPathResource("words.txt").getFile(); //Open the file - //Get the data of all unique word vectors - Pair vectors = WordVectorSerializer.loadTxt(wordFile); - VocabCache cache = vectors.getSecond(); - INDArray weights = vectors.getFirst().getSyn0(); //seperate weights of unique words into their own list - - for(int i = 0; i < cache.numWords(); i++) //seperate strings of words into their own list - cacheList.add(cache.wordAtIndex(i)); - - //STEP 3: build a dual-tree tsne to use later - log.info("Build model...."); - BarnesHutTsne tsne = new BarnesHutTsne.Builder() - .setMaxIter(iterations).theta(0.5) - .normalize(false) - .learningRate(500) - .useAdaGrad(false) -// .usePca(false) - .build(); - - //STEP 4: establish the tsne values and save them to a file - log.info("Store TSNE Coordinates for Plotting...."); - String outputFile = "target/archive-tmp/tsne-standard-coords.csv"; - (new File(outputFile)).getParentFile().mkdirs(); - tsne.plot(weights,2,cacheList,outputFile); - //This tsne will use the weights of the vectors as its matrix, have two dimensions, use the words strings as - //labels, and be written to the outputFile created on the previous line - - } - - - -} -``` - -Here is an image of the tsne-standard-coords.csv file plotted using gnuplot. 
- - -![Tsne data plot](/images/guide/tsne_output.png) diff --git a/docs/deeplearning4j-nn/templates/vertices.md b/docs/deeplearning4j-nn/templates/vertices.md deleted file mode 100644 index 92fe59834..000000000 --- a/docs/deeplearning4j-nn/templates/vertices.md +++ /dev/null @@ -1,15 +0,0 @@ ---- -title: Supported Vertices -short_title: Vertices -description: Computation graph nodes for advanced configuration. -category: Models -weight: 4 ---- - -## What is a vertex? - -In Eclipse Deeplearning4j a vertex is a type of layer that acts as a node in a `ComputationGraph`. It can accept multiple inputs, provide multiple outputs, and can help construct popular networks such as InceptionV4. - -## Available classes - -{{autogenerated}} \ No newline at end of file diff --git a/docs/deeplearning4j-nn/templates/visualization.md b/docs/deeplearning4j-nn/templates/visualization.md deleted file mode 100644 index d4a4b30fc..000000000 --- a/docs/deeplearning4j-nn/templates/visualization.md +++ /dev/null @@ -1,325 +0,0 @@ ---- -title: Visualize, Monitor and Debug Neural Network Learning -short_title: Visualization -description: How to visualize, monitor and debug neural network learning. -category: Tuning & Training -weight: 2 ---- - -## Contents - -* [Visualizing Network Training with the Deeplearning4j Training UI](#ui) - * [Deeplearning4j UI: The Overview Page](#overviewpage) - * [Deeplearning4j UI: The Model Page](#modelpage) -* [Deeplearning4J UI and Spark Training](#sparkui) -* [Using the UI to Tune Your Network](#usingui) -* [TSNE and Word2Vec](#tsne) -* [Fixing UI Issue: "No configuration setting" exception](#issues) - -## Visualizing Network Training with the Deeplearning4j Training UI - -**Note**: This information here pertains to DL4J versions 0.7.0 and later. - -DL4J Provides a user interface to visualize in your browser (in real time) the current network status and progress of training. 
The UI is typically used to help with tuning neural networks - i.e., the selection of hyperparameters (such as learning rate) to obtain good performance for a network. - -**Step 1: Add the Deeplearning4j UI dependency to your project.** - -``` - - org.deeplearning4j - deeplearning4j-ui_2.10 - {{ page.version }} - -``` - -Note the ```_2.10``` suffix: this is the Scala version (due to using the Play framework, a Scala library, for the backend). If you are not using other Scala libraries, either ```_2.10``` or ```_2.11``` is OK. - -**Step 2: Enable the UI in your project** - -This is relatively straightforward: - -``` - //Initialize the user interface backend - UIServer uiServer = UIServer.getInstance(); - - //Configure where the network information (gradients, score vs. time etc) is to be stored. Here: store in memory. - StatsStorage statsStorage = new InMemoryStatsStorage(); //Alternative: new FileStatsStorage(File), for saving and loading later - - //Attach the StatsStorage instance to the UI: this allows the contents of the StatsStorage to be visualized - uiServer.attach(statsStorage); - - //Then add the StatsListener to collect this information from the network, as it trains - net.setListeners(new StatsListener(statsStorage)); -``` - -To access the UI, open your browser and go to ```http://localhost:9000/train```. -You can set the port by using the ```org.deeplearning4j.ui.port``` system property: i.e., to use port 9001, pass the following to the JVM on launch: ```-Dorg.deeplearning4j.ui.port=9001``` - -Information will then be collected and routed to the UI when you call the ```fit``` method on your network. 
- - -**Example:** [See a UI example here](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/userInterface/UIExample.java) - -The full set of UI examples are available [here](https://github.com/eclipse/deeplearning4j-examples/tree/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/userInterface). - - -### Deeplearning4j UI: The Overview Page - -![Overview Page](/images/guide/DL4J_UI_01.png) - -The overview page (one of 3 available pages) contains the following information: - -- Top left: score vs iteration chart - this is the value of the loss function on the current minibatch -- Top right: model and training information -- Bottom left: Ratio of parameters to updates (by layer) for all network weights vs. iteration -- Bottom right: Standard deviations (vs. time) of: activations, gradients and updates - -Note that for the bottom two charts, these are displayed as the logarithm (base 10) of the values. Thus a value of -3 on the update: parameter ratio chart corresponds to a ratio of 10-3 = 0.001. - -The ratio of updates to parameters is specifically the ratio of mean magnitudes of these values (i.e., log10(mean(abs(updates))/mean(abs(parameters))). - -See the later section of this page on how to use these values in practice. - -### Deeplearning4j UI: The Model Page - -![Model Page](/images/guide/DL4J_UI_02.png) - -The model page contains a graph of the neural network layers, which operates as a selection mechanism. Click on a layer to display information for it. - -On the right, the following charts are available, after selecting a layer: - -- Table of layer information -- Update to parameter ratio for this layer, as per the overview page. The components of this ratio (the parameter and update mean magnitudes) are also available via tabs. 
-- Layer activations (mean and mean +/- 2 standard deviations) over time -- Histograms of parameters and updates, for each parameter type -- Learning rate vs. time (note this will be flat, unless learning rate schedules are used) - - -*Note: parameters are labeled as follows: weights (W) and biases (b). For recurrent neural networks, W refers to the weights connecting the layer to the layer below, and RW refers to the recurrent weights (i.e., those between time steps).* - - - - -## Deeplearning4J UI and Spark Training - -The DL4J UI can be used with Spark. However, as of 0.7.0, conflicting dependencies mean that running the UI and Spark is the same JVM can be difficult. - -Two alternatives are available: - -1. Collect and save the relevant stats, to be visualized (offline) at a later point -2. Run the UI in a separate server, and Use the remote UI functionality to upload the data from the Spark master to your UI instance - -**Collecting Stats for Later Offline Use** - -``` - SparkDl4jMultiLayer sparkNet = new SparkDl4jMultiLayer(sc, conf, tm); - - StatsStorage ss = new FileStatsStorage(new File("myNetworkTrainingStats.dl4j")); - sparkNet.setListeners(ss, Collections.singletonList(new StatsListener(null))); -``` - -Then, later you can load and display the saved information using: - -``` - StatsStorage statsStorage = new FileStatsStorage(statsFile); //If file already exists: load the data from it - UIServer uiServer = UIServer.getInstance(); - uiServer.attach(statsStorage); -``` - -**Using the Remote UI Functionality** - -First, in the JVM running the UI (note this is the server): - -``` - UIServer uiServer = UIServer.getInstance(); - uiServer.enableRemoteListener(); //Necessary: remote support is not enabled by default -``` -This will require the ```deeplearning4j-ui_2.10``` or ```deeplearning4j-ui_2.11``` dependency. 
(NOTE THIS IS NOT THE CLIENT THIS IS YOUR SERVER - SEE BELOW FOR THE CLIENT WHICH USES: deeplearning4j-ui-model) - -Client (both spark and standalone neural networks using simple deeplearning4j-nn) -Second, for your neural net (Note this example is for spark, but computation graph and multi layer network both have the equivalemtn setListeners method with the same usage, [example found here](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/userInterface/RemoteUIExample.java)): - -``` - SparkDl4jMultiLayer sparkNet = new SparkDl4jMultiLayer(sc, conf, tm); - - StatsStorageRouter remoteUIRouter = new RemoteUIStatsStorageRouter("http://UI_MACHINE_IP:9000"); - sparkNet.setListeners(remoteUIRouter, Collections.singletonList(new StatsListener(null))); -``` -To avoid dependency conflicts with Spark, you should use the ```deeplearning4j-ui-model``` dependency to get the StatsListener, *not* the full ```deeplearning4j-ui_2.10``` UI dependency. - -**Note to scala users**: - -You need to use the above method if you are on a newer scala version. See the linked example above for the client. - - - - -Note: you should replace ```UI_MACHINE_IP``` with the IP address of the machine running the user interface instance. - - - - -## Using the UI to Tune Your Network - -Here's an excellent [web page by Andrej Karpathy](http://cs231n.github.io/neural-networks-3/#baby) about visualizing neural net training. It is worth reading and understanding that page first. - -Tuning neural networks is often more an art than a science. However, here's some ideas that may be useful: - -**Overview Page - Model Score vs. Iteration Chart** - -The score vs. iteration should (overall) go down over time. - -- If the score increases consistently, your learning rate is likely set too high. Try reducing it until scores become more stable. 
-- Increasing scores can also be indicative of other network issues, such as incorrect data normalization -- If the score is flat or decreases very slowly (over a few hundred iterations) (a) your learning rate may be too low, or (b) you might be having difficulties with optimization. In the latter case, if you are using the SGD updater, try a different updater such as Nesterovs (momentum), RMSProp or Adagrad. -- Note that data that isn't shuffled (i.e., each minibatch contains only one class, for classification) can result in very rough or abnormal-looking score vs. iteration graphs -- Some noise in this line chart is expected (i.e., the line will go up and down within a small range). However, if the scores vary quite significantly between runs variation is very large, this can be a problem - - The issues mentioned above (learning rate, normalization, data shuffling) may contribute to this. - - Setting the minibatch size to a very small number of examples can also contribute to noisy score vs. iteration graphs, and *might* lead to optimization difficulties - -**Overview Page and Model Page - Using the Update: Parameter Ratio Chart** - -- The ratio of mean magnitude of updates to parameters is provided on both the overview and model pages - - "Mean magnitude" = the average of the absolute value of the parameters or updates at the current time step -- The most important use of this ratio is in selecting a learning rate. As a rule of thumb: this ratio should be around 1:1000 = 0.001. On the (log10) chart, this corresponds to a value of -3 (i.e., 10-3 = 0.001) - - Note that is a rough guide only, and may not be appropriate for all networks. It's often a good starting point, however. 
- - If the ratio diverges significantly from this (for example, > -2 (i.e., 10-2=0.01) or < -4 (i.e., 10-4=0.0001), your parameters may be too unstable to learn useful features, or may change too slowly to learn useful features - - To change this ratio, adjust your learning rate (or sometimes, parameter initialization). In some networks, you may need to set the learning rate differently for different layers. -- Keep an eye out for unusually large spikes in the ratio: this may indicate exploding gradients - - -**Model Page: Layer Activations (vs. Time) Chart** - -This chart can be used to detect vanishing or exploding activations (due to poor weight initialization, too much regularization, lack of data normalization, or too high a learning rate). - -- This chart should ideally stabilize over time (usually a few hundred iterations) -- A good standard deviation for the activations is on the order of 0.5 to 2.0. Significantly outside of this range may indicate one of the problems mentioned above. - -**Model Page: Layer Parameters Histogram** - -The layer parameters histogram is displayed for the most recent iteration only. - -- For weights, these histograms should have an approximately Gaussian (normal) distribution, after some time -- For biases, these histograms will generally start at 0, and will usually end up being approximately Gaussian - - One exception to this is for LSTM recurrent neural network layers: by default, the biases for one gate (the forget gate) are set to 1.0 (by default, though this is configurable), to help in learning dependencies across long time periods. This results in the bias graphs initially having many biases around 0.0, with another set of biases around 1.0 -- Keep an eye out for parameters that are diverging to +/- infinity: this may be due to too high a learning rate, or insufficient regularization (try adding some L2 regularization to your network). -- Keep an eye out for biases that become very large. 
This can sometimes occur in the output layer for classification, if the distribution of classes is very imbalanced - -**Model Page: Layer Updates Histogram** - -The layer update histogram is displayed for the most recent iteration only. - -- Note that these are the updates - i.e., the gradients *after* applying learning rate, momentum, regularization etc -- As with the parameter graphs, these should have an approximately Gaussian (normal) distribution -- Keep an eye out for very large values: this can indicate exploding gradients in your network - - Exploding gradients are problematic as they can 'mess up' the parameters of your network - - In this case, it may indicate a weight initialization, learning rate or input/labels data normalization issue - - In the case of recurrent neural networks, adding some [gradient normalization or gradient clipping](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/GradientNormalization.java) may help - -**Model Page: Parameter Learning Rates Chart** - -This chart simply shows the learning rates of the parameters of selected layer, over time. - -If you are not using learning rate schedules, the chart will be flat. If you *are* using learning rate schedules, you can use this chart to track the current value of the learning rate (for each parameter), over time. - - -## TSNE and Word2vec - -We rely on [TSNE](https://lvdmaaten.github.io/tsne/) to reduce the dimensionality of [word feature vectors](./deeplearning4j-nlp-word2vec) and project words into a two or three-dimensional space. 
Here's some code for using TSNE with Word2Vec: - -```java -log.info("Plot TSNE...."); -BarnesHutTsne tsne = new BarnesHutTsne.Builder() - .setMaxIter(1000) - .stopLyingIteration(250) - .learningRate(500) - .useAdaGrad(false) - .theta(0.5) - .setMomentum(0.5) - .normalize(true) - .usePca(false) - .build(); -vec.lookupTable().plotVocab(tsne); -``` - -## Fixing UI Issue: "No configuration setting" exception - -A possible exception that can occur with the DL4J UI is the following: -``` -com.typesafe.config.ConfigException$Missing: No configuration setting found for key 'play.crypto.provider' - at com.typesafe.config.impl.SimpleConfig.findKeyOrNull(SimpleConfig.java:152) - at com.typesafe.config.impl.SimpleConfig.findOrNull(SimpleConfig.java:170) - ... - at play.server.Server.forRouter(Server.java:96) - at org.deeplearning4j.ui.play.PlayUIServer.runMain(PlayUIServer.java:206) - at org.deeplearning4j.ui.api.UIServer.getInstance(UIServer.java:27) -``` - -This exception is not due to DL4J directly, but is due to a missing application.conf file, required by the Play framework (the library that DL4J's UI is based on). This is originally present in the deeplearning4j-play dependency: however, if an uber-jar (i.e., a JAR file with dependencies) is built (say, via ```mvn package```), it may not be copied over correctly. For example, using the ```maven-assembly-plugin``` has caused this exception for some users. 
- -The recommended solution (for Maven) is to use the Maven Shade plugin to produce an uber-jar, configured as follows: - -```xml - - - - org.codehaus.mojo - exec-maven-plugin - ${exec-maven-plugin.version} - - - - exec - - - - - java - - - - org.apache.maven.plugins - maven-shade-plugin - ${maven-shade-plugin.version} - - true - ${shadedClassifier} - true - - - *:* - - - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - - - - - package - - shade - - - - - reference.conf - - - - - - - - - - -``` - -Then, create your uber-jar with ```mvn package``` and run via ```cd target && java -cp dl4j-examples-0.9.1-bin.jar org.deeplearning4j.examples.userInterface.UIExample```. Note the "-bin" suffix for the generated JAR file: this includes all dependencies. - -Note also that this Maven Shade approach is configured for DL4J's examples repository. \ No newline at end of file diff --git a/docs/deeplearning4j-scaleout/README.md b/docs/deeplearning4j-scaleout/README.md deleted file mode 100644 index 1dbb54d04..000000000 --- a/docs/deeplearning4j-scaleout/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# deeplearning4j-scaleout documentation - -Build and serve documentation for deeplearning4j-scaleout with MkDocs (install with `pip install mkdocs`) -The source for Keras documentation is in this directory under `doc_sources/`. - -The structure of this project (template files, generating code, mkdocs YAML) is closely aligned -with the [Keras documentation](keras.io) and heavily inspired by the [Keras docs repository](https://github.com/keras-team/keras/tree/master/docs). 
- -To generate docs into the `deeplearning4j-scaleout/doc_sources` folder, first `cd docs` then run: - -```shell -python generate_docs.py \ - --project deeplearning4j-scaleout \ - --code ../deeplearning4j - --out_language en -``` diff --git a/docs/deeplearning4j-scaleout/pages.json b/docs/deeplearning4j-scaleout/pages.json deleted file mode 100644 index 58ef542b0..000000000 --- a/docs/deeplearning4j-scaleout/pages.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "excludes": [ - ], - "indices": [ - ], - "pages": [ - { - "page": "intro.md", - "class": [] - }, - { - "page": "technicalref.md", - "class": [] - }, - { - "page": "howto.md", - "class": [] - }, - { - "page": "data-howto.md", - "class": [] - }, - { - "page": "apiref.md", - "class": [ - "deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/multilayer/SparkDl4jMultiLayer.java", - "deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/graph/SparkComputationGraph.java", - "deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/main/java/org/deeplearning4j/spark/parameterserver/training/SharedTrainingMaster.java", - "deeplearning4j-scaleout/spark/dl4j-spark/src/main/java/org/deeplearning4j/spark/impl/paramavg/ParameterAveragingTrainingMaster.java" - ] - } - ] -} - diff --git a/docs/deeplearning4j-scaleout/templates/apiref.md b/docs/deeplearning4j-scaleout/templates/apiref.md deleted file mode 100644 index 855ae6a79..000000000 --- a/docs/deeplearning4j-scaleout/templates/apiref.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -title: "Deeplearning4j on Spark: API Reference" -short_title: API Reference -description: "Deeplearning4j on Spark: API Reference" -category: Distributed Deep Learning -weight: 4 ---- - -# API Reference - -This page provides the API reference for key classes required to do distributed training with DL4J on Spark. 
Before going through these, make sure you have read the introduction guide for deeplearning4j Spark training [here](deeplearning4j-scaleout-intro). - -{{autogenerated}} diff --git a/docs/deeplearning4j-scaleout/templates/data-howto.md b/docs/deeplearning4j-scaleout/templates/data-howto.md deleted file mode 100644 index fefdcfb10..000000000 --- a/docs/deeplearning4j-scaleout/templates/data-howto.md +++ /dev/null @@ -1,490 +0,0 @@ ---- -title: "Deeplearning4j on Spark: How To Build Data Pipelines" -short_title: Spark Data Pipelines Guide -description: "Deeplearning4j on Spark: How To Build Data Pipelines" -category: Distributed Deep Learning -weight: 3 ---- - -# Deeplearning4j on Spark: How To Build Data Pipelines - -This page provides some guides on how to create data pipelines for both training and evaluation when using Deeplearning4j on Spark. - -This page assumes some familiarity with Spark (RDDs, master vs. workers, etc) and Deeplearning4j (networks, DataSet etc). - -As with training on a single machine, the final step of a data pipeline should be to produce a DataSet (single features arrays, single label array) or MultiDataSet (one or more feature arrays, one or more label arrays). In the case of DL4J on Spark, the final step of a data pipeline is data in one of the following formats: -(a) an ```RDD```/```JavaRDD``` -(b) an ```RDD```/```JavaRDD``` -(c) a directory of serialized DataSet/MultiDataSet (minibatch) objects on network storage such as HDFS, S3 or Azure blob storage -(d) a directory of minibatches in some other format - -Once data is in one of those four formats, it can be used for training or evaluation. - -**Note:** When training multiple models on a single dataset, it is best practice to preprocess your data once, and save it to network storage such as HDFS. 
-Then, when training the network you can call ```SparkDl4jMultiLayer.fit(String path)``` or ```SparkComputationGraph.fit(String path)``` where ```path``` is the directory where you saved the files.
-
-
-Spark Data Preparation: How-To Guides
-* [How to prepare a RDD[DataSet] from CSV data for classification or regression](#csv)
-* [How to create a Spark data pipeline for training on images](#images)
-* [How to create a RDD[MultiDataSet] from one or more RDD[List[Writable]]](#multidataset)
-* [How to save a RDD[DataSet] or RDD[MultiDataSet] to network storage and use it for training](#saveloadrdd)
-* [How to prepare data on a single machine for use on a cluster: saving DataSets](#singletocluster)
-* [How to prepare data on a single machine for use on a cluster: map/sequence files](#singletocluster2)
-* [How to load multiple CSVs (one sequence per file) for RNN data pipelines](#csvseq)
-* [How to load prepared minibatches in custom format](#customformat)
-
-

    -
-## How to prepare a RDD[DataSet] from CSV data for classification or regression
-
-This guide shows how to load data contained in one or more CSV files and produce a ```JavaRDD``` for export, training or evaluation on Spark.
-
-The process is fairly straightforward. Note that the ```DataVecDataSetFunction``` is very similar to the ```RecordReaderDataSetIterator``` that is often used for single machine training.
-
-For example, suppose the CSV had the following format - 6 total columns: 5 features followed by an integer class index for classification, and 10 possible classes
-
-```
-1.0,3.2,4.5,1.1,6.3,0
-1.6,2.4,5.9,0.2,2.2,1
-...
-```
-
-we could load this data for classification using the following code:
-```
-String filePath = "hdfs:///your/path/some_csv_file.csv";
-JavaSparkContext sc = new JavaSparkContext();
-JavaRDD rddString = sc.textFile(filePath);
-RecordReader recordReader = new CSVRecordReader(',');
-JavaRDD> rddWritables = rddString.map(new StringToWritablesFunction(recordReader));
-
-int labelIndex = 5; //Labels: a single integer representing the class index in column number 5
-int numLabelClasses = 10; //10 classes for the label
-JavaRDD rddDataSetClassification = rddWritables.map(new DataVecDataSetFunction(labelIndex, numLabelClasses, false));
-```
-
-However, if this dataset was for regression instead, with again 6 total columns, 3 feature columns (positions 0, 1 and 2 in the file rows) and 3 label columns (positions 3, 4 and 5) we could load it using the same process as above, but changing the last 3 lines to:
-
-```
-int firstLabelColumn = 3; //First column index for label
-int lastLabelColumn = 5; //Last column index for label
-JavaRDD rddDataSetRegression = rddWritables.map(new DataVecDataSetFunction(firstLabelColumn, lastLabelColumn, true, null, null));
-```
-
-

    -
-## How to create a RDD[MultiDataSet] from one or more RDD[List[Writable]]
-
-RecordReaderMultiDataSetIterator (RRMDSI) is the most common way to create MultiDataSet instances for single-machine training data pipelines.
-It is possible to use RRMDSI for Spark data pipelines, where data is coming from one or more of ```RDD>``` (for 'standard' data) or ```RDD>``` (for sequence data).
-
-**Case 1: Single ```RDD>``` to ```RDD```**
-
-Consider the following *single node* (non-Spark) data pipeline for a CSV classification task.
-```
-RecordReader recordReader = new CSVRecordReader(numLinesToSkip,delimiter);
-recordReader.initialize(new FileSplit(new ClassPathResource("iris.txt").getFile()));
-
-int batchSize = 32;
-int labelColumn = 4;
-int numClasses = 3;
-MultiDataSetIterator iter = new RecordReaderMultiDataSetIterator.Builder(batchSize)
-    .addReader("data", recordReader)
-    .addInput("data", 0, labelColumn-1)
-    .addOutputOneHot("data", labelColumn, numClasses)
-    .build();
-```
-
-The equivalent Spark data pipeline is the following:
-
-```
-JavaRDD> rdd = sc.textFile(f.getPath()).map(new StringToWritablesFunction(new CSVRecordReader()));
-
-MultiDataSetIterator iter = new RecordReaderMultiDataSetIterator.Builder(batchSize)
-    .addReader("data", new SparkSourceDummyReader(0)) //Note the use of the "SparkSourceDummyReader"
-    .addInput("data", 0, labelColumn-1)
-    .addOutputOneHot("data", labelColumn, numClasses)
-    .build();
-JavaRDD mdsRdd = IteratorUtils.mapRRMDSI(rdd, iter);
-```
-
-For Sequence data (```List>```) you can use SparkSourceDummySeqReader instead.
-
-**Case 2: Multiple ```RDD>``` or ```RDD>``` to ```RDD```**
-
-For this case, the process is much the same. However, internally, a join is used.
-
-```
-JavaRDD> rdd1 = ...
-JavaRDD> rdd2 = ... 
- -RecordReaderMultiDataSetIterator rrmdsi = new RecordReaderMultiDataSetIterator.Builder(batchSize) - .addReader("rdd1", new SparkSourceDummyReader(0)) //0 = use first rdd in list - .addReader("rdd2", new SparkSourceDummyReader(1)) //1 = use second rdd in list - .addInput("rdd1", 1, 2) // - .addOutput("rdd2", 1, 2) - .build(); - -List>> list = Arrays.asList(rdd1, rdd2); -int[] keyIdxs = new int[]{0,0}; //Column 0 in rdd1 and rdd2 is the 'key' used for joining -boolean filterMissing = false; //If true: filter out any records that don't have matching keys in all RDDs -JavaRDD mdsRdd = IteratorUtils.mapRRMDSI(list, null, keyIdxs, null, filterMissing, rrmdsi); -``` - -

    - -## How to save a RDD[DataSet] or RDD[MultiDataSet] to network storage and use it for training - -As noted at the start of this page, it is considered a best practice to preprocess and export your data once (i.e., save to network storage such as HDFS and reuse), rather than fitting from an ```RDD``` or ```RDD``` directly in each training job. - -There are a number of reasons for this: -* Better performance (avoid redundant loading/calculation): When fitting multiple models from the same dataset, it is faster to preprocess this data once and save to disk rather than preprocessing it again for every single training run. -* Minimizing memory and other resources: By exporting and fitting from disk, we only need to keep the DataSets we are currently using (plus a small async prefetch buffer) in memory, rather than also keeping many unused DataSet objects in memory. Exporting results in lower total memory use and hence we can use larger networks, larger minibatch sizes, or allocate fewer resources to our job. -* Avoiding recomputation: When an RDD is too large to fit into memory, some parts of it may need to be recomputed before it can be used (depending on the cache settings). When this occurs, Spark will recompute parts of the data pipeline multiple times, costing us both time and memory. A pre-export step avoids this recomputation entirely. - -**Step 1: Saving** - -Saving the DataSet objects once you have an ```RDD``` is quite straightforward: -``` -JavaRDD rddDataSet = ... 
-int minibatchSize = 32; //Minibatch size of the saved DataSet objects -String exportPath = "hdfs:///path/to/export/data"; -JavaRDD paths = rddDataSet.mapPartitionsWithIndex(new BatchAndExportDataSetsFunction(minibatchSize, exportPath), true); -``` -Keep in mind that this is a map function, so no data will be saved until the paths RDD is executed - i.e., you should follow this with an operation such as: -``` -paths.saveAsTextFile("hdfs:///path/to/text/file.txt"); //Specified file will contain paths/URIs of all saved DataSet objects -``` -or -``` -List paths = paths.collect(); //Collection of paths/URIs of all saved DataSet objects -``` -or -``` -paths.foreach(new VoidFunction() { - @Override - public void call(String path) { - //Some operation on each path - } -}); -``` - - -Saving an ```RDD``` can be done in the same way using ```BatchAndExportMultiDataSetsFunction``` instead, which takes the same arguments. - -**Step 2: Loading and Fitting** - -The exported data can be used in a few ways. -First, it can be used to fit a network directly: -``` -String exportPath = "hdfs:///path/to/export/data"; -SparkDl4jMultiLayer net = ... -net.fit(exportPath); //Loads the serialized DataSet objects found in the 'exportPath' directory -``` -Similarly, we can use ```SparkComputationGraph.fitMultiDataSet(String path)``` if we saved an ```RDD``` instead. 
- - -Alternatively, we can load up the paths in a few different ways, depending on if or how we saved them: - -``` -JavaSparkContext sc = new JavaSparkContext(); - -//If we used saveAsTextFile: -String saveTo = "hdfs:///path/to/text/file.txt"; -paths.saveAsTextFile(saveTo); //Save -JavaRDD loadedPaths = sc.textFile(saveTo); //Load - -//If we used collecting: -List paths = paths.collect(); //Collect -JavaRDD loadedPaths = sc.parallelize(paths); //Parallelize - -//If we want to list the directory contents: -String exportPath = "hdfs:///path/to/export/data"; -JavaRDD loadedPaths = SparkUtils.listPaths(sc, exportPath); //List paths using org.deeplearning4j.spark.util.SparkUtils -``` - -Then we can execute training on these paths by using methods such as ```SparkDl4jMultiLayer.fitPaths(JavaRDD)``` - - -

    - -## How to prepare data on a single machine for use on a cluster: saving DataSets - -Another possible workflow is to start with the data pipeline on a single machine, and export the DataSet or MultiDataSet objects for use on the cluster. -This workflow clearly isn't as scalable as preparing data on a cluster (you are using just one machine to prepare data) but it can be an easy option in some cases, especially when you have an existing data pipeline. - -This section assumes you have an existing ```DataSetIterator``` or ```MultiDataSetIterator``` used for single-machine training. There are many different ways to create one, which is outside of the scope of this guide. - -**Step 1: Save the DataSets or MultiDataSets** - -Saving the contents of a DataSet to a local directory can be done using the following code: -``` -DataSetIterator iter = ... -File rootDir = new File("/saving/directory/"); -int count = 0; -while(iter.hasNext()){ - DataSet ds = iter.next(); - File outFile = new File(rootDir, "dataset_" + (count++) + ".bin"); - ds.save(outFile); -} -``` -Note that for the purposes of Spark, the exact file names don't matter. -The process for saving MultiDataSets is almost identical. - -As an aside: you can read these saved DataSet objects on a single machine (for non-Spark training) using [FileDataSetIterator](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-data/deeplearning4j-utility-iterators/src/main/java/org/deeplearning4j/datasets/iterator/file/FileDataSetIterator.java)). - -An alternative approach is to save directly to the cluster using output streams, to (for example) HDFS. This can only be done if the machine running the code is properly configured with the required libraries and access rights. 
For example, to save the DataSets directly to HDFS you could use:
-
-```
-JavaSparkContext sc = new JavaSparkContext();
-FileSystem fileSystem = FileSystem.get(sc.hadoopConfiguration());
-String outputDir = "hdfs:///my/output/location/";
-
-DataSetIterator iter = ...
-int count = 0;
-while(iter.hasNext()){
-    DataSet ds = iter.next();
-    String filePath = outputDir + "dataset_" + (count++) + ".bin";
-    try (OutputStream os = new BufferedOutputStream(fileSystem.create(new Path(filePath)))) {
-        ds.save(os);
-    }
-}
-```
-
-
-**Step 2: Load and Train on a Cluster**
-The saved DataSet objects can then be copied to the cluster or network file storage (for example, using Hadoop FS utilities on a Hadoop cluster), and used as follows:
-```
-String dir = "hdfs:///data/copied/here";
-SparkDl4jMultiLayer net = ...
-net.fit(dir); //Loads the serialized DataSet objects found in the 'dir' directory
-```
-or alternatively/equivalently, we can list the paths as an RDD using:
-```
-String dir = "hdfs:///data/copied/here";
-JavaRDD paths = SparkUtils.listPaths(sc, dir); //List paths using org.deeplearning4j.spark.util.SparkUtils
-```
-
-

    - -## How to prepare data on a single machine for use on a cluster: map/sequence files - -An alternative approach is to use Hadoop MapFile and SequenceFiles, which are efficient binary storage formats. -This can be used to convert the output of any DataVec ```RecordReader``` or ```SequenceRecordReader``` (including a custom record reader) to a format usable for use on Spark. -MapFileRecordWriter and MapFileSequenceRecordWriter require the following dependencies: -``` - - org.datavec - datavec-hadoop - ${datavec.version} - - - org.apache.hadoop - hadoop-common - ${hadoop.version} - - - -``` - -**Step 1: Create a MapFile Locally** -In the following example, a CSVRecordReader will be used, but any other RecordReader could be used in its place: -``` -File csvFile = new File("/path/to/file.csv") -RecordReader recordReader = new CSVRecordReader(); -recordReader.initialize(new FileSplit(csvFile)); - -//Create map file writer -String outPath = "/map/file/root/dir" -MapFileRecordWriter writer = new MapFileRecordWriter(new File(outPath)); - -//Convert to MapFile binary format: -RecordReaderConverter.convert(recordReader, writer); -``` - -The process for using a ```SequenceRecordReader``` combined with a ```MapFileSequenceRecordWriter``` is virtually the same. - -Note also that ```MapFileRecordWriter``` and ```MapFileSequenceRecordWriter``` both support splitting - i.e., creating multiple smaller map files instead of creating one single (potentially multi-GB) map file. Using splitting is recommended when saving data in this manner for use with Spark. - -**Step 2: Copy to HDFS or other network file storage** - -The exact process is beyond the scope of this guide. However, it should be sufficient to simply copy the directory ("/map/file/root/dir" in the example above) to a location on HDFS. 
- -**Step 3: Read and Convert to ```RDD``` for Training** - -We can load the data for training using the following: -``` -JavaSparkContext sc = new JavaSparkContext(); -String pathOnHDFS = "hdfs:///map/file/directory"; -JavaRDD> rdd = SparkStorageUtils.restoreMapFile(pathOnHDFS, sc); //import: org.datavec.spark.storage.SparkStorageUtils - -//Note at this point: it's the same as the latter part of the CSV how-to guide -int labelIndex = 5; //Labels: a single integer representing the class index in column number 5 -int numLabelClasses = 10; //10 classes for the label -JavaRDD rddDataSetClassification = rdd.map(new DataVecDataSetFunction(labelIndex, numLabelClasses, false)); -``` - -

    -
-## How to load multiple CSVs (one sequence per file) for RNN data pipelines
-
-This guide shows how to load CSV files for training an RNN.
-The assumption is that the dataset is comprised of multiple CSV files, where:
-
-* each CSV file represents one sequence
-* each row/line of the CSV contains the values for one time step (one or more columns/values, same number of values in all rows for all files)
-* each CSV may contain a different number of lines to other CSVs (i.e., variable length sequences are OK here)
-* header lines either aren't present in any files, or are present in all files
-
-A data pipeline can be created using the following process:
-```
-String directoryWithCsvFiles = "hdfs:///path/to/directory";
-JavaPairRDD origData = sc.binaryFiles(directoryWithCsvFiles);
-
-int numHeaderLinesEachFile = 0; //No header lines
-String delimiter = ","; //Comma delimited files
-SequenceRecordReader seqRR = new CSVSequenceRecordReader(numHeaderLinesEachFile, delimiter);
-
-JavaRDD>> sequencesRdd = origData.map(new SequenceRecordReaderFunction(seqRR));
-
-//Similar to the non-sequence CSV guide using DataVecDataSetFunction. Assuming classification here:
-int labelIndex = 5; //Index of the label column. Occurs at position/column 5
-int numClasses = 10; //Number of classes for classification
-JavaRDD dataSetRdd = sequencesRdd.map(new DataVecSequenceDataSetFunction(labelIndex, numClasses, false));
-```
-
-

    - -## How to create a Spark data pipeline for training on images - -This guide shows how to create an ```RDD``` for image classification, starting from images stored either locally, or on a network file system such as HDFS. - -The approach here used (added in 1.0.0-beta3) is to first preprocess the images into batches of files - [FileBatch](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-common/src/main/java/org/nd4j/api/loader/FileBatch.java) objects. -The motivation for this approach is simple: the original image files typically use efficient compresion (JPEG for example) which is much more space (and network) efficient than a bitmap (int8 or 32-bit floating point) representation. However, on a cluster we want to minimize disk reads due to latency issues with remote storage - one file read/transfer is going to be faster than ```minibatchSize``` remote file reads. - -The [TinyImageNet example](https://github.com/eclipse/deeplearning4j-examples/tree/master/dl4j-spark-examples/dl4j-spark/src/main/java/org/deeplearning4j/tinyimagenet) also shows how this can be done. - -Note that one limitation of the implementation is that the set of classes (i.e., the class/category labels when doing classification) needs to be known, provided or collected manually. This differs from using ImageRecordReader for classification on a single machine, which can automatically infer the set of class labels. - -First, assume the images are in subdirectories based on their class labels. For example, suppose there are two classes, "cat" and "dog", the directory structure would look like: -``` -rootDir/cat/img0.jpg -rootDir/cat/img1.jpg -... -rootDir/dog/img0.jpg -rootDir/dog/img1.jpg -... 
-```
-(Note the file names don't matter in this example - however, the parent directory names are the class labels)
-
-**Step 1 (option 1 of 2): Preprocess Locally**
-
-Local preprocessing can be done as follows:
-```
-String sourceDirectory = "/home/user/my_images"; //Where your data is located
-String destinationDirectory = "/home/user/preprocessed"; //Where the preprocessed data should be written
-int batchSize = 32; //Number of examples (images) in each FileBatch object
-SparkDataUtils.createFileBatchesLocal(sourceDirectory, NativeImageLoader.ALLOWED_FORMATS, true, destinationDirectory, batchSize);
-```
-
-The full import for SparkDataUtils is ```org.deeplearning4j.spark.util.SparkDataUtils```.
-
-After preprocessing has been completed, the directory can be copied to the cluster for use in training (Step 2).
-
-**Step 1 (option 2 of 2): Preprocess using Spark**
-
-Alternatively, if the original images are on remote file storage (such as HDFS), we can use the following:
-```
-String sourceDirectory = "hdfs:///data/my_images"; //Where your data is located
-String destinationDirectory = "hdfs:///data/preprocessed"; //Where the preprocessed data should be written
-int batchSize = 32; //Number of examples (images) in each FileBatch object
-SparkDataUtils.createFileBatchesSpark(sourceDirectory, destinationDirectory, batchSize, sparkContext);
-```
-
-**Step 2: Training**
-The data pipeline for image classification can be constructed as follows. 
This code is taken from the [TinyImageNet example](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-spark-examples/dl4j-spark/src/main/java/org/deeplearning4j/tinyimagenet/TrainSpark.java): -``` -//Create data loader -int imageHeightWidth = 64; //64x64 pixel input to network -int imageChannels = 3; //RGB -PathLabelGenerator labelMaker = new ParentPathLabelGenerator(); -ImageRecordReader rr = new ImageRecordReader(imageHeightWidth, imageHeightWidth, imageChannels, labelMaker); -rr.setLabels(Arrays.asList("cat", "dog")); -int numClasses = 2; -RecordReaderFileBatchLoader loader = new RecordReaderFileBatchLoader(rr, minibatch, 1, numClasses); -loader.setPreProcessor(new ImagePreProcessingScaler()); //Scale 0-255 valued pixels to 0-1 range - - -//Fit the network -String trainDataPath = "hdfs:///data/preprocessed"; //Where the preprocessed data is located -JavaRDD pathsTrain = SparkUtils.listPaths(sc, trainDataPath); -for (int i = 0; i < numEpochs; i++) { - sparkNet.fitPaths(pathsTrain, loader); -} -``` - -And that's it. - -Note: for other label generation cases (such as labels provided from the filename instead of parent directory), or for tasks such as semantic segmentation, you can substitute a different PathLabelGenerator instead of the default. For example, if the label should come from the file name, you can use ```PatternPathLabelGenerator``` instead. -Let's say images are in the format "cat_img1234.jpg", "dog_2309.png" etc. We can use the following process: -``` -PathLabelGenerator labelGenerator = new PatternPathLabelGenerator("_", 0); //Split on the "_" character, and take the first value -ImageRecordReader imageRecordReader = new ImageRecordReader(imageHW, imageHW, imageChannels, labelGenerator); -``` - -Note that PathLabelGenerator returns a Writable object, so for tasks like image segmentation, you can return an INDArray using the NDArrayWritable class in a custom PathLabelGenerator. - -

    - -## How to load prepared minibatches in custom format - -DL4J Spark training supports the ability to load data serialized in a custom format. The assumption is that each file on the remote/network storage represents a single minibatch of data in some readable format. - -Note that this approach is typically not required or recommended for most users, but is provided as an additional option for advanced users or those with pre-prepared data in a custom format or a format that is not natively supported by DL4J. -When files represent a single record/example (instead of a minibatch) in a custom format, a custom RecordReader could be used instead. - -The interfaces of note are: - -* [DataSetLoader](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-core/src/main/java/org/deeplearning4j/api/loader/DataSetLoader.java) -* [MultiDataSetLoader](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-core/src/main/java/org/deeplearning4j/api/loader/MultiDataSetLoader.java) - -Both of which extend the single-method [Loader](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-common/src/main/java/org/nd4j/api/loader/Loader.java) interface. - -Suppose a HDFS directory contains a number of files, each being a minibatch in some custom format. -These can be loaded using the following process: -``` -JavaSparkContext sc = new JavaSparkContext(); -String dataDirectory = "hdfs:///path/with/data"; -JavaRDD loadedPaths = SparkUtils.listPaths(sc, dataDirectory); //List paths using org.deeplearning4j.spark.util.SparkUtils - -SparkDl4jMultiLayer net = ... 
-Loader myCustomLoader = new MyCustomLoader(); -net.fitPaths(loadedPaths, myCustomLoader); -``` - -Where the custom loader class looks something like: -``` -public class MyCustomLoader implements DataSetLoader { - @Override - public DataSet load(Source source) throws IOException { - InputStream inputStream = source.getInputStream(); - - INDArray features = ...; - INDArray labels = ...; - return new DataSet(features, labels); - } -} -``` diff --git a/docs/deeplearning4j-scaleout/templates/howto.md b/docs/deeplearning4j-scaleout/templates/howto.md deleted file mode 100644 index af55969c6..000000000 --- a/docs/deeplearning4j-scaleout/templates/howto.md +++ /dev/null @@ -1,721 +0,0 @@ ---- -title: "Deeplearning4j on Spark: How To Guides" -short_title: How To Guide -description: "Deeplearning4j on Spark: How To Guides" -category: Distributed Deep Learning -weight: 2 ---- - -# Deeplearning4j on Spark: How To Guides - -This page contains a number of how-to guides for common distributed training tasks. -Note that for guides on building data pipelines, see [here](deeplearning4j-scaleout-data-howto). - -Before going through these guides, make sure you have read the introduction guide for deeplearning4j Spark training [here](deeplearning4j-scaleout-intro). 
-
-Before Training Guides
-* [How to build an uber-JAR for training via Spark submit using Maven](#uberjar)
-* [How to use GPUs for training on Spark](#gpus)
-* [How to use CPUs on master, GPUs on the workers](#cpusgpus)
-* [How to configure memory settings for Spark](#memory)
-* [How to Configure Garbage Collection for Workers](#gc)
-* [How to use Kryo Serialization with DL4J and ND4J](#kryo)
-* [How to use YARN and GPUs](#yarngpus)
-* [How to configure Spark Locality Configuration](#locality)
-
-During and After Training Guides
-* [How to configure encoding thresholds](#threshold)
-* [How to perform distributed test set evaluation](#evaluation)
-* [How to save (and load) neural networks trained on Spark](#saveload)
-* [How to perform distributed inference](#inference)
-
-Problems and Troubleshooting Guides
-* [How to debug common Spark dependency problems (NoClassDefFoundException and similar)](#dependencyproblems)
-* [How to fix "Error querying NTP server" errors](#ntperror)
-* [How to Cache RDD[INDArray] and RDD[DataSet] Safely](#caching)
-* [Fixing libgomp issues on Amazon Elastic MapReduce](#libgomp)
-* [Failed training on Ubuntu 16.04 (Ubuntu bug that may affect DL4J Spark users)](#ubuntu16)
-
-

    - -# Before Training - How-To Guides - -## How to build an uber-JAR for training via Spark submit using Maven - -When submitting a training job to a cluster, a typical workflow is to build an "uber-jar" that is submitted to Spark submit. An uber-jar is single JAR file containing all of the dependencies (libraries, class files, etc) required to run a job. -Note that Spark submit is a script that comes with a Spark distribution that users submit their job (in the form of a JAR file) to, in order to begin execution of their Spark job. - -This guide assumes you already have code set up to train a network on Spark. - -**Step 1: Decide on the required dependencies.** - -There is a lot of overlap with single machine training with DL4J and ND4J. For example, for both single machine and Spark training you should include the standard set of deeplearning4j dependencies, such as: -* deeplearning4j-core -* deeplearning4j-spark -* nd4j-native-platform (for CPU-only training) - -In addition, you will need to include the Deeplearning4j's Spark module, ```dl4j-spark_2.10``` or ```dl4j-spark_2.11```. This module is required for both development and execution of Deeplearning4j Spark jobs. -Be careful to use the spark version that matches your cluster - for both the Spark version (Spark 1 vs. Spark 2) and the Scala version (2.10 vs. 2.11). If these are mismatched, your job will likely fail at runtime. - -Dependency example: Spark 2, Scala 2.11: -``` - - org.deeplearning4j - dl4j-spark_2.11 - 1.0.0-beta2_spark_2 - -``` - -Depedency example, Spark 1, Scala 2.10: -``` - - org.deeplearning4j - dl4j-spark_2.10 - 1.0.0-beta2_spark_1 - -``` - -Note that if you add a Spark dependency such as spark-core_2.11, this can be set to ```provided``` scope in your pom.xml (see [Maven docs](https://maven.apache.org/guides/introduction/introduction-to-dependency-mechanism.html#Dependency_Scope) for more details), as Spark submit will add Spark to the classpath. 
Adding this dependency is not required for execution on a cluster, but may be needed if you want to test or debug a Spark-based job on your local machine. - - -When training on CUDA GPUs, there are a couple of possible cases when adding CUDA dependencies: - -**Case 1: Cluster nodes have CUDA toolkit installed on the master and worker nodes** - -When the CUDA toolkit and CuDNN are available on the cluster nodes, we can use a smaller dependency: -* If the OS building the uber-jar is the same OS as the cluster: include nd4j-cuda-x.x -* If the OS building the uber-jar is different to the cluster OS (i.e., build on Windows, execute Spark on Linux cluster): include nd4j-cuda-x.x-platform -* In both cases, include -where x.x is the CUDA version - for example, x.x=9.2 for CUDA 9.2. - -**Case 2: Cluster nodes do NOT have the CUDA toolkit installed on the master and worker nodes** - -When CUDA/CuDNN are NOT installed on the cluster nodes, we can do the following: -* First, include the dependencies as per 'Case 1' above -* Then include the "redist" javacpp-presets for the cluster operating system, as described here: [DL4J CuDNN Docs](./deeplearning4j-config-cudnn) - - -**Step 2: Configure your pom.xml file for building an uber-jar** - -When using Spark submit, you will need an uber-jar to submit to start and run your job. After configuring the relevant dependencies in step 1, we need to configure the pom.xml file to properly build the uber-jar. - -We recommend that you use the maven shade plugin for building an uber-jar. There are alternative tools/plugins for this purpose, but these do not always include all relevant files from the source jars, such as those required for Java's ServiceLoader mechanism to function correctly. (The ServiceLoader mechanism is used by ND4J and a lot of other software libraries). 
- -A Maven shade configuration suitable for this purpose is provided in the example standalone sample project [pom.xml file](https://github.com/eclipse/deeplearning4j-examples/blob/master/standalone-sample-project/pom.xml): -``` - - - - - - - org.apache.maven.plugins - maven-shade-plugin - ${maven-shade-plugin.version} - - true - bin - true - - - *:* - - org/datanucleus/** - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - - - - - package - - shade - - - - - reference.conf - - - - - - - - - - - -``` - - -**Step 3: Build the uber jar** - -Finally, open up a command line window (bash on Linux, cmd on Windows, etc) simply run ```mvn package -DskipTests``` to build the uber-jar for your project. -Note that the uber-jar should be present under ```/target/-bin.jar```. -Be sure to use the large ```...-bin.jar``` file as this is the shaded jar with all of the dependencies. - -That's is - you should now have an uber-jar that is suitable for submitting to spark-submit for training networks on Spark with CPUs or NVIDA (CUDA) GPUs. - - -

    - -## How to use GPUs for training on Spark - -Deeplearning4j and ND4J support GPU acceleration using NVIDA GPUs. DL4J Spark training can also be performed using GPUs. - -DL4J and ND4J are designed in such a way that the code (neural network configuration, data pipeline code) is "backend independent". That is, you can write the code once, and execute it on either a CPU or GPU, simply by including the appropriate backend (nd4j-native backend for CPUs, or nd4j-cuda-x.x for GPUs). Executing on Spark is no different from executing on a single node in this respect: you need to simply include the appropriate ND4J backend, and make sure your machines (master/worker nodes in the case) are appropriately set with the CUDA libraries (see the [uber-jar guide](#uberjar) for running on CUDA without needing to install CUDA/cuDNN on each node). - -When running on GPUs, there are a few components: -(a) The ND4J CUDA backend (nd4j-cuda-x.x dependency) -(b) The CUDA toolkit -(c) The Deeplearning4j CUDA dependency to gain cuDNN support (deeplearning4j-cuda-x.x) -(d) The cuDNN library files - -Both (a) and (b) must be available for ND4J/DL4J to run using an available CUDA GPU run. -(c) and (d) are optional, though are recommended to get optimal performance - NVIDIA's cuDNN library is able to significantly speed up training for many layers, such as convolutional layers (ConvolutionLayer, SubsamplingLayer, BatchNormalization, etc) and LSTM RNN layers. - -For configuring dependencies for Spark jobs, see the [uber-jar section](#uberjar) above. -For configuring cuDNN on a single node, see [Using Deeplearning4j with CuDNN](./deeplearning4j-config-cudnn) - -

    - -## How to use CPUs on master, GPUs on the workers - -In some cases, it may make sense to run the master using CPUs only, and the workers using GPUs. -If resources (i.e., the number of available GPU machines) are not constrained, it may simply be easier to have a homogeneous cluster: i.e., set up the cluster so that the master is using a GPU for execution also. - -Assuming the master/driver is executing on a CPU machine, and the workers are executing on GPU machines, you can simply include both backends (i.e., both the ```nd4j-cuda-x.x``` and ```nd4j-native``` dependencies as described in the [uber-jar section](#uberjar)). - -When multiple backends are present on the classpath, by default the CUDA backend will be tried first. If this cannot be loaded, the CPU (nd4j-native) backend will be loaded second. Thus, if the driver does not have a GPU, it should fall back to using a CPU. However, this default behaviour can be changed by setting the ```BACKEND_PRIORITY_CPU``` or ```BACKEND_PRIORITY_GPU``` environment variables on the master/driver, as described [here](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-common/src/main/java/org/nd4j/config/ND4JEnvironmentVars.java). -The exact process for setting environment variables may depend on the cluster manager - Spark standalone vs. YARN vs. Mesos. Please consult the documentation for each on how to set the environment variables for Spark jobs for the driver/master. - -

    - -## How to configure memory settings for Spark - -For important background on how memory and memory configuration works for DL4J and ND4J, start by reading [Memory management for ND4J/DL4J](./deeplearning4j-config-memory). - -The memory management on Spark is similar to memory management for single node training: -* On-heap memory is configured using the standard Java Xms and Xmx memory configuration settings -* Off-heap memory is configured using the javacpp system properties - -However, memory configuration in the context of Spark adds some additional complications: -1. Often, memory configuration has to be done separately (sometimes using different mechanisms) for the driver/master vs. the workers -2. The approach for configuring memory can depend on the cluster resource manager - Spark standalone vs. YARN vs. Mesos, etc -3. Cluster resource manager default memory settings are often not appropriate for libraries (such as DL4J/ND4J) that rely heavily on off-heap memory - -See the Spark documentation for your cluster manager: -* [YARN](https://spark.apache.org/docs/latest/running-on-yarn.html) -* [Mesos](https://spark.apache.org/docs/latest/running-on-mesos.html) -* [Spark Standalone](https://spark.apache.org/docs/latest/spark-standalone.html) - -You should set 4 things: -1. The worker on-heap memory (Xmx) - usually set as an argument for Spark submit (for example, ```--executor-memory 4g``` for YARN) -2. The worker off-heap memory (javacpp system properties options) (for example, ```--conf "spark.executor.extraJavaOptions=-Dorg.bytedeco.javacpp.maxbytes=8G"```) -3. The driver on-heap memory - usually set as an argument for Spark submit (for example, ```--driver-memory 4g```) -4. The driver off-heap memory (javacpp system properties options) (for example, ```--conf "spark.driver.extraJavaOptions=-Dorg.bytedeco.javacpp.maxbytes=8G"```) - - -Some notes: -* On YARN, it is generally necessary to set the ```spark.yarn.driver.memoryOverhead``` and ```spark.yarn.executor.memoryOverhead``` properties. The default settings are much too small for DL4J training. 
-* On Spark standalone, you can also configure memory by modifying the ```conf/spark-env.sh``` file on each node, as described in the [Spark configuration docs](https://spark.apache.org/docs/latest/configuration.html#environment-variables). For example, you could add the following lines to set 8GB heap for the driver, 12 GB off-heap for the driver, 12GB heap for the workers, and 18GB off-heap for the workers: - * ```SPARK_DRIVER_OPTS=-Dorg.bytedeco.javacpp.maxbytes=12G``` - * ```SPARK_DRIVER_MEMORY=8G``` - * ```SPARK_WORKER_OPTS=-Dorg.bytedeco.javacpp.maxbytes=18G``` - * ```SPARK_WORKER_MEMORY=12G``` - -All up, this might look like (for YARN, with 4GB on-heap, 5GB off-heap, 6GB YARN off-heap overhead): -``` ---class my.class.name.here --num-executors 4 --executor-cores 8 --executor-memory 4G --driver-memory 4G --conf "spark.executor.extraJavaOptions=-Dorg.bytedeco.javacpp.maxbytes=5G" --conf "spark.driver.extraJavaOptions=-Dorg.bytedeco.javacpp.maxbytes=5G" --conf spark.yarn.executor.memoryOverhead=6144 -``` - -

    - -## How to Configure Garbage Collection for Workers - -One determinant of the performance of training is the frequency of garbage collection. -When using [Workspaces](https://deeplearning4j.org/docs/latest/deeplearning4j-config-memory) (see also [this](https://deeplearning4j.org/docs/latest/deeplearning4j-config-workspaces)), which are enabled by default, it can be helpful to reduce the frequency of garbage collection. -For single machine training (and on the driver) this is easy: -``` -// this will limit frequency of gc calls to 5000 milliseconds -Nd4j.getMemoryManager().setAutoGcWindow(5000) - -// OR you could totally disable it -Nd4j.getMemoryManager().togglePeriodicGc(false); -``` - -However, setting this on the driver will not change the settings on the workers. -Instead, it can be set for the workers as follows: -``` -new SharedTrainingMaster.Builder(voidConfiguration, minibatch) - - .workerTogglePeriodicGC(true) //Periodic garbage collection is enabled... - .workerPeriodicGCFrequency(5000) //...and is configured to be performed every 5 seconds (every 5000ms) - .build(); -``` - - -The default (as of 1.0.0-beta3) is to perform periodic garbage collection every 5 seconds on the workers. - -

    - -## How to use Kryo Serialization with DL4J and ND4J - -Deeplearning4j and ND4J can utilize Kryo serialization, with appropriate configuration. -Note that due to the off-heap memory of INDArrays, Kryo will offer less of a performance benefit compared to using Kryo in other contexts. - -To enable Kryo serialization, first add the [nd4j-kryo dependency](https://search.maven.org/search?q=nd4j-kryo): -``` - - org.nd4j - nd4j-kryo_2.11 - ${dl4j-version} - -``` -where ```${dl4j-version}``` is the version used for DL4J and ND4J. - -Then, at the start of your training job, add the following code: -``` - SparkConf conf = new SparkConf(); - conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); - conf.set("spark.kryo.registrator", "org.nd4j.Nd4jRegistrator"); -``` - -Note that when using Deeplearning4j's SparkDl4jMultiLayer or SparkComputationGraph classes, a warning will be logged if the Kryo configuration is incorrect. - -

    - -## How to use YARN and GPUs - -For DL4J, the only requirement for CUDA GPUs is to use the appropriate backend, with the appropriate NVIDIA libraries either installed on each node, or provided in the uber-JAR (see [Spark how-to guide](deeplearning4j-scaleout-howto) for more details). -For recent versions of YARN, some additional configuration may be required in some cases - see the [YARN GPU documentation](https://hadoop.apache.org/docs/r3.1.0/hadoop-yarn/hadoop-yarn-site/UsingGpus.html) for more details. - -Earlier version of YARN (for example, 2.7.x and similar) did not support GPUs natively. -For these versions, it is possible to utilize node labels to ensure that jobs are scheduled onto GPU-only nodes. For more details, see the Hadoop Yarn [documentation](https://hadoop.apache.org/docs/r2.7.3/hadoop-yarn/hadoop-yarn-site/NodeLabel.html) - -Note that YARN-specific memory configuration (see [memory how-to](deeplearning4j-scaleout-howto#memory)) is also required. - -

    - -## How to Configure Spark Locality Configuration - -Configuring Spark locality settings is an optional configuration option that can improve training performance. - -The summary: adding ```--conf spark.locality.wait=0``` to your Spark submit configuration may marginally reduce training times, by scheduling the network fit operations to be started sooner. - -For more details, see [link 1](https://spark.apache.org/docs/latest/tuning.html#data-locality) and [link 2](https://spark.apache.org/docs/latest/configuration.html#scheduling). - -

    - -# During and After Training Guides - -## How to Configure Encoding Thresholds - -Deeplearning4j's Spark implementation uses a threshold encoding scheme for sending parameter updates between nodes. This encoding scheme results in a small quantized message, which significantly reduces the network cost of communicating updates. See the [technical explanation page](./deeplearning4j-scaleout-technicalref) for more details on this encoding process. - -This threshold encoding process introduces a "distributed training specific" hyperparameter - the encoding threshold. -Both too large thresholds and too small thresholds can result in sub-optimal performance: - -* Large thresholds mean infrequent communication - too infrequent and convergence can suffer -* Small thresholds mean more frequent communication - but smaller changes are communicated at each step - -The encoding threshold to be used is controlled by the [ThresholdAlgorithm](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/encoding/ThresholdAlgorithm.java). The specific implementation of the ThresholdAlgorithm determines what threshold should be used. - -The default behaviour for DL4J is to use [AdaptiveThresholdAlgorithm](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/encoding/threshold/AdaptiveThresholdAlgorithm.java) which tries to keep the sparsity ratio in a certain range. -* The sparsity ratio is defined as numValues(encodedUpdate)/numParameters - 1.0 means fully dense (all values communicated), 0.0 means fully sparse (no values communicated) -* Larger thresholds mean more sparse values (less network communication), and a smaller threshold means less sparse values (more network communication) -* The AdaptiveThresholdAlgorithm tries to keep the sparsity ratio between 0.01 and 0.0001 by default. 
If the sparsity of the updates falls outside of this range, the threshold is either increased or decreased until it is within this range. -* An initial threshold value still needs to be set - we have found the default initial threshold to work well in most cases - -In practice, we have seen this adaptive threshold process work well. -The built-in implementations for threshold algorithms include: - -* AdaptiveThresholdAlgorithm -* [FixedThresholdAlgorithm](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/encoding/threshold/FixedThresholdAlgorithm.java): a fixed, non-adaptive threshold using the specified encoding threshold. -* [TargetSparsityThresholdAlgorithm](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/encoding/threshold/TargetSparsityThresholdAlgorithm.java): an adaptive threshold algorithm that targets a specific sparsity, and increases or decreases the threshold to try to match the target. - -In addition, DL4J has a [ResidualPostProcessor](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/encoding/ResidualPostProcessor.java) interface, with the default implementation being [ResidualClippingPostProcessor](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/encoding/residual/ResidualClippingPostProcessor.java) which clips the residual vector to a maximum of 5x the current threshold, every 5 steps. -The motivation for this is that the "left over" parts of the updates (i.e., those parts not communicated) are stored in the residual vector. 
If the updates are much larger than the threshold, we can have a phenomenon we have termed "residual explosion" - that is, the residual values can continue to grow to many times the threshold (hence would take many steps to communicate the gradient). The residual post processor is used to avoid this phenomenon. - -The threshold algorithm (and initial threshold) and the residual post processor can be set as follows: -``` -TrainingMaster tm = new SharedTrainingMaster.Builder(voidConfiguration, minibatch) - .thresholdAlgorithm(new AdaptiveThresholdAlgorithm(this.gradientThreshold)) - .residualPostProcessor(new ResidualClippingPostProcessor(5, 5)) - - .build(); -``` - -Finally, DL4J's SharedTrainingMaster also has an encoding debug mode, enabled by setting ```.encodingDebugMode(true)``` in the SharedTrainingmaster builder. -When this is enabled, each of the workers will log the current threshold, sparsity, and various other statistics about the encoding. -These statistics can be used to determine if the threshold is appropriately set: for example, many updates that are tens or hundreds of times the threshold may indicate the threshold is too low and should be increased; at the other end of the spectrum, very sparse updates (less than one in 10000 values being communicated) may indicate that the threshold should be decreased. - -

    - -## How to perform distributed test set evaluation - -Deeplearning4j supports most standard evaluation metrics for neural networks. For basic information on evaluation, see the [Deeplearning4j Evaluation Page](./deeplearning4j-nn-evaluation) - -All of the [evaluation metrics](./deeplearning4j-nn-evaluation) that Deeplearning4j supports can be calculated in a distributed manner using Spark. - -**Step 1: Prepare Your Data** - -Evaluation data for Deeplearning4j on Spark is very similar to training data. That is, you can use: -* ```RDD``` or ```JavaRDD``` for evaluating single input/output networks -* ```RDD``` or ```JavaRDD``` for evaluating multi input/output networks -* ```RDD``` or ```JavaRDD``` where each String is a path that points to serialized DataSet/MultiDataSet (or other minibatch file-based formats) on network storage such as HDFS. - -See the data page (TODO: LINK) for details on how to prepare your data into one of these formats. - -**Step 2: Prepare Your Network** - -Creating your network is straightforward. -First, load your network (MultiLayerNetwork or ComputationGraph) into memory on the driver using the information from the following guide: [How to save (and load) neural networks trained on Spark](#saveload) - -Then, simply create your network using: - -``` -JavaSparkContext sc = new JavaSparkContext(); -MultiLayerNetwork net = ...; //Load the network, as described above -SparkDl4jMultiLayer sparkNet = new SparkDl4jMultiLayer(sc, net, null); -``` - -``` -JavaSparkContext sc = new JavaSparkContext(); -ComputationGraph net = ...; //Load the network, as described above -SparkComputationGraph sparkNet = new SparkComputationGraph(sc, net, null); -``` - -Note that you don't need to configure a TrainingMaster (i.e., the 3rd argument is null above), as evaluation does not use it. 
- - -**Step 3: Call the appropriate evaluation method** - -For common cases, you can call one of the standard evaluation methods on SparkDl4jMultiLayer or SparkComputationGraph: -``` -evaluate(RDD) //Accuracy/F1 etc for classifiers -evaluate(JavaRDD) //Accuracy/F1 etc for classifiers -evaluateROC(JavaRDD) //ROC for single output binary classifiers -evaluateRegression(JavaRDD) //For regression metrics -``` - -For performing multiple evaluations simultaneously (more efficient than performing them sequentially) you can use something like: -``` -IEvaluation[] evaluations = new IEvaluation[]{new Evaluation(), new ROCMultiClass()}; -JavaRDD data = ...; -sparkNet.doEvaluation(data, 32, evaluations); -``` - -Note that some of the evaluation methods have overloads with extra parameters, including: -* ```int evalNumWorkers``` - the number of evaluation workers - i.e., the number of copies of a network used for evaluation on each node (up to the maximum number of Spark threads per worker). For large networks (or limited cluster memory), you might want to reduce this to avoid running into memory problems. -* ```int evalBatchSize``` - the minibatch size to use when performing evaluation. This needs to be large enough to efficiently use the hardware resources, but small enough to not run out of memory. Values of 32-128 are usually a good starting point; increase when more memory is available and for smaller networks; decrease if memory is a problem. -* ```DataSetLoader loader``` and ```MultiDataSetLoader loader``` - these are available when evaluating on a ```RDD``` or ```JavaRDD```. They are interfaces to load a path into a DataSet or MultiDataSet using a custom user-defined function. Most users will not need to use these, however the functionality is provided for greater flexibility. They would be used for example if the saved minibatch file format is not a DataSet/MultiDataSet but some other (possibly custom) format. 
- - -Finally, if you want to save the results of evaluation (of any type) you can save it to JSON format directly to remote storage such as HDFS as follows: -``` -JavaSparkContext sc = new JavaSparkContext(); -Evaluation eval = ... -String json = eval.toJson(); -String writeTo = "hdfs:///output/directory/evaluation.json"; -SparkUtils.writeStringToFile(writeTo, json, sc); //Also supports local file paths - file:// -``` -The import for ```SparkUtils``` is ```org.datavec.spark.transform.utils.SparkUtils``` - -The evaluation can be loaded using: -``` -String json = SparkUtils.readStringFromFile(writeTo, sc); -Evaluation eval = Evaluation.fromJson(json); -``` - -

    - -## How to save (and load) neural networks trained on Spark - -Deeplearning4j's Spark functionality is built around the idea of wrapper classes - i.e., ```SparkDl4jMultiLayer``` and ```SparkComputationGraph``` internally use the standard ```MultiLayerNetwork``` and ```ComputationGraph``` classes. -You can access the internal MultiLayerNetwork/ComputationGraph classes using ```SparkDl4jMultiLayer.getNetwork()``` and ```SparkComputationGraph.getNetwork()``` respectively. - -To save on the master/driver's local file system, get the network as described above and simply use the ```ModelSerializer``` class or ```MultiLayerNetwork.save(File)/.load(File)``` and ```ComputationGraph.save(File)/.load(File)``` methods. - -To save to (or load from) a remote location or distributed file system such as HDFS, you can use input and output streams. - -For example, -``` -JavaSparkContext sc = new JavaSparkContext(); -FileSystem fileSystem = FileSystem.get(sc.hadoopConfiguration()); -String outputPath = "hdfs:///my/output/location/file.bin"; -MultiLayerNetwork net = sparkNet.getNetwork(); -try (BufferedOutputStream os = new BufferedOutputStream(fileSystem.create(new Path(outputPath)))) { - ModelSerializer.writeModel(net, os, true); -} -``` - -Reading is a similar process: -``` -JavaSparkContext sc = new JavaSparkContext(); -FileSystem fileSystem = FileSystem.get(sc.hadoopConfiguration()); -String outputPath = "hdfs:///my/output/location/file.bin"; -MultiLayerNetwork net; -try(BufferedInputStream is = new BufferedInputStream(fileSystem.open(new Path(outputPath)))){ - net = ModelSerializer.restoreMultiLayerNetwork(is); -} -``` - -

    - - -## How to perform distributed inference - -Deeplearning4j's Spark implementation supports distributed inference. That is, we can easily generate predictions on an RDD of inputs using a cluster of machines. -This distributed inference can also be used for networks trained on a single machine and loaded for Spark (see the [saving/loading section](#saveload) for details on how to load a saved network for use with Spark). - -Note: If you want to perform evaluation (i.e., calculate accuracy, F1, MSE, etc), refer to the [evaluation how-to](#evaluation) instead. - -The method signatures for performing distributed inference are as follows: -``` -SparkDl4jMultiLayer.feedForwardWithKey(JavaPairRDD featuresData, int batchSize) : JavaPairRDD -SparkComputationGraph.feedForwardWithKey(JavaPairRDD featuresData, int batchSize) : JavaPairRDD -``` -There are also overloads that accept an input mask array, when required - -Note the parameter ```K``` - this is a generic type to signify the unique 'key' used to identify each example. The key values are not used as part of the inference process. This key is required as Spark's RDDs are unordered - without this, we would have no way to know which element in the predictions RDD corresponds to which element in the input RDD. -The batch size parameter is used to specify the minibatch size when performing inference. It does not impact the values returned, but instead is used to balance memory use vs. computational efficiency: large batches might compute a little quicker overall, but require more memory. In many cases, a batch size of 64 is a good starting point to try if you are unsure of what to use. - -


    - -# Problems and Troubleshooting Guides - -## How to debug common Spark dependency problems (NoClassDefFoundExcption and similar) - -Unfortunately, dependency problems at runtime can occur on a cluster if your project is not configured correctly. These problems can occur with any Spark jobs, not just those using DL4J - and they may be caused by other dependencies or libraries on the classpath, not by Deeplearning4j dependencies. - -When dependency problems occur, they typically produce exceptions like: -* NoSuchMethodException -* ClassNotFoundException -* AbstractMethodError - -For example, mismatched Spark versions (trying to use Spark 1 on a Spark 2 cluster) can look like: -``` -java.lang.AbstractMethodError: org.deeplearning4j.spark.api.worker.ExecuteWorkerPathMDSFlatMap.call(Ljava/lang/Object;)Ljava/util/Iterator; -``` - -Another class of errors is the ```UnsupportedClassVersionError``` for example ```java.lang.UnsupportedClassVersionError: XYZ : Unsupported major.minor version 52.0``` - this can result from trying to run (for example) Java 8 code on a cluster that is set up with only a Java 7 JRE/JDK. - - -How to debug dependency problems: - -**Step 1: Collect Dependency Information** - -The first step (when using Maven) is to produce a dependency tree that you can refer to. -Open a command line window (for example, bash on Linux, cmd on Windows), navigate to the root directory of your Maven project and run ```mvn dependency:tree``` -This will give you a list of dependencies (direct and transient) that can be helpful to understand exactly what is on the classpath, and why. - -Note also that ```mvn dependency:tree -Dverbose``` will provide extra information, and can be useful when debugging problems related to mismatched library versions. - -**Step 2: Check your Spark Versions** - -When running into dependency issues, check the following. 
- -*First: check the Spark versions* -If your cluster is running Spark 2, you should be using a version of deeplearning4j-spark_2.10/2.11 (and DataVec) that ends with ```_spark_2``` - -Look through your dependency tree (produced in Step 1) to check which Spark and DL4J versions your project is actually pulling in, and that they match your cluster. - -If you find a problem, you should change your project dependencies as follows: -On a Spark 2 (Scala 2.11) cluster, use: -``` - - org.deeplearning4j - dl4j-spark_2.11 - 1.0.0-beta2_spark_2 - -``` -whereas on a Spark 1 (Scala 2.11) cluster, you should use: -``` - - org.deeplearning4j - dl4j-spark_2.11 - 1.0.0-beta2_spark_1 - -``` - -**Step 3: Check the Scala Versions** - -Apache Spark is distributed with versions that support both Scala 2.10 and Scala 2.11. - -To avoid problems with Scala versions, you need to do two things: -(a) Ensure you don't have a mix of Scala 2.10 and Scala 2.11 (or 2.12) dependencies on your project classpath. Check your dependency tree for entries ending in ```_2.10``` or ```_2.11```: for example, ```org.apache.spark:spark-core_2.11:jar:1.6.3:compile``` is a Spark 1 (1.6.3) dependency using Scala 2.11 -(b) Ensure that your project matches what the cluster is using. For example, if your cluster is running Spark 2 with Scala 2.11, all of your Scala dependencies should use 2.11 also. Note that Scala 2.11 is more common for Spark clusters. - -If you find mismatched Scala versions, you will need to align them by changing the dependency versions in your pom.xml (or similar configuration file for other dependency management systems). Many libraries (including Spark and DL4J) release dependencies with both Scala 2.10 and 2.11 versions. - -**Step 4: Check for Mismatched Library Versions** - -A number of common utility libraries that are widely used across the Java ecosystem are not compatible across versions. For example, Spark might rely on library X version Y and will fail to run when library X version Z is on the classpath. 
Furthermore, many of these libraries are split into multiple modules (i.e., multiple separate modular dependencies) that won't work correctly when mixing different versions. - -Some that can commonly cause problems include: -* Jackson -* Guava - -DL4J and ND4J use versions of these libraries that should avoid dependency conflicts with Spark. -However, it is possible that other (3rd party libraries) can pull in versions of these dependencies. - -Often, the exception will give a hint of where to look - i.e., the stack trace might include a specific class, which can be used to identify the problematic library. - -**Step 5: Once Identified, Fix the Dependency Conflict** - -To debug these sorts of problems, check the dependency tree (the output of ```mvn dependency:tree -Dverbose```) carefully. Where necessary, you can use [exclusions](https://maven.apache.org/guides/introduction/introduction-to-optional-and-excludes-dependencies.html) or add the problematic dependency as a direct dependency to force its version in your project. To do this, you would add the dependency of the version you want directly to your project. Often, this is enough to solve the problem. - -Keep in mind that when using Spark submit, Spark will add a copy of Spark and its dependent libraries to the driver and worker classpaths. -This means that for dependencies that are added by Spark, you can't simply exclude them in your project - Spark submit will add them at runtime whether you exclude them or not in your project. - -One additional setting that is worth knowing about is the (experimental) Spark configuration options, ```spark.driver.userClassPathFirst``` and ```spark.executor.userClassPathFirst``` (See the [Spark configuration docs](https://spark.apache.org/docs/latest/configuration.html) for more details). In some cases, these options may be a fix for dependency issues. - -

    - -## How to Cache RDD[INDArray] and RDD[DataSet] Safely - -Spark has some issues regarding how it handles Java objects with large off-heap components, such as the DataSet and INDArray objects used in Deeplearning4j. This section explains the issues related to caching/persisting these objects. - -The key points to know about are: - -* MEMORY_ONLY and MEMORY_AND_DISK persistence can be problematic with off-heap memory, due to Spark not properly estimating the size of objects in the RDD. This can lead to out of (off-heap) memory issues. -* When persisting a ```RDD``` or ```RDD``` for re-use, use MEMORY_ONLY_SER or MEMORY_AND_DISK_SER - -**Why MEMORY_ONLY_SER or MEMORY_AND_DISK_SER Are Recommended** - -One of the way that Apache Spark improves performance is by allowing users to cache data in memory. This can be done using the ```RDD.cache()``` or ```RDD.persist(StorageLevel.MEMORY_ONLY())``` to store the contents in-memory, in deserialized (i.e., standard Java object) form. -The basic idea is simple: if you persist a RDD, you can re-use it from memory (or disk, depending on configuration) without having to recalculate it. However, large RDDs may not entirely fit into memory. In this case, some parts of the RDD have to be recomputed or loaded from disk, depending on the storage level used. Furthermore, to avoid using too much memory, Spark will drop parts (blocks) of an RDD when required. - -The main storage levels available in Spark are listed below. For an explanation of these, see the [Spark Programming Guide](https://spark.apache.org/docs/1.6.2/programming-guide.html#rdd-persistence). - -* MEMORY_ONLY -* MEMORY_AND_DISK -* MEMORY_ONLY_SER -* MEMORY_AND_DISK_SER -* DISK_ONLY - -The problem with Spark is how it handles memory. In particular, Spark will drop part of an RDD (a block) based on the estimated size of that block. The way Spark estimates the size of a block depends on the persistence level. 
For ```MEMORY_ONLY``` and ```MEMORY_AND_DISK``` persistence, this is done by walking the Java object graph - i.e., look at the fields in an object and recursively estimate the size of those objects. This process does not however take into account the off-heap memory used by Deeplearning4j or ND4J. For objects like DataSets and INDArrays (which are stored almost entirely off-heap), Spark significantly under-estimates the true size of the objects using this process. Furthermore, Spark considers only the amount of on-heap memory use when deciding whether to keep or drop blocks. Because DataSet and INDArray objects have a very small on-heap size, Spark will keep too many of them around with ```MEMORY_ONLY``` and ```MEMORY_AND_DISK``` persistence, resulting in off-heap memory being exhausted, causing out of memory issues. - -However, for ```MEMORY_ONLY_SER``` and ```MEMORY_AND_DISK_SER``` Spark stores blocks in *serialized* form, on the Java heap. The size of objects stored in serialized form can be estimated accurately by Spark (there is no off-heap memory component for the serialized objects) and consequently Spark will drop blocks when required - avoiding any out of memory issues. - -

    - -## How to fix "Error querying NTP server" errors - -DL4J's parameter averaging implementation has the option to collect training stats, by using ```SparkDl4jMultiLayer.setCollectTrainingStats(true)```. -When this is enabled, internet access is required to connect to the NTP (network time protocol) server. - -It is possible to get errors like ```NTPTimeSource: Error querying NTP server, attempt 1 of 10```. Sometimes these failures are transient (later retries will work) and can be ignored. However, if the Spark cluster is configured such that one or more of the workers cannot access the internet (or specifically, the NTP server), all retries can fail. - -Two solutions are available: - -1. Don't use ```sparkNet.setCollectTrainingStats(true)``` - this functionality is optional (not required for training), and is disabled by default -2. Set the system to use the local machine clock instead of the NTP server, as the time source (note however that the timeline information may be very inaccurate as a result) -To use the system clock time source, add the following to Spark submit: -``` ---conf spark.driver.extraJavaOptions=-Dorg.deeplearning4j.spark.time.TimeSource=org.deeplearning4j.spark.time.SystemClockTimeSource ---conf spark.executor.extraJavaOptions=-Dorg.deeplearning4j.spark.time.TimeSource=org.deeplearning4j.spark.time.SystemClockTimeSource -``` - -

    - -## Failed training on Ubuntu 16.04 (Ubuntu bug that may affect DL4J users) - -When running a Spark on YARN cluster on Ubuntu 16.04 machines, chances are that after finishing a job, all processes owned by the user running Hadoop/YARN are killed. This is related to a bug in Ubuntu, which is documented at https://bugs.launchpad.net/ubuntu/+source/procps/+bug/1610499. There's also a Stackoverflow discussion about it at https://stackoverflow.com/questions/38419078/logouts-while-running-hadoop-under-ubuntu-16-04. - -Some workarounds are suggested. - -**Option 1** - -Add -``` -[login] -KillUserProcesses=no -``` -to /etc/systemd/logind.conf, and reboot. - -**Option 2** - -Copy the /bin/kill binary from Ubuntu 14.04 and use that one instead. - -**Option 3** - -Downgrade to Ubuntu 14.04 - -**Option 4** - -## How to Cache RDD[INDArray] and RDD[DataSet] Safely - -Spark has some issues regarding how it handles Java objects with large off-heap components, such as the DataSet and INDArray objects used in Deeplearning4j. This section explains the issues related to caching/persisting these objects. - -The key points to know about are: - -* MEMORY_ONLY and MEMORY_AND_DISK persistence can be problematic with off-heap memory, due to Spark not properly estimating the size of objects in the RDD. This can lead to out of (off-heap) memory issues. -* When persisting a ```RDD``` or ```RDD``` for re-use, use MEMORY_ONLY_SER or MEMORY_AND_DISK_SER - -**Why MEMORY_ONLY_SER or MEMORY_AND_DISK_SER Are Recommended** - -One of the way that Apache Spark improves performance is by allowing users to cache data in memory. This can be done using the ```RDD.cache()``` or ```RDD.persist(StorageLevel.MEMORY_ONLY())``` to store the contents in-memory, in deserialized (i.e., standard Java object) form. -The basic idea is simple: if you persist a RDD, you can re-use it from memory (or disk, depending on configuration) without having to recalculate it. 
However, large RDDs may not entirely fit into memory. In this case, some parts of the RDD have to be recomputed or loaded from disk, depending on the storage level used. Furthermore, to avoid using too much memory, Spark will drop parts (blocks) of an RDD when required. - -The main storage levels available in Spark are listed below. For an explanation of these, see the [Spark Programming Guide](https://spark.apache.org/docs/1.6.2/programming-guide.html#rdd-persistence). - -* MEMORY_ONLY -* MEMORY_AND_DISK -* MEMORY_ONLY_SER -* MEMORY_AND_DISK_SER -* DISK_ONLY - -The problem with Spark is how it handles memory. In particular, Spark will drop part of an RDD (a block) based on the estimated size of that block. The way Spark estimates the size of a block depends on the persistence level. For ```MEMORY_ONLY``` and ```MEMORY_AND_DISK``` persistence, this is done by walking the Java object graph - i.e., look at the fields in an object and recursively estimate the size of those objects. This process does not however take into account the off-heap memory used by Deeplearning4j or ND4J. For objects like DataSets and INDArrays (which are stored almost entirely off-heap), Spark significantly under-estimates the true size of the objects using this process. Furthermore, Spark considers only the amount of on-heap memory use when deciding whether to keep or drop blocks. Because DataSet and INDArray objects have a very small on-heap size, Spark will keep too many of them around with ```MEMORY_ONLY``` and ```MEMORY_AND_DISK``` persistence, resulting in off-heap memory being exhausted, causing out of memory issues. - -However, for ```MEMORY_ONLY_SER``` and ```MEMORY_AND_DISK_SER``` Spark stores blocks in *serialized* form, on the Java heap. The size of objects stored in serialized form can be estimated accurately by Spark (there is no off-heap memory component for the serialized objects) and consequently Spark will drop blocks when required - avoiding any out of memory issues. 
- -## How to fix "Error querying NTP server" errors - -DL4J's parameter averaging implementation has the option to collect training stats, by using ```SparkDl4jMultiLayer.setCollectTrainingStats(true)```. -When this is enabled, internet access is required to connect to the NTP (network time protocol) server. - -It is possible to get errors like ```NTPTimeSource: Error querying NTP server, attempt 1 of 10```. Sometimes these failures are transient (later retries will work) and can be ignored. However, if the Spark cluster is configured such that one or more of the workers cannot access the internet (or specifically, the NTP server), all retries can fail. - -Two solutions are available: - -1. Don't use ```sparkNet.setCollectTrainingStats(true)``` - this functionality is optional (not required for training), and is disabled by default -2. Set the system to use the local machine clock instead of the NTP server, as the time source (note however that the timeline information may be very inaccurate as a result) -To use the system clock time source, add the following to Spark submit: -``` ---conf spark.driver.extraJavaOptions=-Dorg.deeplearning4j.spark.time.TimeSource=org.deeplearning4j.spark.time.SystemClockTimeSource ---conf spark.executor.extraJavaOptions=-Dorg.deeplearning4j.spark.time.TimeSource=org.deeplearning4j.spark.time.SystemClockTimeSource -``` - -## Failed training on Ubuntu 16.04 (Ubuntu bug that may affect DL4J users) - -When running a Spark on YARN cluster on Ubuntu 16.04 machines, chances are that after finishing a job, all processes owned by the user running Hadoop/YARN are killed. This is related to a bug in Ubuntu, which is documented at https://bugs.launchpad.net/ubuntu/+source/procps/+bug/1610499. There's also a Stackoverflow discussion about it at https://stackoverflow.com/questions/38419078/logouts-while-running-hadoop-under-ubuntu-16-04. - -Some workarounds are suggested. 
- -**Option 1** - -Add -``` -[login] -KillUserProcesses=no -``` -to /etc/systemd/logind.conf, and reboot. - -**Option 2** - -Copy the /bin/kill binary from Ubuntu 14.04 and use that one instead. - -**Option 3** - -Downgrade to Ubuntu 14.04 - -**Option 4** - -run ```sudo loginctl enable-linger hadoop_user_name``` on cluster nodes diff --git a/docs/deeplearning4j-scaleout/templates/intro.md b/docs/deeplearning4j-scaleout/templates/intro.md deleted file mode 100644 index fd801541e..000000000 --- a/docs/deeplearning4j-scaleout/templates/intro.md +++ /dev/null @@ -1,153 +0,0 @@ ---- -title: "Deeplearning4j on Spark: Introduction/Getting Started" -short_title: Introduction/Getting Started -description: "Deeplearning4j on Spark: Introduction" -category: Distributed Deep Learning -weight: 0 ---- - -# Distributed Deep Learning with DL4J and Spark - -Deeplearning4j supports neural network training on a cluster of CPU or GPU machines using Apache Spark. Deeplearning4j also supports distributed evaluation as well as distributed inference using Spark. - -## DL4J’s Distributed Training Implementations - -DL4J has two implementations of distributed training. - * Gradient sharing, available as of 1.0.0-beta: Based on [this](http://nikkostrom.com/publications/interspeech2015/strom_interspeech2015.pdf) paper by Nikko Strom, is an asynchronous SGD implementation with quantized and compressed updates implemented in Spark+Aeron - * Parameter averaging: A synchronous SGD implementation with a single parameter server implemented entirely in Spark. - - -Users are directed towards the gradient sharing implementation which superseded the parameter averaging implementation. The gradient sharing implementation results in faster training times and is implemented to be scalable and fault-tolerant (as of 1.0.0-beta3). For the sake of completeness, this page will also cover the parameter averaging approach. 
The [technical reference section](deeplearning4j-scaleout-technicalref) covers details on the implementation. - -In addition to distributed training DL4J also enables users to do distributed evaluation (including multiple evaluations simultaneously) and distributed inference. Refer to the [Deeplearning4j on Spark: How To Guides](deeplearning4j-scaleout-howto) for more details. - -### When to use Spark for Training Neural Networks - -Spark is not always the most appropriate tool for training neural networks. - -You should use Spark when: -1. You have a cluster of machines for training (not just a single machine - this includes multi-GPU machines) -2. You need more than a single machine to train the network -3. Your network is large enough to justify a distributed implementation - -For a single machine with multiple GPUs or multiple physical processors, users should consider using DL4J's Parallel-Wrapper implementation as shown in [this example](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-cuda-specific-examples/src/main/java/org/deeplearning4j/examples/multigpu/MultiGpuLenetMnistExample.java). ParallelWrapper allows for easy data parallel training of networks on a single machine with multiple cores. Spark has higher overheads compared to ParallelWrapper for single machine training. - -Similarly, if you don't need Spark (smaller networks and/or datasets) - it is recommended to use single machine training, which is usually simpler to set up. - -For a network to be large enough: here's a rough guide. If the network takes 100ms or longer to perform one iteration (100ms per fit operation on each minibatch), distributed training should work well with good scalability. At 10ms per iteration, we might expect sub-linear scaling of performance vs. number of nodes. At around 1ms or below per iteration, the communication overhead may be too much: training on a cluster may be no faster (or perhaps even slower) than on a single machine. 
-For the benefits of parallelism to outweigh the communication overhead, users should consider the ratio of network transfer time to computation time and ensure that the computation time is large enough to mask the additional overhead of distributed training. - -### Setup and Dependencies - -To run training on GPUs make sure that you are specifying the correct backend in your pom file (nd4j-cuda-x.x for GPUs vs nd4j-native backend for CPUs) and have set up the machines with the appropriate CUDA libraries. Refer to the [Deeplearning4j on Spark: How To Guides](deeplearning4j-scaleout-howto) for more details. - -To use the gradient sharing implementation include the following dependency: - -``` - - org.deeplearning4j - dl4j-spark-parameterserver_${scala.binary.version} - ${dl4j.version} - -``` - -If using the parameter averaging implementation (again, the gradient sharing implemention should be preferred) include: - -``` - - org.deeplearning4j - dl4j-spark_${scala.binary.version} - ${dl4j.version} - -``` -Note that ${scala.binary.version} is a Maven property with the value 2.10 or 2.11 and should match the version of Spark you are using. - -## Key Concepts - -The following are key classes the user should be familiar with to get started with distributed training with DL4J. - - * **TrainingMaster**: Specifies how distributed training will be conducted in practice. Implementations include Gradient Sharing (SharedTrainingMaster) or Parameter Averaging (ParameterAveragingTrainingMaster) - * **SparkDl4jMultiLayer and SparkComputationGraph**: These are wrappers around the MultiLayerNetwork and ComputationGraph classes in DL4J that enable the functionality related to distributed training. For training, they are configured with a TrainingMaster. - * **```RDD``` and ```RDD```**: A Spark RDD with DL4J's DataSet or MultiDataSet classes define the source of the training data (or evaluation data). 
Note that the recommended best practice is to preprocess your data once, and save it to network storage such as HDFS. Refer to the [Deeplearning4j on Spark: How To Build Data Pipelines](deeplearning4j-scaleout-data-howto) section for more details. - - -The training workflow usually proceeds as follows: -1. Prepare training code with a few components: - a. Neural network configuration - b. Data pipeline - c. SparkDl4jMultiLayer/SparkComputationGraph plus Trainingmaster -2. Create uber-JAR file (see [Spark how-to guide](deeplearning4j-scaleout-howto) for details) -3. Determine the arguments (memory, number of nodes, etc) for Spark submit -4. Submit the uber-JAR to Spark submit with the required arguments - - -## Minimal Examples -The following code snippets outlines the general setup required. The [API reference](deeplearning4j-scaleout-apiref) outlines detailed usage of the various classes. The user can submit a uber jar to Spark Submit for execution with the right options. See [Deeplearning4j on Spark: How To Guides](deeplearning4j-scaleout-howto) for further details. - - -### Gradient Sharing (Preferred Implementation) - -``` -JavaSparkContext sc = ...; -JavaRDD trainingData = ...; - -//Model setup as on a single node. Either a MultiLayerConfiguration or a ComputationGraphConfiguration -MultiLayerConfiguration model = ...; - -// Configure distributed training required for gradient sharing implementation -VoidConfiguration conf = VoidConfiguration.builder() - .unicastPort(40123) //Port that workers will use to communicate. Use any free port - .networkMask(“10.0.0.0/16”) //Network mask for communication. 
Examples 10.0.0.0/24, or 192.168.0.0/16 etc - .controllerAddress("10.0.2.4") //IP of the master/driver - .build(); - -//Create the TrainingMaster instance -TrainingMaster trainingMaster = new SharedTrainingMaster.Builder(conf) - .batchSizePerWorker(batchSizePerWorker) //Batch size for training - .updatesThreshold(1e-3) //Update threshold for quantization/compression. See technical explanation page - .workersPerNode(numWorkersPerNode) // equal to number of GPUs. For CPUs: use 1; use > 1 for large core count CPUs - .meshBuildMode(MeshBuildMode.MESH) // or MeshBuildMode.PLAIN for < 32 nodes - .build(); - -//Create the SparkDl4jMultiLayer instance -SparkDl4jMultiLayer sparkNet = new SparkDl4jMultiLayer(sc, model, trainingMaster); - -//Execute training: -for (int i = 0; i < numEpochs; i++) { - sparkNet.fit(trainingData); -} -``` - - - -### Parameter Averaging Implementation - -``` -JavaSparkContext sc = ...; -JavaRDD trainingData = ...; - -//Model setup as on a single node. Either a MultiLayerConfiguration or a ComputationGraphConfiguration -MultiLayerConfiguration model = ...; - -//Create the TrainingMaster instance -int examplesPerDataSetObject = 1; -TrainingMaster trainingMaster = new ParameterAveragingTrainingMaster.Builder(examplesPerDataSetObject) - .(other configuration options) - .build(); - -//Create the SparkDl4jMultiLayer instance and fit the network using the training data: -SparkDl4jMultiLayer sparkNetwork = new SparkDl4jMultiLayer(sc, model, trainingMaster); - -//Execute training: -for (int i = 0; i < numEpochs; i++) { - sparkNet.fit(trainingData); -} -``` - -## Further Reading - -* [Deeplearning4j on Spark: Technical Explanation](deeplearning4j-scaleout-technicalref) -* [Deeplearning4j on Spark: How To Guides](deeplearning4j-scaleout-howto) -* [Deeplearning4j on Spark: How To Build Data Pipelines](deeplearning4j-scaleout-data-howto) -* [Deeplearning4j on Spark: API Reference](deeplearning4j-scaleout-apiref) -* The [Deeplearning4j examples 
repo](https://github.com/eclipse/deeplearning4j-examples) contains a number of Spark examples that can be used by the user as reference. diff --git a/docs/deeplearning4j-scaleout/templates/parameter-server.md b/docs/deeplearning4j-scaleout/templates/parameter-server.md deleted file mode 100644 index 61aff7b9c..000000000 --- a/docs/deeplearning4j-scaleout/templates/parameter-server.md +++ /dev/null @@ -1,161 +0,0 @@ ---- -title: Distributed Training with Parameter Server -short_title: Parameter Server -description: Deeplearning4j supports fast distributed training with Spark and a parameter server. -category: Distributed Deep Learning -weight: 12 ---- - -# Distributed training with gradients sharing - -DeepLearning4j supports distributed training in the Apache Spark environment and [Aeron](https://github.com/real-logic/Aeron) for high performance inter-node communication outside of Spark. The idea is relatively simple: individual workers calculate gradients on their DataSets. - -Before gradients are applied to the network weights, they are accumulated in an intermediate storage mechanism (one for each machine). After aggregation, updated values above some configurable threshold are propagated across the network as a sparse binary array. Values below the threshold are stored and added to future updates, hence they are not lost, but merely delayed in their communication. - -This thresholding approach reduces the network communication requirements by many orders of magnitude compared to a -naive approach of sending the entire dense update, or parameter vector, while maintaining high accuracy. 
- -For more details on the thresholding approach, see [Strom, 2015 - Scalable Distributed DNN Training using Commodity GPU Cloud Computing](http://nikkostrom.com/publications/interspeech2015/strom_interspeech2015.pdf) and [Distributed Deep Learning, Part 1: An Introduction to Distributed Training of Neural Networks](http://engineering.skymind.io/distributed-deep-learning-part-1-an-introduction-to-distributed-training-of-neural-networks). - -Here are a few more perks that were added to the original algorithm proposed by Nikko Strom: - -- Variable threshold: If the number of updates per iteration gets too low, the threshold is automatically decreased by a configurable step value. -- Dense bitmap encoding: If the number of updates gets too high, another encoding scheme is used, which provides guarantees of "maximum number of bytes" being sent over the wire for any given update message. -- Periodically, we send "shake up" messages, encoded with a significantly smaller threshold, to share delayed weights that can't get above current threshold. - -![Two phases within the cluster](/images/guide/distributed.png) - -Note that using Spark entails overhead. In order to determine whether Spark will help you or not, consider using the [Performance Listener](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/listeners/PerformanceListener.java) and look at the millisecond iteration time. -If it's <= 150ms, Spark may not be worth it. - -## Setting up Your Cluster - -All you need to run training is a Spark 1.x/2.x cluster and at least one open UDP port (both inbound/outbound). - -### Cluster Setup - -As mentioned above, DeepLearning4j supports both Spark 1.x and Spark 2.x clusters. However, this particular implementation also requires Java 8+ to run. If your cluster is running Java 7, you'll either have to upgrade or use our [Parameters Averaging training mode](./deeplearning4j-spark-training). 
- -### Network Environment - -Gradient sharing relies heavily on the UDP protocol for communication between the Master and the slave nodes during training. If you're running your cluster in a cloud environment such as AWS or Azure, you need to allow one UDP port for Inbound/Outbound connections, and you have to specify that port in the `VoidConfiguration.unicastPort(int)` bean that is passed to `SharedTrainingMaster` constructor. - -Another option to keep in mind: if you use YARN (or any other resource manager that handles Spark networking), you'll have to specify the network mask of the network that'll be used for UDP communications. That could be done with something like this: `VoidConfiguration.setNetworkMask("10.1.1.0/24")`. - -An option of last resort for IP address selection is the `DL4J_VOID_IP` environment variable. Set that variable on each node you're running, with a local IP address to be used for comms. - -### Netmask - -Network mask is CIDR notation, is just a way to tell software, which network interfaces should be used for communication. For example, if your cluster has 3 boxes with following IP addresses: `192.168.1.23, 192.168.1.78, 192.168.2.133` their common part of network address is 192.168.*, so netmask is `192.168.0.0/16`. You can also get detailed explanation what is netmask in wikipedia: [https://en.wikipedia.org/wiki/Subnetwork](https://en.wikipedia.org/wiki/Subnetwork) - -We're using netmasks for cases when Spark cluster is run on top of hadoop, or any other environment which doesn't assume Spark IP addresses announced. In such cases valid netmask should be provided in `VoidConfiguration` bean, and it will be used to pick interface for out-of-Spark communications. 
- -### Dependencies - -Here's the template for the only required dependency: - -``` - - org.deeplearning4j - dl4j-spark-parameterserver_${scala.binary.version} - ${dl4j.version} - -``` - -For example: - -``` - - org.deeplearning4j - dl4j-spark-parameterserver_2.11 - ${dl4j.version} - -``` - -### Example Configuration: - -Below is a snippet from an example project taken from [our examples repo on Github](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-spark-examples/dl4j-spark/src/main/java/org/deeplearning4j/mlp/MnistMLPDistributedExample.java) - -``` -SparkConf sparkConf = new SparkConf(); -sparkConf.setAppName("DL4J Spark Example"); -JavaSparkContext sc = new JavaSparkContext(sparkConf); - -MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .seed(12345) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - ... - .build(); - -/* - This is a ParameterServer configuration bean. The only option you'll really ever use is .unicastPort(int) -*/ -VoidConfiguration voidConfiguration = VoidConfiguration.builder() - .unicastPort(40123) - .build(); - -/* - SharedTrainingMaster is the basement of distributed training. Tt holds all logic required for training -*/ -TrainingMaster tm = new SharedTrainingMaster.Builder(voidConfiguration,batchSizePerWorker) - .updatesThreshold(1e-3) - .rddTrainingApproach(RDDTrainingApproach.Export) - .batchSizePerWorker(batchSizePerWorker) - .workersPerNode(4) - .build(); - -//Create the Spark network -SparkDl4jMultiLayer sparkNet = new SparkDl4jMultiLayer(sc, conf, tm); - -//Execute training: -for (int i = 0; i < numEpochs; i++) { - sparkNet.fit(trainData); - log.info("Completed Epoch {}", i); -} -``` -**_PLEASE NOTE_**: This configuration assumes that you have UDP port 40123 open on ALL nodes within your cluster. - - -## Effective Scalability - -Network IO has its own price, and this algorithm does some IO as well. 
Additional overhead to training time can be calculated as `updates encoding time + message serialization time + updates application from other workers`. - -The longer the original iteration time, the less relative impact will come from sharing, and the better hypothetical scalability you will get. - -Here's a simple form that'll help you with scalability expectations: -{% include formscalability.html %} - -## Performance Hints - -### Executors, Cores, Parallelism - -By design, Spark allows you to configure the number of executors and cores per executor for your task. Imagine you have a cluster of 18 nodes with 32 cores in each node. - -In this case, your `--num-executors` value will be 18 and the recommended `--executor-cores` value will be somewhere between 2 and 32. This option will basically define how many partitions your RDD will be split into. - -Plus, you can manually set the specific number of DL4J workers that'll be used on each node. This can be done via the `SharedTrainingMaster.Builder().workersPerNode(int)` method. - -If your nodes are GPU-powered, it's usually a very good idea to set `workersPerNode(int)` to the number of GPUs per box or to keep its default value for auto-tuning. - -### Encoding Threshold - -A higher threshold value gives you more sparse updates which will boost network IO performance, but it might (and probably will) affect the learning performance of your neural network. - -A lower threshold value will give you more dense updates so each individual updates message will become larger. This will degrade network IO performance. Individual "best threshold value" is impossible to predict since it may vary for different architectures, but a default value of `1e-3` is a good value to start with. - -### Network Latency vs Bandwidth - -The rule of thumb is simple here: the faster your network, the better your performance. A 1GBe network should be considered the absolute minimum, but a 10GBe will perform better due to lower latency. 
- -Of course, performance depends on the network size and the amount of computation. Larger networks require greater bandwidth but also require more time per iteration (hence possibly leaving more time for asynchronous communication). - -### UDP Unicast vs UDP Broadcast - -To ensure maximum compatibility (for example, with cloud computing environments such as AWS and Azure, which do not support multicast), only UDP unicast is currently utilized in DL4J. - -UDP Broadcast transfers should be faster, but for training performance, the difference should not be noticeable (except perhaps for very small workloads). - -By design, each worker sends 1 updates message per iteration and this won’t change regardless of UDP transport type. Since message retransmission in UDP Unicast transport is handled by the Master node (which typically has low utilization) and since message passing is asynchronous, we simply require that update communication time is less than network iteration time for performance - which is usually the case. - -### Multi-GPU Environments -The best results are to be expected on boxes where PCIe/NVLink P2P connectivity between devices is available. However, everything will still work fine even without P2P. Just "a bit" slower. :) \ No newline at end of file diff --git a/docs/deeplearning4j-scaleout/templates/technicalref.md b/docs/deeplearning4j-scaleout/templates/technicalref.md deleted file mode 100644 index 99f78968e..000000000 --- a/docs/deeplearning4j-scaleout/templates/technicalref.md +++ /dev/null @@ -1,140 +0,0 @@ ---- -title: "Deeplearning4j on Spark: Technical Explanation" -short_title: Technical Explanation -description: "Deeplearning4j on Spark: Technical Explanation" -category: Distributed Deep Learning -weight: 1 ---- - -# DL4J Distributed Training: Technical Explanation - -This section will cover the technical details of Deeplearning4j's Apache Spark gradient sharing training implementation. 
Details on the parameter averaging implementation also follow. Note that the parameter averaging implementation has been superseded by the gradient sharing implementation as of 1.0.0-beta. This guide assumes the reader is familiar with key concepts in distributed training like data parallelism and synchronous vs asynchronous SGD. This [blog post](https://blog.skymind.ai/distributed-deep-learning-part-1-an-introduction-to-distributed-training-of-neural-networks/) can provide an introduction. - -* [Asynchronous SGD Implementation](#asgd) -* [Parameter Averaging Implementation](#parameteravg) -* [Fault Tolerance](#faulttol) - -## Asynchronous SGD Implementation -DL4J's asynchronous SGD implementation is based on the [Strom 2015 neural network training paper](http://nikkostrom.com/publications/interspeech2015/strom_interspeech2015.pdf) by Nikko Strom, with some modifications. -The next section will review the key features of the Strom paper followed by another section that describes the DL4J implementation and how it differs from the paper. - -### Strom's Approach -When training a neural network on a cluster, the worker machines need to communicate changes to their parameters - either by communicating the new parameter values directly (such as in parameter averaging) or by communicating gradient/update information (as in gradient sharing). - -The key feature of this approach is that opposed to relaying all parameters/updates across the network only updates that are above a user specified threshold are communicated. Put another way: we start out with an update vector (1 entry per parameter) that needs to be communicated. Instead of communicating the vector as-is, we communicate only the large elements in a quantized way (which is a sparse binary vector) instead of all elements. 
-The motivation here is to reduce the amount of network communication required - this "sparse, 1-bit binary encoding" approach can reduce the size required for communicating updates by a factor of 1000x or more - see the Strom paper for some compression statistics. - -Note that updates below the threshold are not discarded but accumulated in a “residual” vector to be applied later. Also of note is the absence of a centralized parameter server which is replaced by peer to peer communication as indicated in the image below. - -![Strom's ASGD implementation](/images/guide/Strom_ASGD.svg) - -The update vectors, δi,j in the image above, are: -1. Sparse: only some of the gradients are communicated in each vector δi,j (the remainder are assumed to be 0) - sparse entries are encoded using an integer index -2. Quantized to a single bit: each element of the sparse update vector takes value +τ or −τ. This value of τ is the same for all elements of the vector, hence only a single bit is required to differentiate between the two options -3. Integer indexes (used to identify the entries in the sparse array) are optionally compressed using entropy coding to further reduce update sizes (the author quotes a further 3x reduction at the cost of additional computation, though the benefit may not be worth the additional cost) - -One of the main concerns of asynchronous SGD is the issue of stale gradients. Stale gradients need not be explicitly handled in Strom's approach - in most cases, the updates are applied very quickly on each node. The paper reports a reduction in network transfers by several orders of magnitude. Given a suitably computation intensive model (like an RNN or a CNN) this drastic reduction in network communication ensures that model equivalency is maintained across all nodes and stale gradients are not an issue. - -However the approach is not without its downsides as described below: -1. 
Strom reports that convergence can suffer in the early stages of training (using fewer compute nodes for a fraction of an epoch seems to help) -2. Compression and quantization is not free: these processes result in extra computation time per minibatch, and a small amount of memory overhead per executor -3. The process introduces two additional hyperparameters to consider: the value for the threshold, τ and whether to use entropy coding for the updates or not (though notably both parameter averaging and async SGD also introduce additional hyperparameters) - - -### DL4J's ASGD implementation - -The DL4J implementation differs from Strom's approach in the following ways: - -1. Not point-to-point: -The implementation allows the user to choose between two modes of network organization - plain mode and mesh mode. Plain mode is to be used when the number of nodes in the cluster are < 32 nodes and mesh mode is to be used for larger clusters. Refer to the section on [different modes](#modes) for more details. -2. Two encoding schemes: - DL4J uses two encoding schemes, dynamically switching between the two depending on which will provide less network communication. Refer to the section on [encoding](#encoding) for more details. -3. Quantization thresholds adjusted: - The quantization threshold is stepped up or down depending on the distribution of the updates after each iteration. This is done on each node independently to make sure that updates are indeed sparse. In practice, this is implemented via the [ThresholdAlgorithm](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/encoding/ThresholdAlgorithm.java) interface and the [implementations](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/encoding/threshold) there-of. -4. 
Residual clipping - As noted earlier, the "left over" parts of the updates (i.e., those parts not communicated) are store in the residual vector. If the updates are much larger than the threshold, we can have a phenomenon we have termed "residual explosion" - that is, the residual values can continue to grow to many times the threshold (hence would take many steps to communicate the gradient). To avoid this, DL4J has a [ResidualPostProcessor](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/encoding/ResidualPostProcessor.java) interface, with the default implementation being [ResidualClippingPostProcessor](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/encoding/residual/ResidualClippingPostProcessor.java) which clips the residual vector to a maximum of 5x the current threshold, every 5 steps. -5. Local parallelism via ParallelWrapper: - This enables multi-CPU/GPU nodes to share information faster - - -As is evident from the description, an implementation of ASGD requires updates to be transferred with every iteration of training. Further communication between workers within the cluster is a requirement in mesh mode. - -To enable fast out of spark communication DL4J uses [Aeron](https://github.com/real-logic/aeron/wiki). Aeron is a high performance messaging system that can run over UDP, Infiniband or Shared Memory. Aeron is designed to be the highest throughput with the lowest and most predictable latency possible of any messaging system. Building our own communications stack above Aeron allows us to have a custom implementation of the parameter server integrated with Spark and yet control and minimize allocations right of the wire. - -#### Plain Mode vs Mesh Mode - -DL4J's gradient sharing implementation can be configured in 2 ways, depending on the cluster size. 
- -Below is an image describing how plain mode is organized: -![Plain Mode](/images/guide/plainmode.png) - - -In plain mode, quantized encoded updates are relayed by each node to the master and the master then relays them to the remaining nodes. This ensures that the master always has an up to date version of the model, which is necessary for fault tolerance. The master node however is a potential bottleneck in this implementation. To scale to larger sized clusters (more than about 32 nodes - though this is network and hardware specific) use mesh mode as described below. - -Below is an image describing how mesh mode is organized: -![Mesh Mode](/images/guide/meshmode.png) - -Mesh mode is a non-binary tree with Spark master at its root. By default each node can have a maximum of eight nodes and the tree can be a maximum of five levels deep. In mesh mode each node relays encoded updates to all nodes connected to it and each node aggregates updates received from all other nodes connected to it. In mesh mode, the master is no longer a bottleneck as the amount of communication it receives directly is reduced. As of the writing of this document, the implementation has been tested with unicast as well as multicast (available in 1.0.0-beta3). Future support is planned for RDMA. - -#### Encoding Schemes -Updates are sent using one of two schemes as described below. - * Threshold encoding: Sends an array of integers each referring to the index of the parameter. A positive integer is sent for a positive threshold and a negative integer is sent for a negative threshold. - * Bitmap encoding: Each parameter update is encoded with two bits. The four states are used to indicate no change, a +ve threshold change, a -ve threshold change and a half threshold change that cycles between +ve and -ve. - -Using these two kinds of encoding schemes accommodates cases when the updates are dense. Since each node has its own threshold its value is also communicated with each transfer. 
Encoding updates are pushed down to optimized native code (c++) for the sake of performance and GPU parallelization. -The sparse threshold (integer index) encoding can result in very high compression rates, whereas the bitmap encoding results in a fixed size 16x compression ratio (i.e., 2 bits per parameter vs. 32 bits for the original update vector). - - -## Parameter Averaging Implementation -The parameter averaging implementation was the first distributed training implementation in DL4J. It has since been superseded by the gradient sharing implementation described in the previous section. Details on the parameter averaging implementation are included here for the sake of completeness. - -The parameter averaging implementation is a synchronous SGD approach implemented entirely in Spark. DL4J's parameter averaging implementation uses a single parameter server, a role served by the Spark master node. - -Parameter averaging is the conceptually simplest approach to data parallelism. It requires the user to specify the frequency at which the workers synchronize with each other and the master. With parameter averaging, training proceeds as follows: - -1. The master (Spark driver) starts with an initial network configuration and parameters -2. Data is split into a number of subsets, based on the configuration of the TrainingMaster. -3. Iterate over the data splits. For each split of the training data: - a. Distribute the configuration, parameters (and if applicable, network updater state for momentum/rmsprop/adagrad) from the master to each worker - b. Fit each worker on its portion of the split - c. Average the parameters (and if applicable, updater state) and return the averaged results to the master -4. Training is complete, with the master having a copy of the trained network - -Steps 3a through 3c are demonstrated in the image below. In this diagram, W represents the parameters (weights, biases) in the neural network. 
Subscripts are used to index the version of the parameters over time, and where necessary for each worker machine. - -![Parameter Averaging](/images/guide/parameteraveraging.svg) - -The implementation uses Spark's treeAggregate under the hood. There are a number of enhancements that can be made to this implementation that will result in faster training times. Even with these enhancements in place the asynchronous SGD approach with quantized compressed updates is expected to continue to be much faster. Therefore the user is strongly recommended to switch from the parameter averaging implementation to the asynchronous SGD gradient sharing approach. - - -## Fault Tolerance - -Spark implementations of distributed training in DL4J are fault tolerant as of 1.0.0-beta3. -The parameter averaging implementation has always been fault tolerant; the gradient sharing implementation was made fully fault tolerant after (not including) 1.0.0-beta2. - -Before going into the details of the implementation let us first consider what happens when a node goes down. Since Spark is unaware of the updates sent via Aeron the RDD lineage tracks back to the initial parameter and optimizer state. When Spark restores a node in place of one that went down it will therefore resume training from its initial state. In other words, this restored node will be out of sync with the other nodes and this will cause training to diverge. - -DL4J's Gradient sharing utilizes its own internal heartbeat mechanism outside of Spark to detect when a node goes down, as well as to detect when a recovered node comes online. To ensure that training continues without diverging it is necessary that the restored node resumes training with a copy of the model identical to that on the other nodes at the current point. To ensure that updates are not applied multiple times each update is tagged with a unique ID. 
The state of the updater/optimizer (RMSProp, AdaGrad etc) as well as the iteration/epoch number are also required for network training to proceed from the state prior to the node failure. - -The following outlines what happens when a node goes down in plain mode and is restored: -1. The restored node reconnects to the master node -2. The restored node starts receiving updates and then sends request for parameters, updater state and current epoch/iteration -3. Master fulfils these requests (by itself or by proxy) -4. The restored node applies ONLY relevant updates (relative to the parameter vector) -5. Training continues on the RDD data on the new node, properly in-sync with other nodes and properly converging - -Requesting a copy of the model after the node has started receiving updates makes sure that updates are not missed. Updates are tagged by unique IDs and no update will be incorrectly applied twice. Since the master does not do any training it does not hold the updater state; when it receives a request for the updater/optimizer state it sends out a request to one of the other nodes - upon receiving the request, it sends the updater to the restored node. - -The only additional step in mesh mode when a node fails is to remap the descendants of the failed node. In this case a descendant of the failed node is mapped to master and all the remaining descendants are mapped to the one mapped to master. - -Concretely with the tree structure below if node 2 fails, node 5 is mapped to the master and nodes 6 and 7 are mapped to node 5. - -![Node Failure](/images/guide/nodefailure.png) - - -The decision to remap to master instead of the neighboring nodes was made since the master is assumed to be the most reliable option. Requests for a copy of the model etc. are also made to the master for this very same reason. It is to be noted that similar to a Spark job distributed neural network training with DL4J cannot withstand the master node failing. 
For this reason, the user is advised to persist the state of the model frequently. In this case if the master were to fail training can be restarted from the latest saved state. - -Limitations of fault tolerance: There are two main limitations of fault tolerance for the gradient sharing implementation. -First: A small amount of data (a few minibatches) may be processed multiple times. This is because a failed node may process part of a partition (sending out updates) before failing. This is not a problem in practice: the number of duplicated minibatches is usually very small, and we are typically training for multiple epochs anyway (thus each example is already being seen multiple times during training). -Second: The master/driver node is a single point of failure. This is essentially a Spark limitation: DL4J could (in principle) implement functionality to recover from a failed master and continue training, but Apache Spark does not support fault tolerance for the master node. - diff --git a/docs/deeplearning4j-zoo/README.md b/docs/deeplearning4j-zoo/README.md deleted file mode 100644 index e294f59be..000000000 --- a/docs/deeplearning4j-zoo/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# deeplearning4j-zoo documentation - -Build and serve documentation for DataVec with MkDocs (install with `pip install mkdocs`) -The source for Keras documentation is in this directory under `doc_sources/`. - -The structure of this project (template files, generating code, mkdocs YAML) is closely aligned -with the [Keras documentation](keras.io) and heavily inspired by the [Keras docs repository](https://github.com/keras-team/keras/tree/master/docs). 
- -To generate docs into the `deeplearning4j-zoo/doc_sources` folder, first `cd docs` then run: - -```shell -python generate_docs.py \ - --project deeplearning4j-zoo \ - --code ../deeplearning4j - --out_language en -``` \ No newline at end of file diff --git a/docs/deeplearning4j-zoo/pages.json b/docs/deeplearning4j-zoo/pages.json deleted file mode 100644 index 56e1dc2b8..000000000 --- a/docs/deeplearning4j-zoo/pages.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "excludes": [ - "abstract" - ], - "indices": [ - ], - "pages": [ - { - "page": "overview.md", - "class": [] - }, - { - "page": "models.md", - "class": [ - "deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/AlexNet.java", - "deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/Darknet19.java", - "deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/FaceNetNN4Small2.java", - "deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/InceptionResNetV1.java", - "deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/LeNet.java", - "deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/NASNet.java", - "deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/ResNet50.java", - "deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/SimpleCNN.java", - "deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/SqueezeNet.java", - "deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/TextGenerationLSTM.java", - "deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/TinyYOLO.java", - "deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/UNet.java", - "deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/VGG16.java", - "deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/VGG19.java", - "deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/Xception.java", - "deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/YOLO2.java" - ] - } - ] -} - diff --git a/docs/deeplearning4j-zoo/templates/models.md 
b/docs/deeplearning4j-zoo/templates/models.md deleted file mode 100644 index c16b1d6c6..000000000 --- a/docs/deeplearning4j-zoo/templates/models.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Deeplearning4j Zoo Models -short_title: Zoo Models -description: Prebuilt model architectures and weights for out-of-the-box application. -category: Models -weight: 10 ---- - -## Available models - -{{autogenerated}} \ No newline at end of file diff --git a/docs/deeplearning4j-zoo/templates/overview.md b/docs/deeplearning4j-zoo/templates/overview.md deleted file mode 100644 index ed51ef92d..000000000 --- a/docs/deeplearning4j-zoo/templates/overview.md +++ /dev/null @@ -1,131 +0,0 @@ ---- -title: Deeplearning4j Model Zoo -short_title: Zoo Usage -description: Prebuilt model architectures and weights for out-of-the-box application. -category: Models -weight: 10 ---- - -## About the Deeplearning4j model zoo - -Deeplearning4j has native model zoo that can be accessed and instantiated directly from DL4J. The model zoo also includes pretrained weights for different datasets that are downloaded automatically and checked for integrity using a checksum mechanism. - -If you want to use the new model zoo, you will need to add it as a dependency. A Maven POM would add the following: - -``` - - org.deeplearning4j - deeplearning4j-zoo - {{ page.version }} - -``` - -## Getting started - -Once you've successfully added the zoo dependency to your project, you can start to import and use models. Each model extends the `ZooModel` abstract class and uses the `InstantiableModel` interface. These classes provide methods that help you initialize either an empty, fresh network or a pretrained network. - -### Initializing fresh configurations - -You can instantly instantiate a model from the zoo using the `.init()` method. 
For example, if you want to instantiate a fresh, untrained network of AlexNet you can use the following code: - -``` -import org.deeplearning4j.zoo.model.AlexNet -import org.deeplearning4j.zoo.*; - -... - -int numberOfClassesInYourData = 1000; -int randomSeed = 123; - -ZooModel zooModel = AlexNet.builder() - .numClasses(numberOfClassesInYourData) - .seed(randomSeed) - .build(); -Model net = zooModel.init(); -``` - -If you want to tune parameters or change the optimization algorithm, you can obtain a reference to the underlying network configuration: - -``` -ZooModel zooModel = AlexNet.builder() - .numClasses(numberOfClassesInYourData) - .seed(randomSeed) - .build(); -MultiLayerConfiguration net = ((AlexNet) zooModel).conf(); -``` - -### Initializing pretrained weights - -Some models have pretrained weights available, and a small number of models are pretrained across different datasets. `PretrainedType` is an enumerator that outlines different weight types, which includes `IMAGENET`, `MNIST`, `CIFAR10`, and `VGGFACE`. - -For example, you can initialize a VGG-16 model with ImageNet weights like so: - -``` -import org.deeplearning4j.zoo.model.VGG16; -import org.deeplearning4j.zoo.*; - -... - -ZooModel zooModel = VGG16.builder().build();; -Model net = zooModel.initPretrained(PretrainedType.IMAGENET); -``` - -And initialize another VGG16 model with weights trained on VGGFace: - -``` -ZooModel zooModel = VGG16.builder().build(); -Model net = zooModel.initPretrained(PretrainedType.VGGFACE); -``` - -If you're not sure whether a model contains pretrained weights, you can use the `.pretrainedAvailable()` method which returns a boolean. Simply pass a `PretrainedType` enum to this method, which returns true if weights are available. - -Note that for convolutional models, input shape information follows the NCHW convention. So if a model's input shape default is `new int[]{3, 224, 224}`, this means the model has 3 channels and height/width of 224. - - - -## What's in the zoo? 
- -The model zoo comes with well-known image recognition configurations in the deep learning community. The zoo also includes an LSTM for text generation, and a simple CNN for general image recognition. - -You can find a complete list of models using this [deeplearning4j-zoo Github link](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model). - -This includes ImageNet models such as VGG-16, ResNet-50, AlexNet, Inception-ResNet-v1, LeNet, and more. - -* [AlexNet](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/AlexNet.java) -* [Darknet19](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/Darknet19.java) -* [FaceNetNN4Small2](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/FaceNetNN4Small2.java) -* [InceptionResNetV1](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/InceptionResNetV1.java) -* [LeNet](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/LeNet.java) -* [ResNet50](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/ResNet50.java) -* [SimpleCNN](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/SimpleCNN.java) -* [TextGenerationLSTM](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/TextGenerationLSTM.java) -* 
[TinyYOLO](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/TinyYOLO.java) -* [VGG16](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/VGG16.java) -* [VGG19](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/VGG19.java) - -## Advanced usage - -The zoo comes with a couple additional features if you're looking to use the models for different use cases. - -### Changing Inputs - -Aside from passing certain configuration information to the constructor of a zoo model, you can also change its input shape using `.setInputShape()`. NOTE: this applies to fresh configurations only, and will not affect pretrained models: - -``` -int numberOfClassesInYourData = 10; -int randomSeed = 123; - -ZooModel zooModel = ResNet50.builder() - .numClasses(numberOfClassesInYourData) - .seed(randomSeed) - .build(); -zooModel.setInputShape(new int[][]{{3, 28, 28}}); -``` - -### Transfer Learning - -Pretrained models are perfect for transfer learning! You can read more about transfer learning using DL4J [here](./deeplearning4j-nn-transfer-learning). - -### Workspaces - -Initialization methods often have an additional parameter named `workspaceMode`. For the majority of users you will not need to use this; however, if you have a large machine that has "beefy" specifications, you can pass `WorkspaceMode.SINGLE` for models such as VGG-19 that have many millions of parameters. To learn more about workspaces, please see [this section](./deeplearning4j-config-workspaces). 
\ No newline at end of file diff --git a/docs/deeplearning4j/README.md b/docs/deeplearning4j/README.md deleted file mode 100644 index f59f7cb24..000000000 --- a/docs/deeplearning4j/README.md +++ /dev/null @@ -1,10 +0,0 @@ -# deeplearning4j documentation - -To generate docs into the `deeplearning4j/doc_sources` folder, first `cd docs` then run: - -```shell -python generate_docs.py \ - --project deeplearning4j \ - --code ../deeplearning4j - --out_language en -``` diff --git a/docs/deeplearning4j/pages.json b/docs/deeplearning4j/pages.json deleted file mode 100644 index a11bb7c88..000000000 --- a/docs/deeplearning4j/pages.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "excludes": [ - "abstract" - ], - "indices": [ - ], - "pages": [ - { - "page": "quickstart.md", - "class": [] - }, - { - "page": "examples-tour.md", - "class": [] - }, - { - "page": "cheat-sheet.md", - "class": [] - }, - { - "page": "android.md", - "class": [] - }, - { - "page": "android-prerequisites.md", - "class": [] - }, - { - "page": "android-linear-classifier.md", - "class": [] - }, - { - "page": "android-image-classification.md", - "class": [] - }, - { - "page": "beginners.md", - "class": [] - }, - { - "page": "benchmark.md", - "class": [] - }, - { - "page": "build-from-source.md", - "class": [] - }, - { - "page": "concepts.md", - "class": [] - }, - { - "page": "contribute.md", - "class": [] - }, - { - "page": "config-buildtools.md", - "class": [] - }, - { - "page": "config-maven.md", - "class": [] - }, - { - "page": "config-memory.md", - "class": [] - }, - { - "page": "config-workspaces.md", - "class": [] - }, - { - "page": "troubleshooting-training.md", - "class": [] - } - ] -} - diff --git a/docs/deeplearning4j/templates/android-image-classification.md b/docs/deeplearning4j/templates/android-image-classification.md deleted file mode 100644 index d0cc8f558..000000000 --- a/docs/deeplearning4j/templates/android-image-classification.md +++ /dev/null @@ -1,395 +0,0 @@ ---- -title: Using DL4J for Android 
Image Classification -short_title: Android Image Classifier -description: How to create an Android Image Classification app with Eclipse Deeplearning4j. -category: Mobile -weight: 3 ---- - -## Using Deeplearning4J in Android Applications - -Contents - -* [Setting the Dependencies](#head_link1) -* [Training and loading the Mnist model in the Android project resources](#head_link2) -* [Accessing the trained model using an AsyncTask](#head_link7) -* [Handling images from user input](#head_link3) -* [Updating the UI](#head_link5) -* [Conclusion](#head_link6) - - -## DL4JImageRecognitionDemo - -This example application uses a neural network trained on the standard MNIST dataset of 28x28 greyscale 0..255 pixel value images of hand drawn numbers 0..9. The application user interace allows the user to draw a number on the device screen which is then tested against the trained network. The output displays the most probable numeric values and the probability score. This tutorial will cover the use of a trained neural network in an Android Application, the handling of user generated images, and the output of the results to the UI from a background thread. More information on general prerequisites for building DL4J Android Applications can be found [here](./deeplearning4-android-prerequisites). - -![](/images/guide/screen2.png) - - -## Setting the Dependencies - -Deeplearning4J applications requires application specific dependencies in the build.gradle file. The Deeplearning library in turn depends on the libraries of ND4J and OpenBLAS, thus these must also be added to the dependencies declaration. Starting with Android Studio 3.0, annotationProcessors need to be defined as well, thus dependencies for either -x86 or -arm processors should be included, depending on your device, if you are working in Android Studio 3.0 or later. Note that both can be include without conflict as is done in the example app. 
-```groovy -implementation (group: 'org.deeplearning4j', name: 'deeplearning4j-core', version: '{{page.version}}') { - exclude group: 'org.bytedeco', module: 'opencv-platform' - exclude group: 'org.bytedeco', module: 'leptonica-platform' - exclude group: 'org.bytedeco', module: 'hdf5-platform' - exclude group: 'org.nd4j', module: 'nd4j-base64' -} -implementation group: 'org.nd4j', name: 'nd4j-native', version: '{{page.version}}' -implementation group: 'org.nd4j', name: 'nd4j-native', version: '{{page.version}}', classifier: "android-arm" -implementation group: 'org.nd4j', name: 'nd4j-native', version: '{{page.version}}', classifier: "android-arm64" -implementation group: 'org.nd4j', name: 'nd4j-native', version: '{{page.version}}', classifier: "android-x86" -implementation group: 'org.nd4j', name: 'nd4j-native', version: '{{page.version}}', classifier: "android-x86_64" -implementation group: 'org.bytedeco', name: 'openblas', version: '0.3.9-1.5.3' -implementation group: 'org.bytedeco', name: 'openblas', version: '0.3.9-1.5.3', classifier: "android-arm" -implementation group: 'org.bytedeco', name: 'openblas', version: '0.3.9-1.5.3', classifier: "android-arm64" -implementation group: 'org.bytedeco', name: 'openblas', version: '0.3.9-1.5.3', classifier: "android-x86" -implementation group: 'org.bytedeco', name: 'openblas', version: '0.3.9-1.5.3', classifier: "android-x86_64" -implementation group: 'org.bytedeco', name: 'opencv', version: '4.3.0-1.5.3' -implementation group: 'org.bytedeco', name: 'opencv', version: '4.3.0-1.5.3', classifier: "android-arm" -implementation group: 'org.bytedeco', name: 'opencv', version: '4.3.0-1.5.3', classifier: "android-arm64" -implementation group: 'org.bytedeco', name: 'opencv', version: '4.3.0-1.5.3', classifier: "android-x86" -implementation group: 'org.bytedeco', name: 'opencv', version: '4.3.0-1.5.3', classifier: "android-x86_64" -implementation group: 'org.bytedeco', name: 'leptonica', version: '1.79.0-1.5.3' -implementation 
group: 'org.bytedeco', name: 'leptonica', version: '1.79.0-1.5.3', classifier: "android-arm" -implementation group: 'org.bytedeco', name: 'leptonica', version: '1.79.0-1.5.3', classifier: "android-arm64" -implementation group: 'org.bytedeco', name: 'leptonica', version: '1.79.0-1.5.3', classifier: "android-x86" -implementation group: 'org.bytedeco', name: 'leptonica', version: '1.79.0-1.5.3', classifier: "android-x86_64" - -implementation 'com.google.code.gson:gson:2.8.2' -annotationProcessor 'org.projectlombok:lombok:1.16.16' - -//This corrects for a junit version conflict. -configurations.all { - resolutionStrategy.force 'junit:junit:4.12' -} -``` - -Compiling these dependencies involves a large number of files, thus it is necessary to set multiDexEnabled to true in defaultConfig. -```java -multiDexEnabled true -``` -Finally, a conflict in the junit module versions will give the following error: > Conflict with dependency 'junit:junit' in project ':app'. Resolved versions for app (4.8.2) and test app (4.12) differ. -This can be suppressed by forcing all of the junit modules to use the same version. -```java -configurations.all { - resolutionStrategy.force 'junit:junit:4.12' -} -``` -## Training and loading the Mnist model in the Android project resources - -Using a neural network requires a significant amount of processor power, which is in limited supply on mobile devices. Therefore, a background thread must be used for loading of the trained neural network and the testing of the user drawn image by using AsyncTask. In this application we will run the canvas.draw code on the main thread and use an AsyncTask to load the drawn image from internal memory and test it against the trained model on a background thread. First, lets look at how to save the trained neural network we will be using in the application. 
- -You will need to begin by following the DeepLearning4j quick start [guide](./deeplearning4j-quickstart) to set up, train, and save neural network models on a desktop computer. The DL4J example which trains and saves the Mnist model used in this application is *MnistImagePipelineExampleSave.java* and is included in the quick start guide referenced above. The code for the Mnist demo is also available [here](https://gist.github.com/tomthetrainer/7cb2fbc14a5c631a567a98c3134f7dd6). Running this demo will train the Mnist neural network model and save it as *"trained_mnist_model.zip"* in the *dl4j\target folder* of the *dl4j-examples* directory. You can then copy the file and save it in the raw folder of your Android project. - -![](/images/guide/rawFolder.PNG) - -## Accessing the trained model using an AsyncTask - -Now let’s start by writing our AsyncTask<*Params*, *Progress*, *Results*> to load and use the neural network on a background thread. The AsyncTask will use the parameter types . The *Params* type is set to String, which will pass the Path for the saved image to the asyncTask as it is executed. This path will be used in the doInBackground() method to locate and load the trained Mnist model. The *Results* parameter is of type INDArray which will store the results from the neural network and pass it to the onPostExecute method that has access to the main thread for updating the UI. For more on NDArrays, see https://nd4j.org/userguide. Note that the AsyncTask requires that we override two more methods (the onProgressUpdate and onPostExecute methods) which we will get to later in the demo. -```java -private class AsyncTaskRunner extends AsyncTask { - - // Runs in UI before background thread is called. - @Override - protected void onPreExecute() { - super.onPreExecute(); - } - - @Override - protected INDArray doInBackground(String... 
params) { - // Main background thread, this will load the model and test the input image - // The dimensions of the images are set here - int height = 28; - int width = 28; - int channels = 1; - - //Now we load the model from the raw folder with a try / catch block - try { - // Load the pretrained network. - InputStream inputStream = getResources().openRawResource(R.raw.trained_mnist_model); - MultiLayerNetwork model = ModelSerializer.restoreMultiLayerNetwork(inputStream); - - //load the image file to test - File f=new File(absolutePath, "drawn_image.jpg"); - - //Use the nativeImageLoader to convert to numerical matrix - NativeImageLoader loader = new NativeImageLoader(height, width, channels); - - //put image into INDArray - INDArray image = loader.asMatrix(f); - - //values need to be scaled - DataNormalization scalar = new ImagePreProcessingScaler(0, 1); - - //then call that scalar on the image dataset - scalar.transform(image); - - //pass through neural net and store it in output array - output = model.output(image); - - } catch (IOException e) { - e.printStackTrace(); - } - return output; - } -``` - -## Handling images from user input - -Now lets add the code for the drawing canvas that will run on the main thread and allow the user to draw a number on the screen. This is a generic draw program written as an inner class within the MainActivity. It extends View and overrides a series of methods. The drawing is saved to internal memory and the AsyncTask is executed with the image Path passed to it in the onTouchEvent case statement for case *MotionEvent.ACTION_UP*. This has the streamline action of automatically returning results for an image after the user completes the drawing. 
-```java -//code for the drawing input - public class DrawingView extends View { - - private Path mPath; - private Paint mBitmapPaint; - private Paint mPaint; - private Bitmap mBitmap; - private Canvas mCanvas; - - public DrawingView(Context c) { - super(c); - - mPath = new Path(); - mBitmapPaint = new Paint(Paint.DITHER_FLAG); - mPaint = new Paint(); - mPaint.setAntiAlias(true); - mPaint.setStrokeJoin(Paint.Join.ROUND); - mPaint.setStrokeCap(Paint.Cap.ROUND); - mPaint.setStrokeWidth(60); - mPaint.setDither(true); - mPaint.setColor(Color.WHITE); - mPaint.setStyle(Paint.Style.STROKE); - } - - @Override - protected void onSizeChanged(int W, int H, int oldW, int oldH) { - super.onSizeChanged(W, H, oldW, oldH); - mBitmap = Bitmap.createBitmap(W, H, Bitmap.Config.ARGB_4444); - mCanvas = new Canvas(mBitmap); - } - - @Override - protected void onDraw(Canvas canvas) { - canvas.drawBitmap(mBitmap, 0, 0, mBitmapPaint); - canvas.drawPath(mPath, mPaint); - } - - private float mX, mY; - private static final float TOUCH_TOLERANCE = 4; - - private void touch_start(float x, float y) { - mPath.reset(); - mPath.moveTo(x, y); - mX = x; - mY = y; - } - private void touch_move(float x, float y) { - float dx = Math.abs(x - mX); - float dy = Math.abs(y - mY); - if (dx >= TOUCH_TOLERANCE || dy >= TOUCH_TOLERANCE) { - mPath.quadTo(mX, mY, (x + mX)/2, (y + mY)/2); - mX = x; - mY = y; - } - } - private void touch_up() { - mPath.lineTo(mX, mY); - mCanvas.drawPath(mPath, mPaint); - mPath.reset(); - } - - @Override - public boolean onTouchEvent(MotionEvent event) { - float x = event.getX(); - float y = event.getY(); - - switch (event.getAction()) { - case MotionEvent.ACTION_DOWN: - invalidate(); - clear(); - touch_start(x, y); - invalidate(); - break; - case MotionEvent.ACTION_MOVE: - touch_move(x, y); - invalidate(); - break; - case MotionEvent.ACTION_UP: - touch_up(); - absolutePath = saveDrawing(); - invalidate(); - clear(); - loadImageFromStorage(absolutePath); - onProgressBar(); - //launch 
the asyncTask now that the image has been saved - AsyncTaskRunner runner = new AsyncTaskRunner(); - runner.execute(absolutePath); - break; - - } - return true; - } - - public void clear(){ - mBitmap.eraseColor(Color.TRANSPARENT); - invalidate(); - System.gc(); - } - - } - -``` -Now we need to build a series of helper methods. First we will write the saveDrawing() method. It uses getDrawingCache() to retrieve the drawing from the drawingView and store it as a bitmap. We then create a file directory and file for the bitmap called "drawn_image.jpg". Finally, FileOutputStream is used in a try / catch block to write the bitmap to the file location. The method returns the absolute Path to the file location which will be used by the loadImageFromStorage() method. -```java -public String saveDrawing(){ - drawingView.setDrawingCacheEnabled(true); - Bitmap b = drawingView.getDrawingCache(); - - ContextWrapper cw = new ContextWrapper(getApplicationContext()); - // set the path to storage - File directory = cw.getDir("imageDir", Context.MODE_PRIVATE); - // Create imageDir and store the file there. Each new drawing will overwrite the previous - File mypath=new File(directory,"drawn_image.jpg"); - - //use a fileOutputStream to write the file to the location in a try / catch block - FileOutputStream fos = null; - try { - fos = new FileOutputStream(mypath); - b.compress(Bitmap.CompressFormat.JPEG, 100, fos); - } catch (Exception e) { - e.printStackTrace(); - } finally { - try { - fos.close(); - } catch (IOException e) { - e.printStackTrace(); - } - } - return directory.getAbsolutePath(); - } -``` - -Next we will write the loadImageFromStorage method which will use the absolute path returned from saveDrawing() to load the saved image and display it in the UI as part of the output display. It uses a try / catch block and a FileInputStream to set the image to the ImageView *img* in the UI layout. 
- -```java - private void loadImageFromStorage(String path) - { - - //use a fileInputStream to read the file in a try / catch block - try { - File f=new File(path, "drawn_image.jpg"); - Bitmap b = BitmapFactory.decodeStream(new FileInputStream(f)); - ImageView img=(ImageView)findViewById(R.id.outputView); - img.setImageBitmap(b); - } - catch (FileNotFoundException e) - { - e.printStackTrace(); - } - - } -``` - -We also need to write two methods that extract the predicted number from the neural network output and the confidence score, which we will call later when we complete the AsyncTask. - -```java -//helper method to return the largest value in the output array - public static double arrayMaximum(double[] arr) { - double max = Double.NEGATIVE_INFINITY; - for(double cur: arr) - max = Math.max(max, cur); - return max; - } - - // helper method to find the index (and therefore numerical value) of the largest confidence score - public int getIndexOfLargestValue( double[] array ) - { - if ( array == null || array.length == 0 ) return -1; - int largest = 0; - for ( int i = 1; i < array.length; i++ ) - {if ( array[i] > array[largest] ) largest = i; } - return largest; - } -``` - -Finally, we need a few methods we can call to control the visibility of an 'In Progress...' message while the background thread is running. These will be called when the AsyncTask is executed and in the onPostExecute method when the background thread completes. - -```java - public void onProgressBar(){ - TextView bar = findViewById(R.id.processing); - bar.setVisibility(View.VISIBLE); - } - - public void offProgressBar(){ - TextView bar = findViewById(R.id.processing); - bar.setVisibility(View.INVISIBLE); - } -``` - -Now let's go to the onCreate method to initialize the draw canvas and set some global variables. 
- -```java -public class MainActivity extends AppCompatActivity { - - MainActivity.DrawingView drawingView; - String absolutePath; - public static INDArray output; - - @Override - public void onCreate(Bundle savedInstanceState) { - super.onCreate(savedInstanceState); - setContentView(R.layout.activity_main); - - RelativeLayout parent = findViewById(R.id.layout2); - drawingView = new MainActivity.DrawingView(this); - parent.addView(drawingView); - } -``` - -## Updating the UI - -Now we can complete our AsyncTask by overriding the onProgress and onPostExecute methods. Once the doInBackground method of AsyncTask completes, the classification results will be passed to the onPostExecute which has access to the main thread and UI allowing us to update the UI with the results. Since we will not be using the onProgress method, a call to its superclass will suffice. - -```java -@Override - protected void onProgressUpdate(Integer... values) { - super.onProgressUpdate(values); - } -``` - -The onPostExecute method will receive an INDArray which contains the neural network results as a 1x10 array of probability values that the input drawing is each possible digit (0..9). From this we need to determine which row of the array contains the largest value and what the size of that value is. These two values will determine which number the neural network has classified the drawing as and how confident the network score is. These values will be referred to in the UI as *Prediction* and the *Confidence*, respectively. In the code below, the individual values for each position of the INDArray are passed to an array of type double using the getDouble() method on the result INDArray. We then get references to the TextViews which will be updated in the UI and call our helper methods on the array to return the array maximum (confidence) and index of the largest value (prediction). 
Note we also need to limit the number of decimal places reported on the probabilities by setting a DecimalFormat pattern. - -```java - - @Override - protected void onPostExecute(INDArray result) { - super.onPostExecute(result); - - //used to control the number of decimal places for the output probability - DecimalFormat df2 = new DecimalFormat(".##"); - - //transfer the neural network output to an array - double[] results = {result.getDouble(0,0),result.getDouble(0,1),result.getDouble(0,2), - result.getDouble(0,3),result.getDouble(0,4),result.getDouble(0,5),result.getDouble(0,6), - result.getDouble(0,7),result.getDouble(0,8),result.getDouble(0,9),}; - - //find the UI tvs to display the prediction and confidence values - TextView out1 = findViewById(R.id.prediction); - TextView out2 = findViewById(R.id.confidence); - - //display the values using the helper methods defined above - out2.setText(String.valueOf(df2.format(arrayMaximum(results)))); - out1.setText(String.valueOf(getIndexOfLargestValue(results))); - - //helper method to turn off the progress text - offProgressBar(); - } -``` - -## Conclusion - -This tutorial provides a basic framework for image recognition in an Android Application using a DL4J neural network. It illustrates how to load a pre-trained DL4J model from the raw resources file and how to test user-generated input images against the model. The AsyncTask then returns the output to the main thread and updates the UI. 
- -The complete code for this example is available [here.](https://github.com/eclipse/deeplearning4j-examples/tree/master/android/DL4JImageRecognitionDemo) diff --git a/docs/deeplearning4j/templates/android-linear-classifier.md b/docs/deeplearning4j/templates/android-linear-classifier.md deleted file mode 100644 index b6fe5352c..000000000 --- a/docs/deeplearning4j/templates/android-linear-classifier.md +++ /dev/null @@ -1,280 +0,0 @@ ---- -title: Android Classifier with DL4J -short_title: Android Classifier -description: How to create an IRIS classifier on Android using Eclipse Deeplearning4j. -category: Mobile -weight: 2 ---- - -# IRIS Classifier Demo - -The example application trains a small neural network on the device using Anderson’s Iris data set for iris flower type classification. For a more in-depth look at optimizing Android for DL4J, please see the Prerequisites and Configuration documentation [here](./deeplearning4j-android-prerequisites). This application has a simple UI to take measurements of petal length, petal width, sepal length, and sepal width from the user and returns the probability that the measurements belong to one of three types of Iris (*Iris setosa*, *Iris versicolor*, and *Iris virginica*). The data set includes 150 measurement values (50 for each iris type) and training the model takes anywhere from 5-20 seconds, depending on the device. - -Contents - -* [Setting the Dependencies](#head_link1) - [Setting up the neural network on a background thread](#head_link2) - [Preparing the training data set and user input](#head_link3) - [Building and Training the Neural Network](#head_link4) - [Updating the UI](#head_link5) - [Conclusion](#head_link6) - - -## DL4JIrisClassifierDemo - -## Setting the Dependencies -Deeplearning4J applications require several dependencies in the build.gradle file. The Deeplearning library in turn depends on the libraries of ND4J and OpenBLAS, thus these must also be added to the dependencies declaration. 
Starting with Android Studio 3.0, annotationProcessors need to be defined as well, requiring dependencies for -x86 or -arm processors. -```groovy -implementation (group: 'org.deeplearning4j', name: 'deeplearning4j-core', version: '{{page.version}}') { - exclude group: 'org.bytedeco', module: 'opencv-platform' - exclude group: 'org.bytedeco', module: 'leptonica-platform' - exclude group: 'org.bytedeco', module: 'hdf5-platform' -} -implementation group: 'org.nd4j', name: 'nd4j-native', version: '{{page.version}}' -implementation group: 'org.nd4j', name: 'nd4j-native', version: '{{page.version}}', classifier: "android-arm" -implementation group: 'org.nd4j', name: 'nd4j-native', version: '{{page.version}}', classifier: "android-arm64" -implementation group: 'org.nd4j', name: 'nd4j-native', version: '{{page.version}}', classifier: "android-x86" -implementation group: 'org.nd4j', name: 'nd4j-native', version: '{{page.version}}', classifier: "android-x86_64" -implementation group: 'org.bytedeco', name: 'openblas', version: '0.3.9-1.5.3' -implementation group: 'org.bytedeco', name: 'openblas', version: '0.3.9-1.5.3', classifier: "android-arm" -implementation group: 'org.bytedeco', name: 'openblas', version: '0.3.9-1.5.3', classifier: "android-arm64" -implementation group: 'org.bytedeco', name: 'openblas', version: '0.3.9-1.5.3', classifier: "android-x86" -implementation group: 'org.bytedeco', name: 'openblas', version: '0.3.9-1.5.3', classifier: "android-x86_64" -implementation group: 'org.bytedeco', name: 'opencv', version: '4.3.0-1.5.3' -implementation group: 'org.bytedeco', name: 'opencv', version: '4.3.0-1.5.3', classifier: "android-arm" -implementation group: 'org.bytedeco', name: 'opencv', version: '4.3.0-1.5.3', classifier: "android-arm64" -implementation group: 'org.bytedeco', name: 'opencv', version: '4.3.0-1.5.3', classifier: "android-x86" -implementation group: 'org.bytedeco', name: 'opencv', version: '4.3.0-1.5.3', classifier: "android-x86_64" -implementation 
group: 'org.bytedeco', name: 'leptonica', version: '1.79.0-1.5.3' -implementation group: 'org.bytedeco', name: 'leptonica', version: '1.79.0-1.5.3', classifier: "android-arm" -implementation group: 'org.bytedeco', name: 'leptonica', version: '1.79.0-1.5.3', classifier: "android-arm64" -implementation group: 'org.bytedeco', name: 'leptonica', version: '1.79.0-1.5.3', classifier: "android-x86" -implementation group: 'org.bytedeco', name: 'leptonica', version: '1.79.0-1.5.3', classifier: "android-x86_64" -``` - -Compiling these dependencies involves a large number of files, thus it is necessary to set multiDexEnabled to true in defaultConfig. - -```java -multiDexEnabled true -``` - -Finally, a conflict in the junit module versions will likely throw the following error: > Conflict with dependency 'junit:junit' in project ':app'. Resolved versions for app (4.8.2) and test app (4.12) differ. -This can be suppressed by forcing all of the junit modules to use the same version. - -```java -configurations.all { - resolutionStrategy.force 'junit:junit:4.12' -} -``` - - -## Setting up the neural network on a background thread - -Training even a simple neural network like in this example requires a significant amount of processor power, which is in limited supply on mobile devices. Thus, it is imperative that a background thread be used for the building and training of the neural network which then returns the output to the main thread for updating the UI. In this example we will be using an AsyncTask which accepts the input measurements from the UI and passes them as type double to the doInBackground() method. First, lets get references to the editTexts in the UI layout that accept the iris measurements inside of our onCreate method. Then an onClickListener will execute our asyncTask, pass it the measurements entered by the user, and show a progress bar until we hide it again in onPostExecute(). 
- -```java -public class MainActivity extends AppCompatActivity { -  -  -@Override - public void onCreate(Bundle savedInstanceState) { - super.onCreate(savedInstanceState); - setContentView(R.layout.activity_main); -  - //get references to the editTexts that take the measurements - final EditText PL = (EditText) findViewById(R.id.editText); - final EditText PW = (EditText) findViewById(R.id.editText2); - final EditText SL = (EditText) findViewById(R.id.editText3); - final EditText SW = (EditText) findViewById(R.id.editText4); -  - //onclick to capture the input and launch the asyncTask - Button button = (Button) findViewById(R.id.button); -  - button.setOnClickListener(new View.OnClickListener() { - @Override - public void onClick(View v) { -  - final double pl = Double.parseDouble(PL.getText().toString()); - final double pw = Double.parseDouble(PW.getText().toString()); - final double sl = Double.parseDouble(SL.getText().toString()); - final double sw = Double.parseDouble(SW.getText().toString()); -  - AsyncTaskRunner runner = new AsyncTaskRunner(); -  - //pass the measurement as params to the AsyncTask - runner.execute(pl,pw,sl,sw); -  - ProgressBar bar = (ProgressBar) findViewById(R.id.progressBar); - bar.setVisibility(View.VISIBLE); - } - }); - } -``` - -Now let’s write our AsyncTask<*Params*, *Progress*, *Results*>. The AsyncTask needs to have a *Params* of type Double to receive the decimal value measurements from the UI. The *Result* type is set to INDArray, which is returned from the doInBackground() Method and passed to the onPostExecute() method for updating the UI. NDArrays are provided by the ND4J library and are essentially n-dimensional arrays with a given number of dimensions. For more on NDArrays, see https://nd4j.org/userguide. 
- -```java -private class AsyncTaskRunner extends AsyncTask { -  - // Runs in UI before background thread is called - @Override - protected void onPreExecute() { - super.onPreExecute(); -  - ProgressBar bar = (ProgressBar) findViewById(R.id.progressBar); - bar.setVisibility(View.INVISIBLE); - } -``` - - -## Preparing the training data set and user input - -The doInBackground() method will handle the formatting of the training data, the construction of the neural net, the training of the net, and the analysis of the input data by the trained model. The user input has only 4 values, thus we can add those directly to a 1x4 INDArray using the putScalar() method. The training data is much larger and must be converted from CSV lists to matrices through an iterative *for* loop. -  -The training data is stored in the app as two arrays, one for the Iris measurements named *irisData* which contains a list of 150 iris measurements and another for the labels of iris type named *labelData*. These will be transformed to 150x4 and 150x3 matrices, respectively, so that they can be converted into INDArray objects that the neural network will use for training. - -```java - // This is our main background thread for the neural net - @Override - protected String doInBackground(Double... 
params) { - //Get the doubles from params, which is an array so they will be 0,1,2,3 - double pld = params[0]; - double pwd = params[1]; - double sld = params[2]; - double swd = params[3]; -   - //Create input INDArray for the user measurements - INDArray actualInput = Nd4j.zeros(1,4); - actualInput.putScalar(new int[]{0,0}, pld); - actualInput.putScalar(new int[]{0,1}, pwd); - actualInput.putScalar(new int[]{0,2}, sld); - actualInput.putScalar(new int[]{0,3}, swd); -   - //Convert the iris data into 150x4 matrix - int row=150; - int col=4; - double[][] irisMatrix=new double[row][col]; - int i = 0; - for(int r=0; rBuilding and Training the Neural Network - -Now that our data is ready, we can build a simple multi-layer perceptron with a single hidden layer. The *DenseLayer* class is used to create the input layer and the hidden layer of the network while the *OutputLayer* class is used for the Output layer. The number of columns in the input INDArray must equal to the number of neurons in the input layer (nIn). The number of neurons in the hidden layer input must equal the number inputLayer’s output array (nOut). Finally, the outputLayer input should match the hiddenLayer output. The output must equal the number of possible classifications, which is 3. - -```java - //define the layers of the network - DenseLayer inputLayer = new DenseLayer.Builder() - .nIn(4) - .nOut(3) - .name("Input") - .build(); -  - DenseLayer hiddenLayer = new DenseLayer.Builder() - .nIn(3) - .nOut(3) - .name("Hidden") - .build(); -  - OutputLayer outputLayer = new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) - .nIn(3) - .nOut(3) - .name("Output") - .activation(Activation.SOFTMAX) - .build(); -``` -The next step is to build the neural network using *nccBuilder*. The parameters selected below for training are standard. To learn more about optimizing network training, see deeplearning4j.org. 
-```java - NeuralNetConfiguration.Builder nncBuilder = new NeuralNetConfiguration.Builder(); - long seed = 6; - nncBuilder.seed(seed); - nncBuilder.activation(Activation.TANH); - nncBuilder.weightInit(WeightInit.XAVIER); -   - NeuralNetConfiguration.ListBuilder listBuilder = nncBuilder.list(); - listBuilder.layer(0, inputLayer); - listBuilder.layer(1, hiddenLayer); - listBuilder.layer(2, outputLayer); -   - listBuilder.backprop(true); -   - MultiLayerNetwork myNetwork = new MultiLayerNetwork(listBuilder.build()); - myNetwork.init(); -   - //Create a data set from the INDArrays and train the network - DataSet myData = new DataSet(trainingIn, trainingOut); - for(int l=0; l<=1000; l++) { - myNetwork.fit(myData); - } -   - //Evaluate the input data against the model - INDArray actualOutput = myNetwork.output(actualInput); - Log.d("myNetwork Output ", actualOutput.toString()); -   - //Here we return the INDArray to onPostExecute where it can be - //used to update the UI - return actualOutput; -} -``` -## Updating the UI - -Once the training of the neural network and the classification of the user measurements are complete, the doInBackground() method will finish and onPostExecute() will have access to the main thread and UI, allowing us to update the UI with the classification results. Note that the decimal places reported on the probabilities can be controlled by setting a DecimalFormat pattern. 
-```java -//This is where we update the UI with our classification results - @Override - protected void onPostExecute(INDArray result) { - super.onPostExecute(result); -  - //Hide the progress bar now that we are finished - ProgressBar bar = (ProgressBar) findViewById(R.id.progressBar); - bar.setVisibility(View.INVISIBLE); -  - //Retrieve the three probabilities - Double first = result.getDouble(0,0); - Double second = result.getDouble(0,1); - Double third = result.getDouble(0,2); -  - //Update the UI with output - TextView setosa = (TextView) findViewById(R.id.textView11); - TextView versicolor = (TextView) findViewById(R.id.textView12); - TextView virginica = (TextView) findViewById(R.id.textView13); -  - //Limit the double to values to two decimals using DecimalFormat - DecimalFormat df2 = new DecimalFormat(".##"); -  - //Set the text of the textViews in UI to show the probabilites - setosa.setText(String.valueOf(df2.format(first))); - versicolor.setText(String.valueOf(df2.format(second))); - virginica.setText(String.valueOf(df2.format(third))); -  - } -``` - - -## Conclusion - -Hopefully this tutorial has illustrated how the compatibility of DL4J with Android makes it easy to build, train, and evaluate neural networks on mobile devices. We used a simple UI to take input values from the measurement and then passed them as the *Params* in an AsyncTask. The processor intensive steps of data preparation, network layer building, model training, and evaluation of the user data were all performed in the doInBackground() method of the background thread, maintaining a stable and responsive device. Once completed, we passed the output INDArray as the AsyncTask *Results* to onPostExecute() where the UI was updated to demonstrate the classification results. -The limitations of processing power and battery life of mobile devices make training robust, multi-layer networks somewhat unfeasible. 
To address this limitation, we will next look at an example Android application that saves the trained model on the device for faster performance after an initial model training. - -The complete code for this example is available [here.](https://github.com/eclipse/deeplearning4j-examples/tree/master/android/DL4JIrisClassifierDemo) - - - diff --git a/docs/deeplearning4j/templates/android-prerequisites.md b/docs/deeplearning4j/templates/android-prerequisites.md deleted file mode 100644 index 43b4d26bd..000000000 --- a/docs/deeplearning4j/templates/android-prerequisites.md +++ /dev/null @@ -1,410 +0,0 @@ ---- -title: Prerequisites and Configurations for DL4J in Android -short_title: Android Prerequisites -description: Setting up and configuring Android Studio for DL4J. -category: Mobile -weight: 1 ---- - -## Prerequisites and Configurations for DL4J in Android - -Contents -* [Prerequisites](#head_link1) -* [Required Dependencies](#head_link2) -* [Managing Dependencies with ProGuard](#head_link3) -* [Memory Management](#head_link4) -* [Saving and Loading Networks on Android](#head_link5) - -While neural networks are typically run on powerful computers using multiple GPUs, the compatibility of Deeplearning4J with the Android platform makes using DL4J neural networks in android applications a possibility. This tutorial will cover the basics of setting up android studio for building DL4J applications. Several configurations for dependencies, memory management, and compilation exclusions needed to mitigate the limitations of low powered mobile device are outlined below. If you just want to get a DL4J app running on your device, you can jump ahead to a simple demo application which trains a neural network for Iris flower classification available [here](./deeplearning4j-android-linear-classifier). - - -## Prerequisites - -* Android Studio 2.2 or newer, which can be downloaded [here](https://developer.android.com/studio/index.html#Other). 
-* Android Studio version 2.2 and higher comes with the latest OpenJDK embedded; however, it is recommended to have the JDK installed on your own as you are then able to update it independent of Android Studio. Android Studio 3.0 and later supports all of Java 7 and a subset of Java 8 language features. Java JDKs can be downloaded from Oracle's website. -* Within Android Studio, the Android SDK Manager can be used to install Android Build tools 24.0.1 or later, SDK platform 24 or later, and the Android Support Repository. -* An Android device or an emulator running API level 21 or higher. A minimum of 200 MB of internal storage space free is recommended. - -It is also recommended that you download and install IntelliJ IDEA, Maven, and the complete dl4j-examples directory for building and training neural nets on your desktop instead of Android Studio. - - -## Required Dependencies - -In order to use Deeplearning4J in your Android projects, you will need to add the following dependencies to your app module’s build.gradle file. Depending on the type of neural network used in your application, you may need to add additional dependencies. 
- -``` groovy -implementation (group: 'org.deeplearning4j', name: 'deeplearning4j-core', version: '{{page.version}}') { - exclude group: 'org.bytedeco', module: 'opencv-platform' - exclude group: 'org.bytedeco', module: 'leptonica-platform' - exclude group: 'org.bytedeco', module: 'hdf5-platform' -} -implementation group: 'org.nd4j', name: 'nd4j-native', version: '{{page.version}}' -implementation group: 'org.nd4j', name: 'nd4j-native', version: '{{page.version}}', classifier: "android-arm" -implementation group: 'org.nd4j', name: 'nd4j-native', version: '{{page.version}}', classifier: "android-arm64" -implementation group: 'org.nd4j', name: 'nd4j-native', version: '{{page.version}}', classifier: "android-x86" -implementation group: 'org.nd4j', name: 'nd4j-native', version: '{{page.version}}', classifier: "android-x86_64" -implementation group: 'org.bytedeco', name: 'openblas', version: '0.3.9-1.5.3' -implementation group: 'org.bytedeco', name: 'openblas', version: '0.3.9-1.5.3', classifier: "android-arm" -implementation group: 'org.bytedeco', name: 'openblas', version: '0.3.9-1.5.3', classifier: "android-arm64" -implementation group: 'org.bytedeco', name: 'openblas', version: '0.3.9-1.5.3', classifier: "android-x86" -implementation group: 'org.bytedeco', name: 'openblas', version: '0.3.9-1.5.3', classifier: "android-x86_64" -implementation group: 'org.bytedeco', name: 'opencv', version: '4.3.0-1.5.3' -implementation group: 'org.bytedeco', name: 'opencv', version: '4.3.0-1.5.3', classifier: "android-arm" -implementation group: 'org.bytedeco', name: 'opencv', version: '4.3.0-1.5.3', classifier: "android-arm64" -implementation group: 'org.bytedeco', name: 'opencv', version: '4.3.0-1.5.3', classifier: "android-x86" -implementation group: 'org.bytedeco', name: 'opencv', version: '4.3.0-1.5.3', classifier: "android-x86_64" -implementation group: 'org.bytedeco', name: 'leptonica', version: '1.79.0-1.5.3' -implementation group: 'org.bytedeco', name: 'leptonica', version: 
'1.79.0-1.5.3', classifier: "android-arm" -implementation group: 'org.bytedeco', name: 'leptonica', version: '1.79.0-1.5.3', classifier: "android-arm64" -implementation group: 'org.bytedeco', name: 'leptonica', version: '1.79.0-1.5.3', classifier: "android-x86" -implementation group: 'org.bytedeco', name: 'leptonica', version: '1.79.0-1.5.3', classifier: "android-x86_64" -testImplementation 'junit:junit:4.12' -``` - -DL4J depends on ND4J, which is a library that offers fast n-dimensional arrays. ND4J in turn depends on a platform-specific native code library called JavaCPP, therefore you must load a version of ND4J that matches the architecture of the Android device. Both -x86 and -arm types can be included to support multiple device processor types. - -The above dependencies contain several files with identical names which must be handled with the following exclude parameters to your packagingOptions. - -```java -packagingOptions { - exclude 'META-INF/DEPENDENCIES' - exclude 'META-INF/DEPENDENCIES.txt' - exclude 'META-INF/LICENSE' - exclude 'META-INF/LICENSE.txt' - exclude 'META-INF/license.txt' - exclude 'META-INF/NOTICE' - exclude 'META-INF/NOTICE.txt' - exclude 'META-INF/notice.txt' - exclude 'META-INF/INDEX.LIST' - -} - ``` -After adding the above dependencies and exclusions to the build.gradle file, try syncing Gradle to see if any other exclusions are needed. The error message will identify the file path that should be added to the list of exclusions. An example error message with file path is: *> More than one file was found with OS independent path 'org/bytedeco/javacpp/ windows-x86_64/msvp120.dll'* -Compiling these dependencies involves a large number of files, thus it is necessary to set multiDexEnabled to true in defaultConfig. - -```java -multiDexEnabled true -``` - -A conflict in the junit module versions often causes the following error: *> Conflict with dependency 'junit:junit' in project ':app'. 
Resolved versions for app (4.8.2) and test app (4.12) differ*. This can be suppressed by forcing all of the junit modules to use the same version with the following: - -```java -configurations.all { - resolutionStrategy.force 'junit:junit:4.12' -} -``` - - -## Managing Dependencies with ProGuard - -The DL4J dependencies compile a large number of files. ProGuard can be used to minimize your APK file size. ProGuard detects and removes unused classes, fields, methods, and attributes from your packaged app, including those from code libraries. You can learn more about using ProGuard [here](https://developer.android.com/studio/build/shrink-code.html). -To enable code shrinking with ProGuard, add minifyEnabled true to the appropriate build type in your build.gradle file. - -```java -buildTypes { - release { - minifyEnabled true - proguardFiles getDefaultProguardFile('proguard-android.txt'), 'proguard-rules.pro' - } -} -``` - -It is recommended to upgrade your ProGuard in the Android SDK to the latest release (5.1 or higher). Note that upgrading the build tools or other aspects of your SDK might cause ProGuard to reset to the version shipped with the SDK. In order to force ProGuard to use a version other than the Android Gradle default, you can include this in the buildscript of the `build.gradle` file: - -``` java -buildscript { - configurations.all { - resolutionStrategy { - force 'net.sf.proguard:proguard-gradle:5.3.2' - } - } -} -``` - -ProGuard optimizes and reduces the amount of code in your Android application in order to make it smaller and faster. Unfortunately, ProGuard removes annotations by default, including the @Platform annotation used by JavaCV. To make ProGuard preserve these annotations and keep native methods, add the following flags to the proguard-rules.pro file. 
- -```java -# enable optimization --optimizations !code/simplification/arithmetic,!code/simplification/cast,!field/*,!class/merging/* --optimizationpasses 5 --allowaccessmodification --dontwarn org.apache.lang.** --ignorewarnings - --keepattributes *Annotation* -# JavaCV --keep @org.bytedeco.javacpp.annotation interface * {*;} --keep @org.bytedeco.javacpp.annotation.Platform public class * --keepclasseswithmembernames class * {@org.bytedeco.* ;} --keepclasseswithmembernames class * {@org.bytedeco.* ;} - --keepattributes EnclosingMethod --keep @interface org.bytedeco.javacpp.annotation.*,javax.inject.* - --keepattributes *Annotation*, Exceptions, Signature, Deprecated, SourceFile, SourceDir, LineNumberTable, LocalVariableTable, LocalVariableTypeTable, Synthetic, EnclosingMethod, RuntimeVisibleAnnotations, RuntimeInvisibleAnnotations, RuntimeVisibleParameterAnnotations, RuntimeInvisibleParameterAnnotations, AnnotationDefault, InnerClasses --keep class org.bytedeco.javacpp.** {*;} --dontwarn java.awt.** --dontwarn org.bytedeco.javacv.** --dontwarn org.bytedeco.javacpp.** -# end javacv - -# This flag is needed to keep native methods --keepclasseswithmembernames class * { - native ; -} - --keep public class * extends android.view.View { - public (android.content.Context); - public (android.content.Context, android.util.AttributeSet); - public (android.content.Context, android.util.AttributeSet, int); - public void set*(...); -} - --keepclasseswithmembers class * { - public (android.content.Context, android.util.AttributeSet); -} - --keepclasseswithmembers class * { - public (android.content.Context, android.util.AttributeSet, int); -} - --keepclassmembers class * extends android.app.Activity { - public void *(android.view.View); -} - -# For enumeration classes --keepclassmembers enum * { - public static **[] values(); - public static ** valueOf(java.lang.String); -} - --keep class * implements android.os.Parcelable { - public static final android.os.Parcelable$Creator 
*; -} - --keepclassmembers class **.R$* { - public static ; -} - --keep class android.support.v7.app.** { *; } --keep interface android.support.v7.app.** { *; } --keep class com.actionbarsherlock.** { *; } --keep interface com.actionbarsherlock.** { *; } --dontwarn android.support.** --dontwarn com.google.ads.** - -# Flags to keep standard classes --keep public class * extends android.app.Activity --keep public class * extends android.app.Application --keep public class * extends android.app.Service --keep public class * extends android.content.BroadcastReceiver --keep public class * extends android.content.ContentProvider --keep public class * extends android.app.backup.BackupAgent --keep public class * extends android.preference.Preference --keep public class * extends android.support.v7.app.Fragment --keep public class * extends android.support.v7.app.DialogFragment --keep public class * extends com.actionbarsherlock.app.SherlockListFragment --keep public class * extends com.actionbarsherlock.app.SherlockFragment --keep public class * extends com.actionbarsherlock.app.SherlockFragmentActivity --keep public class * extends android.app.Fragment --keep public class com.android.vending.licensing.ILicensingService -``` - -Testing your app is the best way to check if any errors are being caused by inappropriately removed code; however, you can also inspect what was removed by reviewing the usage.txt output file saved in /build/outputs/mapping/release/. - -To fix errors and force ProGuard to retain certain code, add a -keep line in the ProGuard configuration file. For example: -```java --keep public class MyClass -``` - - -## Memory Management - -It may also be advantageous to increase the allocated memory to your app by adding android:largeHeap="true" to the manifest file. Allocating a larger heap means that you decrease the risk of throwing an OutOfMemoryError during memory intensive operations. 
- -```xml -android:largeHeap="true" -``` - -As of release 0.9.0, ND4J offers an additional memory-management model: workspaces. Workspaces allow you to reuse memory for cyclic workloads without the JVM Garbage Collector for off-heap memory tracking. DL4J Workspaces allow for memory to be preallocated before a try / catch block and reused over and over within that block. - -If your training process uses workspaces, it is recommended that you disable or reduce the frequency of periodic GC calls prior to your model.fit() call. - -```java -// this will limit frequency of gc calls to 5000 milliseconds -Nd4j.getMemoryManager().setAutoGcWindow(5000) - -// this will totally disable it -Nd4j.getMemoryManager().togglePeriodicGc(false); -``` - -The example below illustrates the use of a Workspace for memory allocation in the AsyncTask of an Android Application. More information concerning ND4J Workspaces can be found [here](https://deeplearning4j.org/workspaces). - -```java -import org.nd4j.linalg.api.memory.MemoryWorkspace; -import org.nd4j.linalg.api.memory.conf.WorkspaceConfiguration; -import org.nd4j.linalg.api.memory.enums.AllocationPolicy; -import org.nd4j.linalg.api.memory.enums.LearningPolicy; - - -private class AsyncTaskRunner extends AsyncTask { - - // Runs in UI before background thread is called - @Override - protected void onPreExecute() { - super.onPreExecute(); - } - - //Runs on background thread, this is where we will initiate the Workspace - protected INDArray doInBackground(String... 
params) { - - // we will create configuration with 10MB memory space preallocated - WorkspaceConfiguration initialConfig = WorkspaceConfiguration.builder() - .initialSize(10 * 1024L * 1024L) - .policyAllocation(AllocationPolicy.STRICT) - .policyLearning(LearningPolicy.NONE) - .build(); - - INDArray result = null; - - try(MemoryWorkspace ws = Nd4j.getWorkspaceManager().getAndActivateWorkspace(initialConfig, "SOME_ID")) { - // now, INDArrays created within this try block will be allocated from this workspace pool - - //Load a trained model - File file = new File(Environment.getExternalStorageDirectory() + "/trained_model.zip"); - MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(file); - - // Create input in INDArray - INDArray inputData = Nd4j.zeros(1, 4); - - inputData.putScalar(new int[]{0, 0}, 1); - inputData.putScalar(new int[]{0, 1}, 0); - inputData.putScalar(new int[]{0, 2}, 1); - inputData.putScalar(new int[]{0, 3}, 0); - - result = restored.output(inputData); - - } - catch(IOException ex){Log.d("AsyncTaskRunner2 ", "catchIOException = " + ex );} - - return result; - } - - protected void onProgressUpdate(Integer... values) { - super.onProgressUpdate(values); - } - - protected void onPostExecute(INDArray result) { - super.onPostExecute(result); - //Handle results and update UI here. - } - -} -``` - - -## Saving and Loading Networks on Android - -Practical considerations regarding performance limits are needed when building Android applications that run neural networks. Training a neural network on a device is possible, but should only be attempted with networks with limited numbers of layers, nodes, and iterations. The first Demo app [DL4JIrisClassifierDemo](./deeplearning4j-android-linear-classifier) is able to train on a standard device in about 15 seconds. 
- -When training on a device is a reasonable option, the application performance can be improved by saving the trained model on the phone's external storage once an initial training is complete. The trained model can then be used as an application resource. This approach is useful for training networks with data obtained from user input. The following code illustrates how to train a network and save it on the phone's external resources. - -For API 23 and greater, you will need to include the permissions in your manifest and also programmatically request the read and write permissions in your activity. The required Manifest permissions are: - -```xml - - - - ... -``` - -You need to implement ActivityCompat.OnRequestPermissionsResultCallback in the activity and then check for permission status. - -```java -public class MainActivity extends AppCompatActivity - implements ActivityCompat.OnRequestPermissionsResultCallback { - - private static final int REQUEST_EXTERNAL_STORAGE = 1; - private static String[] PERMISSIONS_STORAGE = { - Manifest.permission.READ_EXTERNAL_STORAGE, - Manifest.permission.WRITE_EXTERNAL_STORAGE - }; - - @Override - protected void onCreate(Bundle savedInstanceState) { - super.onCreate(savedInstanceState); - setContentView(R.layout.activity_main); - - verifyStoragePermission(MainActivity.this); - //… - } - - public static void verifyStoragePermission(Activity activity) { - // Get permission status - int permission = ActivityCompat.checkSelfPermission(activity, Manifest.permission.WRITE_EXTERNAL_STORAGE); - if (permission != PackageManager.PERMISSION_GRANTED) { - // We don't have permission we request it - ActivityCompat.requestPermissions( - activity, - PERMISSIONS_STORAGE, - REQUEST_EXTERNAL_STORAGE - ); - } - } -``` - -To save a network after training on the device use a OutputStream within a try catch block. 
- -```java -try { - File file = new File(Environment.getExternalStorageDirectory() + "/trained_model.zip"); - OutputStream outputStream = new FileOutputStream(file); - boolean saveUpdater = true; - ModelSerializer.writeModel(myNetwork, outputStream, saveUpdater); - -} catch (Exception e) { - Log.e("saveToExternalStorage error", e.getMessage()); -} -``` - -To load the trained network from storage you can use the restoreMultiLayerNetwork method. - -```java -try{ - //Load the model - File file = new File(Environment.getExternalStorageDirectory() + "/trained_model.zip"); - MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(file); - -} catch (Exception e) { - Log.e("Load from External Storage error", e.getMessage()); -} -``` - -For larger or more complex neural networks like Convolutional or Recurrent Neural Networks, training on the device is not a realistic option as long processing times during network training run the risk of generating an OutOfMemoryError and make for a poor user experience. As an alternative, the Neural Network can be trained on the desktop, saved via ModelSerializer, and then loaded as a pre-trained model in the application. Using a pre-trained model in your Android application can be achieved with the following steps: - -* Train yourModel on the desktop and save via modelSerializer. -* Create a raw resource folder in the res directory of the application. -* Copy the yourModel.zip file into the raw folder. -* Access it from your resources using an inputStream within a try / catch block. - -```java -try { -// Load name of model file (yourModel.zip). - InputStream is = getResources().openRawResource(R.raw.yourModel); - -// Load yourModel.zip. - MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(is); - -// Use yourModel. 
- INDArray results = restored.output(input) - System.out.println("Results: "+ results ); -// Handle the exception error -} catch(IOException e) { - e.printStackTrace(); - } -``` - - -## Next Step: Pretrained DL4J Models on Android - -An example application which uses a pretrained model can be found [here](./deeplearning4j-android-image-classification). diff --git a/docs/deeplearning4j/templates/android.md b/docs/deeplearning4j/templates/android.md deleted file mode 100644 index 92d302619..000000000 --- a/docs/deeplearning4j/templates/android.md +++ /dev/null @@ -1,266 +0,0 @@ ---- -title: Android for Deep Learning -short_title: Android Overview -description: Using Deep Learning and Neural Networks in Android Applications -category: Mobile -weight: 0 ---- - -## Using Deep Learning & Neural Networks in Android Applications - -Contents - -* [Prerequisites](#head_link1) -* [Configuring Your Android Studio Project](#head_link2) -* [Starting an Asynchronous Task](#head_link7) -* [Creating a Neural Network](#head_link3) -* [Creating Training Data](#head_link5) -* [Conclusion](#head_link6) - -Generally speaking, training a neural network is a task best suited for powerful computers with multiple GPUs. But what if you want to do it on your humble Android phone or tablet? Well, it’s definitely possible. Considering an average Android device’s specifications, however, it will most likely be quite slow. If that’s not a problem for you, keep reading. - -In this tutorial, I’ll show you how to use [Deeplearning4J](https://deeplearning4j.org/quickstart), a popular Java-based deep learning library, to create and train a neural network on an Android device. - - -## Prerequisites - -For best results, you’ll need the following: - -* An Android device or emulator that runs API level 21 or higher, and has about 200 MB of internal storage space free. I strongly suggest you use an emulator first because you can quickly tweak it in case you run out of memory or storage space. 
-* Android Studio 2.2 or newer -* A more in-depth look at using DL4J in Android Applications can be found here. This guide covers dependencies, memory management, saving device-trained models, and loading pre-trained models in the application. - - -## Configuring Your Android Studio Project - -To be able to use Deeplearning4J in your project, add the following implementation dependencies to your app module’s build.gradle file: - -``` groovy -implementation (group: 'org.deeplearning4j', name: 'deeplearning4j-core', version: '{{page.version}}') { - exclude group: 'org.bytedeco', module: 'opencv-platform' - exclude group: 'org.bytedeco', module: 'leptonica-platform' - exclude group: 'org.bytedeco', module: 'hdf5-platform' -} -implementation group: 'org.nd4j', name: 'nd4j-native', version: '{{page.version}}' -implementation group: 'org.nd4j', name: 'nd4j-native', version: '{{page.version}}', classifier: "android-arm" -implementation group: 'org.nd4j', name: 'nd4j-native', version: '{{page.version}}', classifier: "android-arm64" -implementation group: 'org.nd4j', name: 'nd4j-native', version: '{{page.version}}', classifier: "android-x86" -implementation group: 'org.nd4j', name: 'nd4j-native', version: '{{page.version}}', classifier: "android-x86_64" -implementation group: 'org.bytedeco', name: 'openblas', version: '0.3.9-1.5.3' -implementation group: 'org.bytedeco', name: 'openblas', version: '0.3.9-1.5.3', classifier: "android-arm" -implementation group: 'org.bytedeco', name: 'openblas', version: '0.3.9-1.5.3', classifier: "android-arm64" -implementation group: 'org.bytedeco', name: 'openblas', version: '0.3.9-1.5.3', classifier: "android-x86" -implementation group: 'org.bytedeco', name: 'openblas', version: '0.3.9-1.5.3', classifier: "android-x86_64" -implementation group: 'org.bytedeco', name: 'opencv', version: '4.3.0-1.5.3' -implementation group: 'org.bytedeco', name: 'opencv', version: '4.3.0-1.5.3', classifier: "android-arm" -implementation group: 'org.bytedeco', 
name: 'opencv', version: '4.3.0-1.5.3', classifier: "android-arm64" -implementation group: 'org.bytedeco', name: 'opencv', version: '4.3.0-1.5.3', classifier: "android-x86" -implementation group: 'org.bytedeco', name: 'opencv', version: '4.3.0-1.5.3', classifier: "android-x86_64" -implementation group: 'org.bytedeco', name: 'leptonica', version: '1.79.0-1.5.3' -implementation group: 'org.bytedeco', name: 'leptonica', version: '1.79.0-1.5.3', classifier: "android-arm" -implementation group: 'org.bytedeco', name: 'leptonica', version: '1.79.0-1.5.3', classifier: "android-arm64" -implementation group: 'org.bytedeco', name: 'leptonica', version: '1.79.0-1.5.3', classifier: "android-x86" -implementation group: 'org.bytedeco', name: 'leptonica', version: '1.79.0-1.5.3', classifier: "android-x86_64" - -``` - -If you choose to use a SNAPSHOT version of the dependencies with gradle, you will need to create the a pom.xml file in the root directory and run ``` mvn -U compile ``` on it from the terminal. You will also need to include ``` mavenLocal() ``` in the ``` repository {} ``` block of the build.gradle file. An example pom.xml file is provided below. - -``` xml - - 4.0.0 - org.deeplearning4j - snapshots - 1.0.0-SNAPSHOT - - - org.nd4j - nd4j-native-platform - 1.0.0-SNAPSHOT - - - org.deeplearning4j - deeplearning4j-core - 1.0.0-SNAPSHOT - - - - - sonatype-nexus-snapshots - https://oss.sonatype.org/content/repositories/snapshots - - false - - - true - always - - - - - -``` -Android Studio 3.0 introduced new Gradle, now annotationProcessors should be defined too If you are using it, add following code to gradle dependencies: - -```java -NeuralNetConfiguration.Builder nncBuilder = new NeuralNetConfiguration.Builder(); -nncBuilder.updater(Updater.ADAM); -``` -As you can see, DL4J depends on ND4J, short for N-Dimensions for Java, which is a library that offers fast n-dimensional arrays. 
ND4J internally depends on a library called OpenBLAS, which contains platform-specific native code. Therefore, you must load a version of OpenBLAS and ND4J that matches the architecture of your Android device. - -Dependencies of DL4J and ND4J have several files with identical names. In order to avoid build errors, add the following exclude parameters to your packagingOptions. - -```groovy -packagingOptions { - exclude 'META-INF/DEPENDENCIES' - exclude 'META-INF/DEPENDENCIES.txt' - exclude 'META-INF/LICENSE' - exclude 'META-INF/LICENSE.txt' - exclude 'META-INF/license.txt' - exclude 'META-INF/NOTICE' - exclude 'META-INF/NOTICE.txt' - exclude 'META-INF/notice.txt' - exclude 'META-INF/INDEX.LIST' -} -``` -Your compiled code will have well over 65,536 methods. To be able to handle this condition, add the following option in the defaultConfig: - -```groovy -multiDexEnabled true -``` -And now, press Sync Now to update the project. Finally, make sure that your APK doesn't contain both lib/armeabi and lib/armeabi-v7a subdirectories. If it does, move all files to one or the other as some Android devices will have problems with both present. - - -## Starting an Asynchronous Task - -Training a neural network is CPU-intensive, which is why you wouldn’t want to do it in your application’s UI thread. I’m not too sure if DL4J trains its networks asynchronously by default. Just to be safe, I’ll spawn a separate thread now using the AsyncTask class. - -```java -AsyncTask.execute(new Runnable() { - @Override - public void run() { - createAndUseNetwork(); - } -}); -``` - -Because the method createAndUseNetwork() doesn’t exist yet, create it. - -```java -private void createAndUseNetwork() { -} -``` - - -## Creating a Neural Network - -DL4J has a very intuitive API. Let us now use it to create a simple multi-layer perceptron with hidden layers. It will take two input values, and spit out one output value. To create the layers, we’ll use the DenseLayer and OutputLayer classes. 
Accordingly, add the following code to the createAndUseNetwork() method you created in the previous step: -``` java -DenseLayer inputLayer = new DenseLayer.Builder() - .nIn(2) - .nOut(3) - .name("Input") - .build(); -DenseLayer hiddenLayer = new DenseLayer.Builder() - .nIn(3) - .nOut(2) - .name("Hidden") - .build(); -OutputLayer outputLayer = new OutputLayer.Builder() - .nIn(2) - .nOut(1) - .name("Output") - .build(); -``` -Now that our layers are ready, let’s create a NeuralNetConfiguration.Builder object to configure our neural network. -``` java -NeuralNetConfiguration.Builder nncBuilder = new NeuralNetConfiguration.Builder(); -nncBuilder.updater(Updater.ADAM); -``` -We must now create a NeuralNetConfiguration.ListBuilder object to actually connect our layers and specify their order. -``` java -NeuralNetConfiguration.ListBuilder listBuilder = nncBuilder.list(); -listBuilder.layer(0, inputLayer); -listBuilder.layer(1, hiddenLayer); -listBuilder.layer(2, outputLayer); -``` -Additionally, enable backpropagation by adding the following code: -``` java -listBuilder.backprop(true); -``` -At this point, we can generate and initialize our neural network as an instance of the MultiLayerNetwork class. - -``` java -MultiLayerNetwork myNetwork = new MultiLayerNetwork(listBuilder.build()); -myNetwork.init(); -``` - -## Creating Training Data -To create our training data, we’ll be using the INDArray class, which is provided by ND4J. Here’s what our training data will look like: -``` -INPUTS EXPECTED OUTPUTS ------- ---------------- -0,0 0 -0,1 1 -1,0 1 -1,1 0 - -``` -As you might have guessed, our neural network will behave like an XOR gate. The training data has four samples, and you must mention it in your code. - -``` java -final int NUM_SAMPLES = 4; -``` -And now, create two INDArray objects for the inputs and expected outputs, and initialize them with zeroes. 
- -``` java -INDArray trainingInputs = Nd4j.zeros(NUM_SAMPLES, inputLayer.getNIn()); -INDArray trainingOutputs = Nd4j.zeros(NUM_SAMPLES, outputLayer.getNOut()); -``` -Note that the number of columns in the inputs array is equal to the number of neurons in the input layer. Similarly, the number of columns in the outputs array is equal to the number of neurons in the output layer. - -Filling those arrays with the training data is easy. Just use the putScalar() method: - - -``` java -// If 0,0 show 0 -trainingInputs.putScalar(new int[]{0, 0}, 0); -trainingInputs.putScalar(new int[]{0, 1}, 0); -trainingOutputs.putScalar(new int[]{0, 0}, 0); -// If 0,1 show 1 -trainingInputs.putScalar(new int[]{1, 0}, 0); -trainingInputs.putScalar(new int[]{1, 1}, 1); -trainingOutputs.putScalar(new int[]{1, 0}, 1); -// If 1,0 show 1 -trainingInputs.putScalar(new int[]{2, 0}, 1); -trainingInputs.putScalar(new int[]{2, 1}, 0); -trainingOutputs.putScalar(new int[]{2, 0}, 1); -// If 1,1 show 0 -trainingInputs.putScalar(new int[]{3, 0}, 1); -trainingInputs.putScalar(new int[]{3, 1}, 1); -trainingOutputs.putScalar(new int[]{3, 0}, 0); -``` - -We won’t be using the INDArray objects directly. Instead, we’ll convert them into a DataSet. -```java -DataSet myData = new DataSet(trainingInputs, trainingOutputs); -``` - -At this point, we can start the training by calling the ``` fit() ``` method of the neural network and passing the data set to it. The ``` for ``` loop controls the iterations of the data set through the network. It is set to 1000 iterations in this example. - -```java -for(int l=0; l<=1000; l++) { - myNetwork.fit(myData); -} -``` - -And that’s all there is to it. Your neural network is ready to be used. - - -## Conclusion - -In this tutorial, you saw how easy it is to create and train a neural network using the Deeplearning4J library in an Android Studio project. 
I’d like to warn you, however, that training a neural network on a low-powered, battery operated device might not always be a good idea. - -A second example DL4J Android Application which includes a user interface can be found [here](./deeplearning4j-android-linear-classifier). This example trains a neural network on the device using Anderson’s iris data set for iris flower type classification. The application includes user input for the measurements and returns the probability that these measurements belong to one of three iris types (*Iris serosa, Iris versicolor,* and *Iris virginica*). - -The limitations of processing power and battery life on mobile devices make training robust, multi-layer networks unfeasible. As an alternative to training a network on the device, the neural network being used by your application can be trained on the desktop, saved via ModelSerializer, and then loaded as a pre-trained model in the application. A third example DL4J Android Application can be found [here](./deeplearning4j-android-image-classification) which loads a pre-trained Mnist network and uses it to classify user drawn numbers. diff --git a/docs/deeplearning4j/templates/beginners.md b/docs/deeplearning4j/templates/beginners.md deleted file mode 100644 index 3ca4d82f1..000000000 --- a/docs/deeplearning4j/templates/beginners.md +++ /dev/null @@ -1,102 +0,0 @@ ---- -title: Deep Learning for Beginners -short_title: Beginners -description: Road map for beginners new to deep learning. -category: Get Started -weight: 10 ---- - -## How Do I Start Using Deep Learning? - -Where you start depends on what you already know. - -The prerequisites for really understanding deep learning are linear algebra, calculus and statistics, as well as programming and some machine learning. The prerequisites for applying it are just learning how to deploy a model. 
- -In the case of Deeplearning4j, you should know Java well and be comfortable with tools like the IntelliJ IDE and the automated build tool Maven. [Skymind's SKIL](https://docs.skymind.ai/) also includes a managed Conda environment for machine learning tools using Python. - -Below you'll find a list of resources. The sections are roughly organized in the order they will be useful. - -## Free Machine- and Deep-learning Courses Online - -* [Andrew Ng's Machine-Learning Class on YouTube](https://www.youtube.com/watch?v=qeHZOdmJvFU) -* [Geoff Hinton's Neural Networks Class on YouTube](https://youtu.be/2fRnHVVLf1Y) -* [Patrick Winston's Introduction to Artificial Intelligence @MIT](http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-034-artificial-intelligence-fall-2010/) (For those interested in a survey of artificial intelligence.) -* [Andrej Karpathy's Convolutional Neural Networks Class at Stanford](http://cs231n.github.io) (For those interested in image recognition.) -* [ML@B: Machine Learning Crash Course: Part 1](https://ml.berkeley.edu/blog/2016/11/06/tutorial-1/) -* [ML@B: Machine Learning Crash Course: Part 2](https://ml.berkeley.edu/blog/2016/12/24/tutorial-2/) -* [Gradient descent, how neural networks learn, Deep learning, part 2](https://www.youtube.com/watch?v=IHZwWFHWa-w&feature=youtu.be) - -## Math - -The math involved with deep learning is basically linear algebra, calculus and probability, and if you have studied those at the undergraduate level, you will be able to understand most of the ideas and notation in deep-learning papers. If you haven't studied those in college, never fear. There are many free resources available (and some on this website). - -* [Calculus Made Easy, by Silvanus P. 
Thompson](http://www.gutenberg.org/ebooks/33283?msg=welcome_stranger) -* [Seeing Theory: A Visual Introduction to Probability and Statistics](http://students.brown.edu/seeing-theory/) -* [Andrew Ng's 6-Part Review of Linear Algebra](https://www.youtube.com/playlist?list=PLnnr1O8OWc6boN4WHeuisJWmeQHH9D_Vg) -* [Khan Academy's Linear Algebra Course](https://www.khanacademy.org/math/linear-algebra) -* [Linear Algebra for Machine Learning](https://www.youtube.com/watch?v=ZumgfOei0Ak); Patrick van der Smagt -* [CMU's Linear Algebra Review](http://www.cs.cmu.edu/~zkolter/course/linalg/outline.html) -* [Math for Machine Learning](https://www.umiacs.umd.edu/~hal/courses/2013S_ML/math4ml.pdf) -* [Immersive Linear Algebra](http://immersivemath.com/ila/learnmore.html) -* [Probability Cheatsheet](https://static1.squarespace.com/static/54bf3241e4b0f0d81bf7ff36/t/55e9494fe4b011aed10e48e5/1441352015658/probability_cheatsheet.pdf) -* [The best linear algebra books](https://begriffs.com/posts/2016-07-24-best-linear-algebra-books.html) -* [Markov Chains, Visually Explained](http://setosa.io/ev/markov-chains/) -* [An Introduction to MCMC for Machine Learning](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.13.7133&rep=rep1&type=pdf) -* [Eigenvectors, Eigenvalues, PCA, Covariance and Entropy](https://skymind.ai/wiki/eigenvector) -* [Markov Chain Monte Carlo (MCMC) & Machine Learning](https://skymind.ai/wiki/markov-chain-monte-carlo) -* [Relearning Matrices as Linear Functions](https://www.dhruvonmath.com/2018/12/31/matrices/) - -## Programming - -If you do not know how to program yet, you can start with Java, but you might find other languages easier. Python and Ruby resources can convey the basic ideas in a faster feedback loop. "Learn Python the Hard Way" and "Learn to Program (Ruby)" are two great places to start. 
- -* [Scratch: A Visual Programming Environment From MIT](https://scratch.mit.edu/) -* [Learn to Program (Ruby)](https://pine.fm/LearnToProgram/) -* [Grasshopper: A Mobile App to Learn Basic Coding (Javascript)](https://grasshopper.codes/) -* [Intro to the Command Line](http://cli.learncodethehardway.org/book/) -* [Additional command-line tutorial](http://www.learnenough.com/command-line) -* [A Vim Tutorial and Primer](https://danielmiessler.com/study/vim/) (Vim is an editor accessible from the command line.) -* [Intro to Computer Science (CS50 @Harvard edX)](https://www.edx.org/course/introduction-computer-science-harvardx-cs50x) -* [A Gentle Introduction to Machine Fundamentals](https://marijnhaverbeke.nl/turtle/) -* [Teaching C](https://blog.regehr.org/archives/1393) - -If you want to jump into deep-learning from here without Java, we recommend [Theano](http://deeplearning.net/) and the various Python frameworks built atop it, including [Keras](https://github.com/fchollet/keras) and [Lasagne](https://github.com/Lasagne/Lasagne). - -## Python - -* [Learn Python the Hard Way](http://learnpythonthehardway.org/) -* [Google's Python Class](https://developers.google.com/edu/python/) -* [Udemy: Complete Python 3 Masterclass Journey](https://www.udemy.com/complete-python-3-masterclass-journey/) -* [MIT: Introduction to Computer Science and Python Programming](https://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-0001-introduction-to-computer-science-and-programming-in-python-fall-2016/) -* [David Beazley: Python Tutorials](http://www.dabeaz.com/tutorials.html) -* [CS231n: Python Numpy Tutorial](http://cs231n.github.io/python-numpy-tutorial/) -* [Pyret: A Python Learning Environment](https://www.pyret.org/) - -## Java - -Once you have programming basics down, tackle Java, the world's most widely used programming language. Most large organizations in the world operate on huge Java code bases. (There will always be Java jobs.) 
The big data stack -- Hadoop, Spark, Kafka, Lucene, Solr, Cassandra, Flink -- have largely been written for Java's compute environment, the JVM. - -* [Think Java: Interactive Web-based Dev Environment](https://books.trinket.io/thinkjava/) -* [Learn Java The Hard Way](https://learnjavathehardway.org/) -* [Introduction to JShell](https://docs.oracle.com/javase/10/jshell/introduction-jshell.htm#JSHEL-GUID-630F27C8-1195-4989-9F6B-2C51D46F52C8) -* [JShell in 5 Minutes](https://dzone.com/articles/jshell-in-five-minutes) -* [Java Resources](http://wiht.link/java-resources) -* [Java Ranch: A Community for Java Beginners](http://javaranch.com/) -* [Intro to Programming in Java @Princeton](http://introcs.cs.princeton.edu/java/home/) -* [Head First Java](http://www.amazon.com/gp/product/0596009208) -* [Java in a Nutshell](http://www.amazon.com/gp/product/1449370829) -* [Java Programming for Complete Beginners in 250 Steps](https://www.udemy.com/java-tutorial/) - -## Deeplearning4j - -With that under your belt, we recommend you approach Deeplearning4j through its [examples](https://github.com/eclipse/deeplearning4j-examples). - -* [Quickstart](./deeplearning4j-quickstart) - -You can also download a [free version of the Skymind Intelligence Layer](https://docs.skymind.ai/), which supports Python, Java and Scala machine-learning and data science tools. SKIL is a machine-learning backend that works on prem and in the cloud, and can ship with your software to provide a machine learning model server. - -## Other Resources - -Most of what we know about deep learning is contained in academic papers. You can find some of the major research groups [here](https://skymind.ai/wiki/machine-learning-research-groups-labs). - -While individual courses have limits on what they can teach, the Internet does not. 
Most math and programming questions can be answered by Googling and searching sites like [Stackoverflow](https://stackoverflow.com) and [Math Stackexchange](https://math.stackexchange.com/). diff --git a/docs/deeplearning4j/templates/benchmark.md b/docs/deeplearning4j/templates/benchmark.md deleted file mode 100644 index 0dddd748c..000000000 --- a/docs/deeplearning4j/templates/benchmark.md +++ /dev/null @@ -1,314 +0,0 @@ ---- -title: Benchmarking with DL4J and ND4J -short_title: Benchmark Guide -description: General guidelines for benchmarking in DL4J and ND4J. -category: Get Started -weight: 10 ---- - -## General Benchmarking Guidelines - -**Guideline 1: Run Warm-Up Iterations Before Benchmarking** - -A warm-up period is where you run a number of iterations (for example, a few hundred) of your benchmark without timing, before commencing timing for further iterations. - -Why is a warm-up required? The first few iterations of any ND4J/DL4J execution may be slower than those that come later, for a number of reasons: -1. In the initial benchmark iterations, the JVM has not yet had time to perform just-in-time compilation of code. Once JIT has completed, code is likely to execute faster for all subsequent operations -2. ND4J and DL4J (and, some other libraries) have some degree of lazy initialization: the first operation may trigger some one-off execution code. -3. DL4J or ND4J (when using workspaces) can take some iterations to learn memory requirements for execution. During this learning phase, performance will be lower than after its completion. - - -**Guideline 2: Run Multiple Iterations of All Benchmarks** - -Your benchmark isn't the only thing running on your computer (not to mention if you are using cloud harware, that might have shared resources). And operation runtime is not perfectly deterministic. - -For benchmark results to be reliable, it is important to run multiple iterations - and ideally report both mean and standard deviation for the runtime. 
Without this, it's impossible to compare the performance of operations, as performance differences may simply be due to random variation. - - - -**Guideline 3: Pay Careful Attention to What You Are Benchmarking** - -This is especially important when comparing frameworks. Before you declare that "performance on operation X is Y" or "A is faster than B", make sure that: - -1. You are benchmarking only the operations of interest. - -If your goal is to check the performance of an operation, make sure that only this operation is being timed. - -You should carefully check whether you are unintentionally including other things - for example, does it include: -JVM initialization time? Library initialization time? Result array allocation time? Garbage collection time? Data loading time? - -Ideally, these should be excluded from any timing/performance results you report. If they cannot be excluded, make sure you note this whenever making performance claims. - - -2. What native libraries are you using? - -For example: what BLAS implementation (MKL, OpenBLAS, etc)? If you are using CUDA, are you using CuDNN? -ND4J and DL4J can use these libraries (MKL, CuDNN) when they are available - but are not always available by default. If they are not made available, performance can be lower - sometimes considerably. - -This is especially important when comparing results between libraries: for example, if you compared two libraries (one using OpenBLAS, another using MKL) your results may simply reflect the performance differences in the BLAS library being used - and not the performance of the libraries being tested. Similarly, one library with CuDNN and another without CuDNN may simply reflect the performance benefit of using CuDNN. - - -3. How are things configured? - -For better or worse, DL4J and ND4J allow a lot of configuration. The default values for a lot of this configuration are adequate for most users - but sometimes manual configuration is required for optimal performance. 
This can be especially true in some benchmarks! -Some of these configuration options allow users to trade off higher memory use for better performance, for example. Some configuration options of note: -(a) [Memory configuration](./deeplearning4j-config-memory) -(b) [Workspaces and garbage collection](./deeplearning4j-config-workspaces) -(c) [CuDNN](./deeplearning4j-config-cudnn) -(d) DL4J Cache Mode (enable using ```.cacheMode(CacheMode.DEVICE)```) - - -If you aren't sure if you are only measuring what you intend to measure when running DL4J or ND4J code, you can use a profiler such as VisualVM or YourKit Profilers. - - -4. What versions are you using? - -When benchmarking, you should use the latest version of whatever libraries you are benchmarking. There's no point identifying and reporting a bottleneck that was fixed 6 months ago. An exception to this would be when you are comparing performance over time between versions. -Note also that snapshot versions of DL4J and ND4J are also available - these may contain performance improvements (feel free to ask) - - -**Guideline 4: Focus on Real-World Use Cases - And Run a Range of Sizes** - -Consider for example a benchmark that adds two numbers: -``` -double x = 0; -// -x += 1.0; -// -``` - -And something equivalent in ND4J: -``` -INDArray x = Nd4j.create(1); -// -x.addi(1.0); -// -``` - -Of course, the ND4J benchmark above is going to be much slower - method calls are required, input validation is performed, native code has to be called (with context switching overhead), and so on. One must ask the question, however: is this what users will actually be doing with ND4J or an equivalent linear algebra library? It's an extreme example - but the general point is a valid one. - - -Note also that performance on mathematical operations can be size - and shape - specific. -For example, if you are benchmarking the performance on matrix multiplication - the matrix dimensions can matter a lot.
In some internal benchmarks, we found that different BLAS implementations (MKL vs OpenBLAS) - and different backends (CPU vs GPU) - can perform very differently with different matrix dimensions. None of the BLAS implementations (OpenBLAS, MKL, CUDA) we have tested internally were uniformly faster than others for all input shapes and sizes. - -Therefore - whenever you are running benchmarks, it's important to run those benchmarks with multiple different input shapes/sizes, to get the full performance picture. - - -**Guideline 5: Understand Your Hardware** - -When comparing different hardware, it's important to be aware of what it excels at. -For example, you might find that neural network training performs faster on a CPU with minibatch size 1 than on a GPU - yet larger minibatch sizes show exactly the opposite. Similarly, small layer sizes may not be able to adequately utilize the power of a GPU. - -Furthermore, some deep learning distributions may need to be specifically compiled to provide support for hardware features such as AVX2 (note that recent version of ND4J are packaged with binaries for CPUs that support these features). When running benchmarks, the utilization (or lack there-of) of these features can make a considerable difference to performance. - - -**Guideline 6: Make It Reproducible** - -When running benchmarks, it's important to make your benchmarks reproducible. -Why? Good or bad performance may only occur under certain limited circumstances. - -And finally - remember that (a) ND4J and DL4J are in constant development, and (b) benchmarks do sometimes identify performance bottlenecks (after all we - ND4J includes literally hundreds of distinct operations). If you identify a performance bottleneck, great - we want to know about it - so we can fix it. Any time a potential bottleneck is identified, we first need to reproduce it - so that we can study it, understand it and ultimately fix it. 
- -**Guideline 7: Understand the Limitations of Your Benchmarks** - -Linear algebra libraries contain hundreds of distinct operations. Neural network libraries contain dozens of layer types. When benchmarking, it's important to understand the limitations of those benchmarks. Benchmarking one type of operation or layer cannot tell you anything about the performance on other types of layers or operations - unless they share code that has been identified to be a performance bottleneck. - -**Guideline 8: If You Aren't Sure - Ask** - -The DL4J/ND4J developers are available on Gitter. You can ask questions about benchmarking and performance there: [https://gitter.im/deeplearning4j/deeplearning4j](https://gitter.im/deeplearning4j/deeplearning4j) - -And if you do happen to find a performance issue - let us know! - - - -## ND4J Specific Benchmarking - - -**A Note on BLAS and Array Orders** - -BLAS - or Basic Linear Algebra Subprograms - refers to an interface and set of methods used for linear algebra operations. Some examples include 'gemm' - General Matrix Multiplication - and 'axpy', which implements ```Y = a*X+Y```. - - -ND4J can use multiple BLAS implementations - versions up to and including 1.0.0-beta have defaulted to OpenBLAS. However, if Intel MKL (free versions are available [here](https://software.intel.com/en-us/mkl)) is installed and available, ND4J will link with it for improved performance in many BLAS operations. - -Note that ND4J will log the BLAS backend used when it initializes.
For example: -``` -14:17:34,169 INFO ~ Loaded [CpuBackend] backend -14:17:34,672 INFO ~ Number of threads used for NativeOps: 8 -14:17:34,823 INFO ~ Number of threads used for BLAS: 8 -14:17:34,831 INFO ~ Backend used: [CPU]; OS: [Windows 10] -14:17:34,831 INFO ~ Cores: [16]; Memory: [7.1GB]; -14:17:34,831 INFO ~ Blas vendor: [OPENBLAS] -``` - - -Performance can depend on the available BLAS library - in internal tests, we have found that OpenBLAS has been between 30% faster and 8x slower than MKL - depending on the array sizes and array orders. - -Regarding array orders, this also matters for performance. ND4J has the possibility of representing arrays in either row major ('c') or column major ('f') order. See [this Wikipedia page](https://en.wikipedia.org/wiki/Row-_and_column-major_order) for more details. Performance in operations such as matrix multiplication - but also more general ND4J operations - depends on the input and result array orders. - -For matrix multiplication, this means there are 8 possible combinations of array orders (c/f for each of input 1, input 2 and result arrays). Performance won't be the same for all cases. - -Similarly, an operation such as element-wise addition (i.e., z=x+y) will be much faster for some combinations of input orders than others - notably, when x, y and z are all the same order. In short, this is due to memory striding: it's cheaper to read a sequence of memory addresses when those memory addresses are adjacent to each other in memory, as compared to being spread far apart. - -Note that, by default, ND4J expects result arrays (for matrix multiplication) to be defined in column major ('f') order, to be consistent across backends, given that CuBLAS (i.e., NVIDIA's BLAS library for CUDA) requires results to be in f order. As a consequence, some ways of performing matrix multiplication with the result array being in c order will have lower performance than if the same operation was executed with an 'f' order array.
- -Finally, when it comes to CUDA: array orders/striding can matter even more than when running on CPU. For example, certain combinations of orders can be much faster than others - and input/output dimesions that are even multiples of 32 or 64 typically perform faster (sometimes considerably) than when input/output dimensions are not multiples of 32. - - - -## DL4J Specific Benchmarking - - -Most of what has been said for ND4J also applies to DL4J. - -In addition: -1. If you are using the nd4j-native (CPU) backend, ensure you are using Intel MKL. This is faster than the default of OpenBLAS in most cases. -2. If you are using CUDA, ensure you are using CuDNN ([link](./deeplearning4j-config-cudnn) -3. Check the [Workspaces](./deeplearning4j-config-workspaces) and [Memory](./deeplearning4j-config-memory) guides. The defaults are usually good - but sometimes better performance can be obtained with some tweaking. This is especially important if you have a lot of Java objects (such as, Word2Vec vectors) in memory while training. -4. Watch out for ETL bottlenecks. You can add PerformanceListener to your network training to see if ETL is a bottleneck. -5. Don't forget that performance is dependent on minibatch sizes. Don't benchmark with minibatch size 1 - use something more realistic. -6. If you need multi-GPU training or inference support, use ParallelWrapper or ParallelInference. -7. Don't forget that CuDNN is configurable: you can specify DL4J/CuDNN to prefer performance - at the expense of memory - using ```.cudnnAlgoMode(ConvolutionLayer.AlgoMode.PREFER_FASTEST)``` configuration on convolution layers -8. When using GPUs, multiples of 8 (or 32) for input sizes and layer sizes may perform better. -9. When using RNNs (and manually creating INDArrays), use 'f' ordered arrays for both features and (RnnOutputLayer) labels. Otherwise, use 'c' ordered arrays. This is for faster memory access. 
- - -## Common Benchmark Mistakes - -Finally, here's a summary list of common benchmark mistakes: - -1. Not using the latest version of ND4J/DL4J (there's no point identifying a bottleneck that was fixed many releases back). Consider trying snapshots to get the latest performance improvements. -2. Not paying attention to what native libraries (MKL, OpenBLAS, CuDNN etc) are being used -3. Providing no warm-up period before benchmarking begins -4. Running only a single (or too few) iterations, or not reporting mean, standard deviation and number of iterations -5. Not configuring workspaces, garbage collection, etc -6. Running only one possible case - for example, benchmarking a single set of array dimensions/orders when benchmarking BLAS operations -7. Running unusually small inputs - for example, minibatch size 1 on a GPU (which might be slower - but isn't realistic!) -8. Not measuring exactly - and only - what you claim to be measuring (for example, not accounting for array allocation, initialization or garbage collection time) -9. Not making your benchmarks reproducible (does the benchmark conclusion generalize? are there problems with the benchmark? what can we do to fix it?) -10. Comparing results across different hardware, not accounting for differences (for example, testing on one machine with AVX2 support, and on another without) -11. Not asking the devs (via the [DL4J/ND4J Gitter Channel](https://gitter.im/deeplearning4j/deeplearning4j) - we are happy to provide suggestions and investigate if performance isn't where it should be!) - - - - - - -# How to Run Deeplearning4j Benchmarks - A Guide - -Total training time is always ETL plus computation. That is, both the data pipeline and the matrix manipulations determine how long a neural network takes to train on a dataset.
- -When programmers familiar with Python try to run benchmarks comparing Deeplearning4j to well-known Python frameworks, they usually end up comparing ETL + computation on DL4J to just computation on the Python framework. That is, they're comparing apples to oranges. We'll explain how to optimize several parameters below. - -The JVM has knobs to tune, and if you know how to tune them, you can make it a very fast environment for deep learning. There are several things to keep in mind on the JVM. You need to: - -* Increase the [heap space](http://javarevisited.blogspot.com/2011/05/java-heap-space-memory-size-jvm.html) -* Get garbage collection right -* Make ETL asynchronous -* Presave datasets (aka pickling) - -## Setting Heap Space - -Users have to reconfigure their JVMs themselves, including setting the heap space. We can't give it to you preconfigured, but we can show you how to do it. Here are the two most important knobs for heap space. - -* Xms sets the minimum heap space -* Xmx sets the maximum heap space - -You can set these in IDEs like IntelliJ and Eclipse, as well as via the CLI like so: - - java -Xms256m -Xmx1024m YourClassNameHere - -In [IntelliJ, this is a VM parameter](https://www.jetbrains.com/help/idea/2016.3/setting-configuration-options.html), not a program argument. When you hit run in IntelliJ (the green button), that sets up a run-time configuration. IJ starts a Java VM for you with the configurations you specify. - -What’s the ideal amount to set `Xmx` to? That depends on how much RAM is on your computer. In general, allocate as much heap space as you think the JVM will need to get work done. Let’s say you’re on a 16G RAM laptop — allocate 8G of RAM to the JVM. A sound minimum on laptops with less RAM would be 3g, so - - java -Xmx3g - -It may seem counterintuitive, but you want the min and max to be the same; i.e. `Xms` should equal `Xmx`. 
If they are unequal, the JVM will progressively allocate more memory as needed until it reaches the max, and that process of gradual allocation slows things down. You want to pre-allocate it at the beginning. So - - java -Xms3g -Xmx3g YourClassNameHere - -IntelliJ will automatically specify the [Java main class](https://docs.oracle.com/javase/tutorial/getStarted/application/) in question. - -Another way to do this is by setting your environmental variables. Here, you would alter your hidden `.bash_profile` file, which adds environmental variables to bash. To see those variables, enter `env` in the command line. To add more heap space, enter this command in your console: - - echo 'export MAVEN_OPTS="-Xmx512m -XX:MaxPermSize=512m"' >> ~/.bash_profile - -We need to increase heap space because Deeplearning4j loads data in the background, which means we're taking more RAM in memory. By allowing more heap space for the JVM, we can cache more data in memory. - -## Garbage Collection - -A garbage collector is a program which runs on the JVM and gets rid of objects no longer used by a Java application. It is automatic memory management. Creating a new object in Java takes on-heap memory: A new Java object takes up 8 bytes of memory by default. So every new `DatasetIterator` you create takes another 8 bytes. - -You may need to alter the garbage collection algorithm that Java is using. This can be done via the command line like so: - - java -XX:+UseG1GC - -Better garbage collection increases throughput. For a more detailed exploration of the issue, please read this [InfoQ article](https://www.infoq.com/articles/Make-G1-Default-Garbage-Collector-in-Java-9). - -DL4J is tightly linked to the garbage collector. [JavaCPP](https://github.com/bytedeco/javacpp), the bridge between the JVM and C++, adheres to the heap space you set with `Xmx` and works extensively with off-heap memory. The off-heap memory will not surpass the amount of heap space you specify.
- -JavaCPP, created by a Skymind engineer, relies on the garbage collector to tell it what has been done. We rely on the Java GC to tell us what to collect; the Java GC points at things, and we know how to de-allocate them with JavaCPP. This applies equally to how we work with GPUs. - -The larger the batch size you use, the more RAM you’re taking in memory. - -## ETL & Asynchronous ETL - -In our `dl4j-examples` repo, we don't make the ETL asynchronous, because the point of examples is to keep them simple. But for real-world problems, you need asynchronous ETL, and we'll show you how to do it with examples. - -Data is stored on disk and disk is slow. That’s the default. So you run into bottlenecks when loading data onto your harddrive. When optimizing throughput, the slowest component is always the bottleneck. For example, a distributed Spark job using three GPU workers and one CPU worker will have a bottleneck with the CPU. The GPUs have to wait for that CPU to finish. - -The Deeplearning4j class `DatasetIterator` hides the complexity of loading data on disk. The code for using any Datasetiterator will always be the same, invoking looks the same, but they work differently. - -* one loads from disk -* one loads asynchronously -* one loads pre-saved from RAM - -Here's how the DatasetIterator is uniformly invoked for MNIST: - - while(mnistTest.hasNext()){ - DataSet ds = mnistTest.next(); - INDArray output = model.output(ds.getFeatures(), false); - eval.eval(ds.getLabels(), output); - } - -You can optimize by using an asychronous loader in the background. Java can do real multi-threading. It can load data in the background while other threads take care of compute. So you load data into the GPU at the same time that compute is being run. The neural net trains even as you grab new data from memory. 
- -This is the [relevant code](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/ParallelWrapper.java#L136), in particular the third line: - - MultiDataSetIterator iterator; - if (prefetchSize > 0 && source.asyncSupported()) { - iterator = new AsyncMultiDataSetIterator(source, prefetchSize); - } else iterator = source; - -There are actually two types of asynchronous dataset iterators. The `AsyncDataSetIterator` is what you would use most of the time. It's described in the [Javadoc here](https://deeplearning4j.org/api/{{page.version}}/org/deeplearning4j/datasets/iterator/AsyncDataSetIterator.html). - -For special cases such as recurrent neural nets applied to time series, or for computation graphs, you would use a `AsyncMultiDataSetIterator`, described in the [Javadoc here](https://deeplearning4j.org/api/{{page.version}}/org/deeplearning4j/datasets/iterator/AsyncMultiDataSetIterator.html). - -Notice in the code above that `prefetchSize` is another parameter to set. Normal batch size might be 1000 examples, but if you set `prefetchSize` to 3, it would pre-fetch 3,000 instances. - -## ETL: Comparing Python frameworks With Deeplearning4j - -In Python, programmers are converting their data into [pickles](https://docs.python.org/2/library/pickle.html), or binary data objects. And if they're working with a smallish toy dataset, they're loading all those pickles into RAM. So they're effectively sidestepping a major task in dealing with larger datasets. At the same time, when benchmarking against Dl4j, they're not loading all the data onto RAM. So they're effectively comparing Dl4j speed for training computations + ETL against only training computation time for Python frameworks. - -But Java has robust tools for moving big data, and if compared correctly, is much faster than Python. 
The Deeplearning4j community has reported up to 3700% increases in speed over Python frameworks, when ETL and computation are optimized. - -Deeplearning4j uses DataVec as its ETL and vectorization library. Unlike other deep-learning tools, DataVec does not force a particular format on your dataset. (Caffe forces you to use [hdf5](https://support.hdfgroup.org/HDF5/), for example.) - -We try to be more flexible. That means you can point DL4J at raw photos, and it will load the image, run the transforms and put it into an NDArray to generate a dataset on the fly. - -But if your training pipeline is doing that every time, Deeplearning4j will seem about 10x slower than other frameworks, because you’re spending your time creating datasets. Every time you call `fit`, you're recreating a dataset, over and over again. We allow it to happen for ease of use, but we can show you how to speed things up. There are ways to make it just as fast. - -One way is to pre-save the datasets, in a manner similar to the Python frameworks. (Pickles are pre-formatted data.) When you pre-save the dataset, you create a separate class. - -Here’s how you [pre-save datasets](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/misc/presave/PreSave.java). - -A `RecordReaderDataSetIterator` talks to DataVec and outputs datasets for DL4J. - -Here’s how you [load a pre-saved dataset](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/misc/presave/LoadPreSavedLenetMnistExample.java). - -Line 90 is where you see the asynchronous ETL. In this case, it's wrapping the pre-saved iterator, so you're taking advantage of both methods, with the async loading the pre-saved data in the background as the net trains.
- -## MKL and Inference on CPUs - -If you are running inference benchmarks on CPUs, make sure you are using Deeplearning4j with Intel's MKL library, which is available via a clickwrap; i.e. Deeplearning4j does not bundle MKL like Anaconda, which is used by libraries like PyTorch. \ No newline at end of file diff --git a/docs/deeplearning4j/templates/build-from-source.md b/docs/deeplearning4j/templates/build-from-source.md deleted file mode 100644 index 20a0243bb..000000000 --- a/docs/deeplearning4j/templates/build-from-source.md +++ /dev/null @@ -1,388 +0,0 @@ ---- -title: Building Deeplearning4j from Source -short_title: Build from Source -description: Instructions to build all DL4J libraries from source. -category: Get Started -weight: 10 ---- - -## Build Locally from Master - -**NOTE: MOST USERS SHOULD USE THE RELEASES ON MAVEN CENTRAL AS PER THE QUICK START GUIDE, AND NOT BUILD FROM SOURCE** - -*Unless you have a very good reason to build from source (such as developing new features - excluding custom layers, custom activation functions, custom loss functions, etc - all of which can be added without modifying DL4J directly) then you shouldn't build from source. Building from source can be quite complex, with no benefit in a lot of cases.* - -For those developers and engineers who prefer to use the most up-to-date version of Deeplearning4j or fork and build their own version, these instructions will walk you through building and installing Deeplearning4j. The preferred installation destination is to your machine's local maven repository. If you are not using the master branch, you can modify these steps as needed (i.e.: switching GIT branches and modifying the `build-dl4j-stack.sh` script).
- -Building locally requires that you build the entire Deeplearning4j stack which includes: - -- [libnd4j](https://github.com/eclipse/deeplearning4j/tree/master/libnd4j) -- [nd4j](https://github.com/eclipse/deeplearning4j/tree/master/nd4j) -- [datavec](https://github.com/eclipse/deeplearning4j/tree/master/datavec) -- [deeplearning4j](https://github.com/eclipse/deeplearning4j) - -Note that Deeplearning4j is designed to work on most platforms (Windows, OS X, and Linux) and also includes multiple "flavors" depending on the computing architecture you choose to utilize. This includes CPU (OpenBLAS, MKL, ATLAS) and GPU (CUDA). The DL4J stack also supports x86 and PowerPC architectures. - -## Prerequisites - -Your local machine will require some essential software and environment variables set *before* you try to build and install the DL4J stack. Depending on your platform and the version of your operating system, the instructions may vary in getting them to work. This software includes: - -- git -- cmake (3.2 or higher) -- OpenMP -- gcc (4.9 or higher) -- maven (3.3 or higher) - -Architecture-specific software includes: - -CPU options: - -- Intel MKL -- OpenBLAS -- ATLAS - -GPU options: - -- CUDA - - -IDE-specific requirements: - -- IntelliJ Lombok plugin - -DL4J testing dependencies: - -- dl4j-test-resources - -### Installing Prerequisite Tools - -#### Linux - -**Ubuntu** -Assuming you are using Ubuntu as your flavor of Linux and you are running as a non-root user, follow these steps to install prerequisite software: - -``` -sudo apt-get purge maven maven2 maven3 -sudo add-apt-repository ppa:natecarlson/maven3 -sudo apt-get update -sudo apt-get install maven build-essential cmake libgomp1 -``` - -#### OS X - -Homebrew is the accepted method of installing prerequisite software. Assuming you have Homebrew installed locally, follow these steps to install your necessary tools.
- -First, before using Homebrew we need to ensure an up-to-date version of Xcode is installed (it is used as a primary compiler): - -``` -xcode-select --install -``` - -Finally, install prerequisite tools: - -``` -brew update -brew install maven gcc5 -``` -Note: You can *not* use clang. You also can *not* use a new version of gcc. If you have a newer version of gcc, please -switch versions with [this link](https://apple.stackexchange.com/questions/190684/homebrew-how-to-switch-between-gcc-versions-gcc49-and-gcc) - - -#### Windows - -libnd4j depends on some Unix utilities for compilation. So in order to compile it you will need to install [Msys2](https://msys2.github.io/). - -After you have setup Msys2 by following [their instructions](https://msys2.github.io/), you will have to install some additional development packages. Start the msys2 shell and setup the dev environment with: - - pacman -S mingw-w64-x86_64-gcc mingw-w64-x86_64-cmake mingw-w64-x86_64-extra-cmake-modules make pkg-config grep sed gzip tar mingw64/mingw-w64-x86_64-openblas - -This will install the needed dependencies for use in the msys2 shell. - -You will also need to setup your PATH environment variable to include `C:\msys64\mingw64\bin` (or where ever you have decided to install msys2). If you have IntelliJ (or another IDE) open, you will have to restart it before this change takes effect for applications started through them. If you don't, you probably will see a "Can't find dependent libraries" error. - -### Installing Prerequisite Architectures - -Once you have installed the prerequisite tools, you can now install the required architectures for your platform. - -#### Intel MKL - -Of all the existing architectures available for CPU, Intel MKL is currently the fastest. However, it requires some "overhead" before you actually install it. - -1. Apply for a license at [Intel's site](https://software.intel.com/en-us/intel-mkl) -2. 
After a few steps through Intel, you will receive a download link -3. Download and install Intel MKL using [the setup guide](https://software.intel.com/sites/default/files/managed/94/bf/Install_Guide_0.pdf) - -#### OpenBLAS - -##### Linux - -**Ubuntu** -Assuming you are using Ubuntu, you can install OpenBLAS via: - -``` -sudo apt-get install libopenblas-dev -``` - -You will also need to ensure that `/opt/OpenBLAS/lib` (or any other home directory for OpenBLAS) is on your `PATH`. In order to get OpenBLAS to work with Apache Spark, you will also need to do the following: - -``` -sudo cp libopenblas.so liblapack.so.3 -sudo cp libopenblas.so libblas.so.3 -``` - -**CentOS** -Enter the following in your terminal (or ssh session) as a root user: - - yum groupinstall 'Development Tools' - -After that, you should see a lot of activity and installs on the terminal. To verify that you have, for example, *gcc*, enter this line: - - gcc --version - -For more complete instructions, [go here](http://www.cyberciti.biz/faq/centos-linux-install-gcc-c-c-compiler/). - -##### OS X - -You can install OpenBLAS on OS X with Homebrew: - -``` -brew install openblas -``` - -##### Windows - -An OpenBLAS package is available for `msys2`. You can install it using the `pacman` command. - -#### ATLAS - -##### Linux - -**Ubuntu** -An apt package is available for ATLAS on Ubuntu: - -``` -sudo apt-get install libatlas-base-dev libatlas-dev -``` - -**CentOS** -You can install ATLAS on CentOS using: - -``` -sudo yum install atlas-devel -``` - -##### OS X - -Installing ATLAS on OS X is a somewhat complicated and lengthy process. 
However, the following commands will work on most machines: - -``` -wget --content-disposition https://sourceforge.net/projects/math-atlas/files/latest/download?source=files -tar jxf atlas*.tar.bz2 -mkdir atlas (Creating a directory for ATLAS) -mv ATLAS atlas/src-3.10.1 -cd atlas/src-3.10.1 -wget http://www.netlib.org/lapack/lapack-3.5.0.tgz (It may be possible that the atlas download already contains this file in which case this command is not needed) -mkdir intel(Creating a build directory) -cd intel -cpufreq-selector -g performance (This command requires root access. It is recommended but not essential) -../configure --prefix=/path to the directory where you want ATLAS installed/ --shared --with-netlib-lapack-tarfile=../lapack-3.5.0.tgz -make -make check -make ptcheck -make time -make install -``` - -#### CUDA - -##### Linux & OS X - -Detailed instructions for installing GPU architectures such as CUDA can be found [here](./deeplearning4j-config-gpu-cpu). - -##### Windows - -The CUDA Backend has some additional requirements before it can be built: - -* [CUDA SDK](https://developer.nvidia.com/cuda-downloads) -* [Visual Studio 2012 or 2013](https://www.visualstudio.com/en-us/news/vs2013-community-vs.aspx) (Please note: Visual Studio 2015 is *NOT SUPPORTED* by CUDA 7.5 and below) - -In order to build the CUDA backend you will have to setup some more environment variables first, by calling `vcvars64.bat`. -But first, set the system environment variable `SET_FULL_PATH` to `true`, so all of the variables that `vcvars64.bat` sets up, are passed to the mingw shell. - -1. Inside a normal cmd.exe command prompt, run `C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\bin\amd64\vcvars64.bat` -2. Run `c:\msys64\mingw64_shell.bat` inside that -3. Change to your libnd4j folder -4. `./buildnativeoperations.sh -c cuda` - -This builds the CUDA nd4j.dll. 
- -#### IDE Requirements - -If you are building Deeplearning4j through an IDE such as IntelliJ, you will need to install certain plugins to ensure your IDE renders code highlighting appropriately. You will need to install a plugin for Lombok: - -* IntelliJ Lombok Plugin: https://plugins.jetbrains.com/plugin/6317-lombok-plugin -* Eclipse Lombok Plugin: Follow instructions at https://projectlombok.org/download.html - -If you want to work on ScalNet, the Scala API, or on certain modules such as the DL4J UI, you will need to ensure your IDE has Scala support installed and available to you. - -#### Testing - -Deeplearning4j uses a separate repository that contains all resources necessary for testing. This is to keep the central DL4J repository lightweight and avoid large blobs in the GIT history. To run the tests you need to install the test-resources from https://github.com/deeplearning4j/dl4j-test-resources (~10gb). If you don't care about history, do a shallow clone only with -```bash -git clone --depth 1 --branch master https://github.com/deeplearning4j/dl4j-test-resources -cd dl4j-test-resources -mvn install -``` - -Tests will run __only__ when `testresources` and a backend profile (such as `test-nd4j-native`) are selected - -```bash -mvn clean test -P testresources,test-nd4j-native -``` - -Running the tests will take a while. To run tests of just a single maven module you can add a module constraint with `-pl deeplearning4j-core` (for details see [here](https://stackoverflow.com/questions/11869762/maven-run-only-single-test-in-multi-module-project)) - -## Installing the DL4J Stack - -## OS X & Linux - -### Checking ENV - -Before running the DL4J stack build script, you must ensure certain environment variables are defined before running your build. These are outlined below depending on your architecture. 
- -#### LIBND4J_HOME - -You will need to know the exact path of the directory where you are running the DL4J build script (you are encouraged to use a clean empty directory). Otherwise, your build will fail. Once you determine this path, add `/libnd4j` to the end of that path and export it to your local environment. This will look like: - -``` -export LIBND4J_HOME=/home/user/directory/libnd4j -``` - -#### CPU architecture w/ MKL - -You can link with MKL either at build time, or at runtime with binaries initially linked with another BLAS implementation such as OpenBLAS. To build against MKL, simply add the path containing `libmkl_rt.so` (or `mkl_rt.dll` on Windows), say `/path/to/intel64/lib/`, to the `LD_LIBRARY_PATH` environment variable on Linux (or `PATH` on Windows) and build like before. On Linux though, to make sure it uses the correct version of OpenMP, we also might need to set these environment variables: - -```bash -export MKL_THREADING_LAYER=GNU -export LD_PRELOAD=/lib64/libgomp.so.1 -``` - -When libnd4j cannot be rebuilt, we can use the MKL libraries after the facts and get them loaded instead of OpenBLAS at runtime, but things are a bit trickier. Please additionally follow the instructions below. - -1. Make sure that files such as `/lib64/libopenblas.so.0` and `/lib64/libblas.so.3` are not available (or appear after in the `PATH` on Windows), or they will get loaded by libnd4j by their absolute paths, before anything else. -2. Inside `/path/to/intel64/lib/`, create a symbolic link or copy of `libmkl_rt.so` (or `mkl_rt.dll` on Windows) to the name that libnd4j expect to load, for example: - -```bash -ln -s libmkl_rt.so libopenblas.so.0 -ln -s libmkl_rt.so libblas.so.3 -``` - -```cmd -copy mkl_rt.dll libopenblas.dll -copy mkl_rt.dll libblas3.dll -``` - -3. Finally, add `/path/to/intel64/lib/` to the `LD_LIBRARY_PATH` environment variable (or early in the `PATH` on Windows) and run your Java application as usual. 
- - -### Build Script - -You can use the [build-dl4j-stack.sh](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/build-dl4j-stack.sh) script from the deeplearning4j repository to build the whole deeplearning4j stack from source: libnd4j, nd4j, datavec, deeplearning4j. It clones the DL4J stack, builds each repository, and installs them locally to Maven. This script will work on both Linux and OS X platforms. - -OK, now read the following section carefully. - -Use the build script below for CPU architectures: - -``` -./build-dl4j-stack.sh -``` -Make sure to read this if you are on OS X (ensure gcc 5.x is setup and you aren't using clang): -https://github.com/eclipse/deeplearning4j/issues/2668 - - -If you are using a GPU backend, use this instead: - -``` -./build-dl4j-stack.sh -c cuda -``` - -You can speed up your CUDA builds by using the `cc` flag as explained in the [libnd4j README](https://github.com/eclipse/deeplearning4j/tree/master/libnd4j). - -For Scala users, you can pass your binary version for Spark compatibility: - -``` -./build-dl4j-stack.sh -c cuda --scalav 2.11 -``` - -The build script passes all options and flags to the libnd4j `./buildnativeoperations.sh` script. All flags used for that script can be passed via `build-dl4j-stack.sh`. - -### Building Manually - -If you prefer, you can build each piece in the DL4J stack by hand. The procedure for each piece of software is essentially: - -1. Git clone -2. Build -3. Install - -The overall procedure looks like the following commands below, with the exception that libnd4j's `./buildnativeoperations.sh` accepts parameters based on the backend you are building for. You need to follow these instructions in the order they're given. If you don't, you'll run into errors. The GPU-specific instructions below have been commented out, but should be substituted for the CPU-specific commands when building for a GPU backend. 
- -``` shell -# removes any existing repositories to ensure a clean build -rm -rf libnd4j -rm -rf nd4j -rm -rf datavec -rm -rf deeplearning4j - -# compile libnd4j -git clone https://github.com/eclipse/deeplearning4j.git -cd libnd4j -./buildnativeoperations.sh -# and/or when using GPU -# ./buildnativeoperations.sh -c cuda -cc INSERT_YOUR_DEVICE_ARCH_HERE -# i.e. if you have GTX 1070 device, use -cc 61 -export LIBND4J_HOME=`pwd` -cd .. - -# build and install nd4j to maven locally -git clone https://github.com/eclipse/deeplearning4j.git -cd nd4j -# cross-build across Scala versions (recommended) -bash buildmultiplescalaversions.sh clean install -DskipTests -Dmaven.javadoc.skip=true -pl '!:nd4j-cuda-9.0,!:nd4j-cuda-9.0-platform,!:nd4j-tests' -# or build for a single scala version -# mvn clean install -DskipTests -Dmaven.javadoc.skip=true -pl '!:nd4j-cuda-9.0,!:nd4j-cuda-9.0-platform,!:nd4j-tests' -# or when using GPU -# mvn clean install -DskipTests -Dmaven.javadoc.skip=true -pl '!:nd4j-tests' -cd .. - -# build and install datavec -git clone https://github.com/eclipse/deeplearning4j.git -cd datavec -if [ "$SCALAV" == "" ]; then - bash buildmultiplescalaversions.sh clean install -DskipTests -Dmaven.javadoc.skip=true -else - mvn clean install -DskipTests -Dmaven.javadoc.skip=true -Dscala.binary.version=$SCALAV -Dscala.version=$SCALA -fi -cd .. - -# build and install deeplearning4j -git clone https://github.com/eclipse/deeplearning4j.git -cd deeplearning4j -# cross-build across Scala versions (recommended) -./buildmultiplescalaversions.sh clean install -DskipTests -Dmaven.javadoc.skip=true -# or build for a single scala version -# mvn clean install -DskipTests -Dmaven.javadoc.skip=true -# If you skipped CUDA you may need to add -# -pl '!./deeplearning4j-cuda/' -# to the mvn clean install command to prevent the build from looking for cuda libs -cd .. 
-``` - -## Using Local Dependencies - -Once you've installed the DL4J stack to your local maven repository, you can now include it in your build tool's dependencies. Follow the typical [Getting Started](http://deeplearning4j.org/gettingstarted) instructions for Deeplearning4j, and appropriately replace versions with the SNAPSHOT version currently on the [master POM](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/pom.xml). - -Note that some build tools such as Gradle and SBT don't properly pull in platform-specific binaries. You can follow instructions [here](http://nd4j.org/dependencies.html) for setting up your favorite build tool. - -## Support and Assistance - -If you encounter issues while building locally, the Deeplearning4j [Early Adopters Channel](https://gitter.im/deeplearning4j/deeplearning4j/earlyadopters) is a channel dedicated to assisting with build issues and other source problems. Please reach out on Gitter for help. \ No newline at end of file diff --git a/docs/deeplearning4j/templates/cheat-sheet.md b/docs/deeplearning4j/templates/cheat-sheet.md deleted file mode 100644 index f4b4157af..000000000 --- a/docs/deeplearning4j/templates/cheat-sheet.md +++ /dev/null @@ -1,697 +0,0 @@ ---- -title: Deeplearning4j Cheat Sheet -short_title: Cheat Sheet -description: Snippets and links for common functionality in Eclipse Deeplearning4j. -category: Get Started -weight: 2 ---- - -## Quick reference - -Deeplearning4j (and related projects) have a lot of functionality. The goal of this page is to summarize this functionality so users know what exists, and where to find more information. 
- -**Contents** - -* [Layers](#layers) - * [Feed-Forward Layers](#layers-ff) - * [Output Layers](#layers-out) - * [Convolutional Layers](#layers-conv) - * [Recurrent Layers](#layers-rnn) - * [Unsupervised Layers](#layers-unsupervised) - * [Other Layers](#layers-other) - * [Graph Vertices](#layers-vertices) - * [InputPreProcessors](#layers-preproc) -* [Iteration/Training Listeners](#listeners) -* [Evaluation](#evaluation) -* [Network Saving and Loading](#saving) -* [Network Configurations](#config) - * [Activation Functions](#config-afn) - * [Weight Initialization](#config-init) - * [Updaters (Optimizers)](#config-updaters) - * [Learning Rate Schedules](#config-schedules) - * [Regularization](#config-regularization) - * [L1/L2 regularization](#config-l1l2) - * [Dropout](#config-dropout) - * [Weight Noise](#config-weightnoise) - * [Constraints](#config-constraints) -* [Data Classes](#data) - * [Iterators](#data-iter) - * [Iterators - Build-In (DL4J-Provided Data)](#data-iter-builtin) - * [Iterators - User Provided Data](#data-iter-user) - * [Iterators - Adapter and Utility Iterators](#data-iter-util) - * [Reading Raw Data: DataVec RecordReaders](#data-datavec) - * [Data Normalization](#data-norm) - * [Spark Network Training Data Classes](#data-spark) -* [Transfer Learning](#transfer) -* [Trained Model Library - Model Zoo](#zoo) -* [SKIL - Model Deployment](#skil) -* [Keras Import](#keras) -* [Distributed Training (Spark)](#spark) -* [Hyperparameter Optimization (Arbiter)](#arbiter) - -### Layers - -### Feed-Forward Layers - -* **DenseLayer** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/dense/DenseLayer.java)) - A simple/standard fully-connected layer -* **EmbeddingLayer** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayer.java)) - 
Takes positive integer indexes as input, outputs vectors. Only usable as first layer in a model. Mathematically equivalent (when bias is enabled) to DenseLayer with one-hot input, but more efficient. See also: EmbeddingSequenceLayer. - -#### Output Layers - -Output layers: usable only as the last layer in a network. Loss functions are set here. - -* **OutputLayer** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/OutputLayer.java)) - Output layer for standard classification/regression in MLPs/CNNs. Has a fully connected DenseLayer built in. 2d input/output (i.e., row vector per example). -* **LossLayer** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LossLayer.java)) - Output layer without parameters - only loss function and activation function. 2d input/output (i.e., row vector per example). Unlike OutputLayer, restricted to nIn = nOut. -* **RnnOutputLayer** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnOutputLayer.java)) - Output layer for recurrent neural networks. 3d (time series) input and output. Has time distributed fully connected layer built in. -* **RnnLossLayer** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnLossLayer.java)) - The 'no parameter' version of RnnOutputLayer. 3d (time series) input and output. -* **CnnLossLayer** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CnnLossLayer.java)) - Used with CNNs, where a prediction must be made at each spatial location of the output (for example: segmentation or denoising). 
No parameters, 4d input/output with shape [minibatch, depth, height, width]. When using softmax, this is applied depthwise at each spatial location. -* **Cnn3DLossLayer** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Cnn3DLossLayer.java)) - used with 3D CNNs, where a prediction must be made at each spatial location (x/y/z) of the output. Layer has no parameters, 5d data in either NCDHW or NDHWC ("channels first" or "channels last") format (configurable). Supports masking. When using Softmax, this is applied along channels at each spatial location. -* **Yolo2OutputLayer** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java)) - Implementation of the YOLO 2 model for object detection in images -* **CenterLossOutputLayer** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CenterLossOutputLayer.java)) - A version of OutputLayer that also attempts to minimize the intra-class distance of examples' activations - i.e., "If example x is in class Y, ensure that embedding(x) is close to average(embedding(y)) for all examples y in Y" - - -#### Convolutional Layers - -* **ConvolutionLayer** / Convolution2D - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java)) - Standard 2d convolutional neural network layer. Inputs and outputs have 4 dimensions with shape [minibatch,depthIn,heightIn,widthIn] and [minibatch,depthOut,heightOut,widthOut] respectively. 
-* **Convolution1DLayer** / Convolution1D - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DLayer.java)) - Standard 1d convolution layer -* **Convolution3DLayer** / Convolution3D - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution3D.java)) - Standard 3D convolution layer. Supports both NDHWC ("channels last") and NCDHW ("channels first") activations format. -* **Deconvolution2DLayer** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Deconvolution2DLayer.java)) - also known as transpose or fractionally strided convolutions. Can be considered a "reversed" ConvolutionLayer; output size is generally larger than the input, whilst maintaining the spatial connection structure. -* **SeparableConvolution2DLayer** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SeparableConvolution2DLayer.java)) - depthwise separable convolution layer -* **SubsamplingLayer** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/SubsamplingLayer.java)) - Implements standard 2d spatial pooling for CNNs - with max, average and p-norm pooling available. -* **Subsampling1DLayer** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java)) - 1D version of the subsampling layer. 
-* **Upsampling2D** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling2D.java)) - Upscale CNN activations by repeating the row/column values -* **Upsampling1D** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling1D.java)) - 1D version of the upsampling layer -* **Cropping2D** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping2D.java)) - Cropping layer for 2D convolutional neural networks -* **DepthwiseConvolution2D** ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DepthwiseConvolution2D.java))- 2d depthwise convolution layer -* **ZeroPaddingLayer** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPaddingLayer.java)) - Very simple layer that adds the specified amount of zero padding to edges of the 4d input activations. 
-* **ZeroPadding1DLayer** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPadding1DLayer.java)) - 1D version of ZeroPaddingLayer -* **SpaceToDepth** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToDepthLayer.java)) - This operation takes 4D array in, and moves data from spatial dimensions (HW) to channels (C) for given blockSize -* **SpaceToBatch** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToBatchLayer.java)) - Transforms data from a tensor from 2 spatial dimensions into batch dimension according to the "blocks" specified - - -#### Recurrent Layers - -* **LSTM** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LSTM.java)) - LSTM RNN without peephole connections. Supports CuDNN. -* **GravesLSTM** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java)) - LSTM RNN with peephole connections. Does *not* support CuDNN (thus for GPUs, LSTM should be used in preference). -* **GravesBidirectionalLSTM** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.java)) - A bidirectional LSTM implementation with peephole connections. Equivalent to Bidirectional(ADD, GravesLSTM). Due to addition of Bidirectional wrapper (below), has been deprecated on master. 
-* **Bidirectional** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/Bidirectional.java)) - A 'wrapper' layer - converts any standard uni-directional RNN into a bidirectional RNN (doubles number of params - forward/backward nets have independent parameters). Activations from forward/backward nets may be either added, multiplied, averaged or concatenated. -* **SimpleRnn** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/SimpleRnn.java)) - A standard/'vanilla' RNN layer. Usually not effective in practice with long time series dependencies - LSTM is generally preferred. -* **LastTimeStep** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/LastTimeStep.java)) - A 'wrapper' layer - extracts out the last time step of the (non-bidirectional) RNN layer it wraps. 3d input with shape [minibatch, size, timeSeriesLength], 2d output with shape [minibatch, size]. -* EmbeddingSequenceLayer: ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingSequenceLayer.java)) - A version of EmbeddingLayer that expects fixed-length number (inputLength) of integers/indices per example as input, ranged from 0 to numClasses - 1. This input thus has shape [numExamples, inputLength] or shape [numExamples, 1, inputLength]. The output of this layer is 3D (sequence/time series), namely of shape [numExamples, nOut, inputLength]. Can only be used as the first layer for a network. 
- - -#### Unsupervised Layers - -* **VariationalAutoencoder** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java)) - A variational autoencoder implementation with MLP/dense layers for the encoder and decoder. Supports multiple different types of [reconstruction distributions](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational) -* **AutoEncoder** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AutoEncoder.java)) - Standard denoising autoencoder layer - -#### Other Layers - -* **GlobalPoolingLayer** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GlobalPoolingLayer.java)) - Implements both pooling over time (for RNNs/time series - input size [minibatch, size, timeSeriesLength], out [minibatch, size]) and global spatial pooling (for CNNs - input size [minibatch, depth, h, w], out [minibatch, depth]). Available pooling modes: sum, average, max and p-norm. -* **ActivationLayer** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java)) - Applies an activation function (only) to the input activations. Note that most DL4J layers have activation functions built in as a config option. -* **DropoutLayer** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DropoutLayer.java)) - Implements dropout as a separate/single layer. Note that most DL4J layers have a "built-in" dropout configuration option. 
-* **BatchNormalization** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java)) - Batch normalization for 2d (feedforward), 3d (time series) or 4d (CNN) activations. For time series, parameter sharing across time; for CNNs, parameter sharing across spatial locations (but not depth). -* **LocalResponseNormalization** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocalResponseNormalization.java)) - Local response normalization layer for CNNs. Not frequently used in modern CNN architectures. -* **FrozenLayer** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayer.java)) - Usually not used directly by users - added as part of transfer learning, to freeze a layer's parameters such that they don't change during further training. -* **LocallyConnected2D** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected2D.java)) - a 2d locally connected layer, assumes input is 4d data in NCHW ("channels first") format. -* **LocallyConnected1D** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected1D.java)) - a 1d locally connected layer, assumes input is 3d data in NCW ([minibatch, size, sequenceLength]) format - - -#### Graph Vertices - -Graph vertex: use with ComputationGraph. Similar to layers, vertices usually don't have any parameters, and may support multiple inputs. 
- -* **ElementWiseVertex** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/graph/ElementWiseVertex.java)) - Performs an element-wise operation on the inputs - add, subtract, product, average, max -* **L2NormalizeVertex** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/graph/L2NormalizeVertex.java)) - normalizes the input activations by dividing by the L2 norm for each example. i.e., out <- out / l2Norm(out) -* **L2Vertex** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/graph/L2Vertex.java)) - calculates the L2 distance between the two input arrays, for each example separately. Output is a single value, for each input value. -* **MergeVertex** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/graph/L2Vertex.java)) - merge the input activations along dimension 1, to make a larger output array. For CNNs, this implements merging along the depth/channels dimension -* **PreprocessorVertex** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/graph/PreprocessorVertex.java)) - a simple GraphVertex that contains an InputPreProcessor only -* **ReshapeVertex** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/graph/ReshapeVertex.java)) - Performs arbitrary activation array reshaping. The preprocessors in the next section should usually be preferred. 
-* **ScaleVertex** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/graph/ScaleVertex.java)) - implements simple multiplicative scaling of the inputs - i.e., out = scalar * input -* **ShiftVertex** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/graph/ShiftVertex.java)) - implements simple scalar element-wise addition on the inputs - i.e., out = input + scalar -* **StackVertex** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/graph/StackVertex.java)) - used to stack all inputs along the minibatch dimension. Analogous to MergeVertex, but along dimension 0 (minibatch) instead of dimension 1 (nOut/channels) -* **SubsetVertex** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/graph/SubsetVertex.java)) - used to get a contiguous subset of the input activations along dimension 1. For example, two SubsetVertex instances could be used to split the activations from an input array into two separate activations. Essentially the opposite of MergeVertex. -* **UnstackVertex** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/graph/UnstackVertex.java)) - similar to SubsetVertex, but along dimension 0 (minibatch) instead of dimension 1 (nOut/channels). Opposite of StackVertex - - - -### Input Pre Processors - -An InputPreProcessor is a simple class/interface that operates on the input to a layer. That is, a preprocessor is attached to a layer, and performs some operation on the input, before passing the layer to the output. Preprocessors also handle backpropagation - i.e., the preprocessing operations are generally differentiable. 
- -Note that in many cases (such as the XtoYPreProcessor classes), users won't need to (and shouldn't) add these manually, and can instead just use ```.setInputType(InputType.feedForward(10))``` or similar, which will infer and add the preprocessors as required. - -* **CnnToFeedForwardPreProcessor** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/CnnToFeedForwardPreProcessor.java)) - handles the activation reshaping necessary to transition from a CNN layer (ConvolutionLayer, SubsamplingLayer, etc) to DenseLayer/OutputLayer etc. -* **CnnToRnnPreProcessor** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/CnnToRnnPreProcessor.java)) - handles reshaping necessary to transition from a (effectively, time distributed) CNN layer to a RNN layer. -* **ComposableInputPreProcessor** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/ComposableInputPreProcessor.java)) - simple class that allows multiple preprocessors to be chained + used on a single layer -* **FeedForwardToCnnPreProcessor** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/FeedForwardToCnnPreProcessor.java)) - handles activation reshaping to transition from a row vector (per example) to a CNN layer. Note that this transition/preprocessor only makes sense if the activations are actually CNN activations, but have been 'flattened' to a row vector. 
-* **FeedForwardToRnnPreProcessor** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/FeedForwardToRnnPreProcessor.java)) - handles transition from a (time distributed) feed-forward layer to a RNN layer -* **RnnToCnnPreProcessor** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/RnnToCnnPreProcessor.java)) - handles transition from a sequence of CNN activations with shape ```[minibatch, depth*height*width, timeSeriesLength]``` to time-distributed ```[numExamples*timeSeriesLength, numChannels, inputWidth, inputHeight]``` format -* **RnnToFeedForwardPreProcessor** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/RnnToFeedForwardPreProcessor.java)) - handles transition from time series activations (shape ```[minibatch,size,timeSeriesLength]```) to time-distributed feed-forward (shape ```[minibatch*tsLength,size]```) activations. - - -### Iteration/Training Listeners - -IterationListener: can be attached to a model, and are called during training, once after every iteration (i.e., after each parameter update). -TrainingListener: extends IterationListener. Has a number of additional methods are called at different stages of training - i.e., after forward pass, after gradient calculation, at the start/end of each epoch, etc. 
- -Neither type (iteration/training) are called outside of training (i.e., during output or feed-forward methods) - - -* **ScoreIterationListener** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/listeners/ScoreIterationListener.java), Javadoc) - Logs the loss function score every N training iterations -* **PerformanceListener** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/listeners/PerformanceListener.java), Javadoc) - Logs performance (examples per sec, minibatches per sec, ETL time), and optionally score, every N training iterations. -* **EvaluativeListener** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/listeners/EvaluativeListener.java), Javadoc) - Evaluates network performance on a test set every N iterations or epochs. Also has a system for callbacks, to (for example) save the evaluation results. -* **CheckpointListener** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/listeners/checkpoint/CheckpointListener.java), Javadoc) - Save network checkpoints periodically - based on epochs, iterations or time (or some combination of all three). -* **StatsListener** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/main/java/org/deeplearning4j/ui/stats/StatsListener.java)) - Main listener for DL4J's web-based network training user interface. See [visualization page](https://deeplearning4j.org/visualization) for more details. 
-* **CollectScoresIterationListener** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/listeners/CollectScoresIterationListener.java), Javadoc) - Similar to ScoreIterationListener, but stores scores internally in a list (for later retrieval) instead of logging scores -* **TimeIterationListener** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/optimize/listeners/TimeIterationListener.java), Javadoc) - Attempts to estimate time until training completion, based on current speed and specified total number of iterations - -### Evaluation - -Link: [Main evaluation page](https://deeplearning4j.org/evaluation) - -ND4J has a number of classes for evaluating the performance of a network, against a test set. Deeplearning4j (and SameDiff) use these ND4J evaluation classes. Different evaluation classes are suitable for different types of networks. -Note: in 1.0.0-beta3 (November 2018), all evaluation classes were moved from DL4J to ND4J; previously they were in DL4J. - - -* **Evaluation** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/evaluation/classification/Evaluation.java)) - Used for the evaluation of multi-class classifiers (assumes standard one-hot labels, and softmax probability distribution over N classes for predictions). Calculates a number of metrics - accuracy, precision, recall, F1, F-beta, Matthews correlation coefficient, confusion matrix. Optionally calculates top N accuracy, custom binary decision thresholds, and cost arrays (for non-binary case). Typically used for softmax + mcxent/negative-log-likelihood networks. 
-* **EvaluationBinary** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/evaluation/classification/EvaluationBinary.java)) - A multi-label binary version of the Evaluation class. Each network output is assumed to be a separate/independent binary class, with probability 0 to 1 independent of all other outputs. Typically used for sigmoid + binary cross entropy networks.
-* **EvaluationCalibration** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/evaluation/classification/EvaluationCalibration.java)) - Used to evaluate the calibration of a binary or multi-class classifier. Produces reliability diagrams, residual plots, and histograms of probabilities. Export plots to HTML using [EvaluationTools](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-core/src/main/java/org/deeplearning4j/evaluation/EvaluationTools.java).exportEvaluationCalibrationToHtmlFile method
-* **ROC** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/evaluation/classification/ROC.java)) - Used for single output binary classifiers only - i.e., networks with nOut(1) + sigmoid, or nOut(2) + softmax. Supports 2 modes: thresholded (approximate) or exact (the default). Calculates area under ROC curve, area under precision-recall curve. 
Plot ROC and P-R curves to HTML using [EvaluationTools](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-core/src/main/java/org/deeplearning4j/evaluation/EvaluationTools.java) -* **ROCBinary** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/evaluation/classification/ROCBinary.java)) - a version of ROC that is used for multi-label binary networks (i.e., sigmoid + binary cross entropy), where each network output is assumed to be an independent binary variable. -* **ROCMultiClass** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/evaluation/classification/ROCMultiClass.java)) - a version of ROC that is used for multi-class (non-binary) networks (i.e., softmax + mcxent/negative-log-likelihood networks). As ROC metrics are only defined for binary classification, this treats the multi-class output as a set of 'one-vs-all' binary classification problems. -* **RegressionEvaluation** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/evaluation/regression/RegressionEvaluation.java)) - An evaluation class used for regression models (including multi-output regression models). Reports metrics such as mean-squared error (MSE), mean-absolute error, etc for each output/column. - - -## Network Saving and Loading - -```MultiLayerNetwork.save(File)``` and ```MultiLayerNetwork.load(File)``` methods can be used to save and load models. These use ModelSerializer internally. Similar save/load methods are also available for ComputationGraph. 
-
-MultiLayerNetwork and ComputationGraph can be saved using the [ModelSerializer](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/util/ModelSerializer.java) class - and specifically the ```writeModel```, ```restoreMultiLayerNetwork``` and ```restoreComputationGraph``` methods.
-
-Examples: [Saving and loading network](https://github.com/eclipse/deeplearning4j-examples/tree/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/misc/modelsaving)
-
-Networks can be trained further after saving and loading: however, be sure to load the 'updater' (i.e., the historical state for updaters like momentum). If no further training is required, the updater state can be omitted to save disk space and memory.
-
-Most Normalizers (implementing the ND4J ```Normalizer``` interface) can also be added to a model using the ```addNormalizerToModel``` method.
-
-Note that the format used for models in DL4J is .zip: it's possible to open/extract these files using programs supporting the zip format.
-
-
-
-## Network Configurations
-
-This section lists the various configuration options that Deeplearning4j supports. 
- -### Activation Functions - -Activation functions can be defined in one of two ways: -(a) By passing an [Activation](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/activations/Activation.java) enumeration value to the configuration - for example, ```.activation(Activation.TANH)``` -(b) By passing an [IActivation](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/activations/IActivation.java) instance - for example, ```.activation(new ActivationSigmoid())``` - -Note that Deeplearning4j supports custom activation functions, which can be defined by extending [BaseActivationFunction](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/activations/impl) - -List of supported activation functions: -* **CUBE** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/activations/impl/ActivationCube.java)) - ```f(x) = x^3``` -* **ELU** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/activations/impl/ActivationELU.java)) - Exponential linear unit ([Reference](https://arxiv.org/abs/1511.07289)) -* **HARDSIGMOID** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/activations/impl/ActivationHardSigmoid.java)) - a piecewise linear version of the standard sigmoid activation function. ```f(x) = min(1, max(0, 0.2*x + 0.5))``` -* **HARDTANH** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/activations/impl/ActivationHardTanH.java)) - a piecewise linear version of the standard tanh activation function. 
-* **IDENTITY** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/activations/impl/ActivationIdentity.java)) - a 'no op' activation function: ```f(x) = x``` -* **LEAKYRELU** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/activations/impl/ActivationLReLU.java)) - leaky rectified linear unit. ```f(x) = max(0, x) + alpha * min(0, x)``` with ```alpha=0.01``` by default. -* **RATIONALTANH** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/activations/impl/ActivationRationalTanh.java)) - ```tanh(y) ~ sgn(y) * { 1 - 1/(1+|y|+y^2+1.41645*y^4)}``` which approximates ```f(x) = 1.7159 * tanh(2x/3)```, but should be faster to execute. ([Reference](https://arxiv.org/abs/1508.01292)) -* **RELU** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/activations/impl/ActivationReLU.java)) - standard rectified linear unit: ```f(x) = x``` if ```x>0``` or ```f(x) = 0``` otherwise -* **RRELU** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/activations/impl/ActivationRReLU.java)) - randomized rectified linear unit. Deterministic during test time. 
([Reference](https://arxiv.org/abs/1505.00853)) -* **SIGMOID** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/activations/impl/ActivationSigmoid.java)) - standard sigmoid activation function, ```f(x) = 1 / (1 + exp(-x))``` -* **SOFTMAX** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/activations/impl/ActivationSoftmax.java)) - standard softmax activation function -* **SOFTPLUS** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/activations/impl/ActivationSoftPlus.java)) - ```f(x) = log(1+e^x)``` - shape is similar to a smooth version of the RELU activation function -* **SOFTSIGN** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/activations/impl/ActivationSoftSign.java)) - ```f(x) = x / (1+|x|)``` - somewhat similar in shape to the standard tanh activation function (faster to calculate). 
-
-* **TANH** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/activations/impl/ActivationTanH.java)) - standard tanh (hyperbolic tangent) activation function
-* **RECTIFIEDTANH** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/activations/impl/ActivationRectifiedTanh.java)) - ```f(x) = max(0, tanh(x))```
-* **SELU** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/activations/impl/ActivationSELU.java)) - scaled exponential linear unit - used with [self normalizing neural networks](https://arxiv.org/abs/1706.02515)
-* **SWISH** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/activations/impl/ActivationSwish.java)) - Swish activation function, ```f(x) = x * sigmoid(x)``` ([Reference](https://arxiv.org/abs/1710.05941))
-
-### Weight Initialization
-
-Weight initialization refers to the method by which the initial parameters for a new network should be set.
-
-Weight initializations are usually defined using the [WeightInit](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInit.java) enumeration.
-
-Custom weight initializations can be specified using ```.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))``` for example. As of master (but not 0.9.1 release) ```.weightInit(new NormalDistribution(0, 1))``` is also possible, which is equivalent to the previous approach.
-
-Available weight initializations. 
Note again that not all are available in the 0.9.1 release:
-
-* **DISTRIBUTION**: Sample weights from a provided distribution (specified via ```dist``` configuration method)
-* **ZERO**: Generate weights as zeros
-* **ONES**: All weights are set to 1
-* **SIGMOID_UNIFORM**: A version of XAVIER_UNIFORM for sigmoid activation functions. U(-r,r) with r=4*sqrt(6/(fanIn + fanOut))
-* **NORMAL**: Normal/Gaussian distribution, with mean 0 and standard deviation 1/sqrt(fanIn). This is the initialization recommended in [Klambauer et al. 2017, "Self-Normalizing Neural Networks"](https://arxiv.org/abs/1706.02515) paper. Equivalent to DL4J's XAVIER_FAN_IN and LECUN_NORMAL (i.e. Keras' "lecun_normal")
-* **LECUN_UNIFORM**: Uniform U[-a,a] with a=3/sqrt(fanIn).
-* **UNIFORM**: Uniform U[-a,a] with a=1/sqrt(fanIn). "Commonly used heuristic" as per Glorot and Bengio 2010
-* **XAVIER**: As per [Glorot and Bengio 2010](http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf): Gaussian distribution with mean 0, variance 2.0/(fanIn + fanOut)
-* **XAVIER_UNIFORM**: As per [Glorot and Bengio 2010](http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf): Uniform distribution U(-s,s) with s = sqrt(6/(fanIn + fanOut))
-* **XAVIER_FAN_IN**: Similar to Xavier, but 1/fanIn -> Caffe originally used this.
-* **RELU**: [He et al. (2015), "Delving Deep into Rectifiers"](https://arxiv.org/abs/1502.01852). Normal distribution with variance 2.0/nIn
-* **RELU_UNIFORM**: [He et al. (2015), "Delving Deep into Rectifiers"](https://arxiv.org/abs/1502.01852). Uniform distribution U(-s,s) with s = sqrt(6/fanIn)
-* **IDENTITY**: Weights are set to an identity matrix. 
Note: can only be used with square weight matrices -* **VAR_SCALING_NORMAL_FAN_IN**: Gaussian distribution with mean 0, variance 1.0/(fanIn) -* **VAR_SCALING_NORMAL_FAN_OUT**: Gaussian distribution with mean 0, variance 1.0/(fanOut) -* **VAR_SCALING_NORMAL_FAN_AVG**: Gaussian distribution with mean 0, variance 1.0/((fanIn + fanOut)/2) -* **VAR_SCALING_UNIFORM_FAN_IN**: Uniform U[-a,a] with a=3.0/(fanIn) -* **VAR_SCALING_UNIFORM_FAN_OUT**: Uniform U[-a,a] with a=3.0/(fanOut) -* **VAR_SCALING_UNIFORM_FAN_AVG**: Uniform U[-a,a] with a=3.0/((fanIn + fanOut)/2) - - -### Updaters (Optimizers) - -An 'updater' in DL4J is a class that takes raw gradients and modifies them to become updates. These updates will then be applied to the network parameters. -The [CS231n course notes](http://cs231n.github.io/neural-networks-3/#ada) have a good explanation of some of these updaters. - -Supported updaters in Deeplearning4j: -* **AdaDelta** - ([Source](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/learning/config/AdaDelta.java)) - [Reference](https://arxiv.org/abs/1212.5701) -* **AdaGrad** - ([Source](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/learning/config/AdaGrad.java)) - [Reference](http://jmlr.org/papers/v12/duchi11a.html) -* **AdaMax** - ([Source](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/learning/config/AdaMax.java)) - A variant of the Adam updater - [Reference](https://arxiv.org/abs/1412.6980) -* **Adam** - ([Source](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/learning/config/Adam.java)) -* **Nadam** - 
([Source](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/learning/config/Nadam.java)) - A variant of the Adam updater, using the Nesterov momentum update rule - [Reference](https://arxiv.org/abs/1609.04747)
-* **Nesterovs** - ([Source](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/learning/config/Nesterovs.java)) - Nesterov momentum updater
-* **NoOp** - ([Source](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/learning/config/NoOp.java)) - A 'no operation' updater. That is, gradients are not modified at all by this updater. Mathematically equivalent to the SGD updater with a learning rate of 1.0
-* **RmsProp** - ([Source](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/learning/config/RmsProp.java)) - [Reference - slide 29](http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf)
-* **Sgd** - ([Source](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/learning/config/Sgd.java)) - Standard stochastic gradient descent updater. This updater applies a learning rate only.
-
-
-### Learning Rate Schedules
-
-All updaters that support a learning rate also support learning rate schedules (the Nesterov momentum updater also supports a momentum schedule). Learning rate schedules can be specified either based on the number of iterations, or the number of epochs that have elapsed. Dropout (see below) can also make use of the schedules listed here. 
- -Configure using, for example: ```.updater(new Adam(new ExponentialSchedule(ScheduleType.ITERATION, 0.1, 0.99 )))``` -You can plot/inspect the learning rate that will be used at any point by calling ```ISchedule.valueAt(int iteration, int epoch)``` on the schedule object you have created. - -Available schedules: - -* **ExponentialSchedule** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/schedule/ExponentialSchedule.java)) - Implements ```value(i) = initialValue * gamma^i``` -* **InverseSchedule** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/schedule/InverseSchedule.java)) - Implements ```value(i) = initialValue * (1 + gamma * i)^(-power)``` -* **MapSchedule** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/schedule/MapSchedule.java)) - Learning rate schedule based on a user-provided map. Note that the provided map must have a value for iteration/epoch 0. Has a builder class to conveniently define a schedule. 
-* **PolySchedule** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/schedule/PolySchedule.java)) - Implements ```value(i) = initialValue * (1 + i/maxIter)^(-power)``` -* **SigmoidSchedule** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/schedule/SigmoidSchedule.java)) - Implements ```value(i) = initialValue * 1.0 / (1 + exp(-gamma * (iter - stepSize)))``` -* **StepSchedule** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/schedule/StepSchedule.java)) - Implements ```value(i) = initialValue * gamma^( floor(iter/step) )``` - - -Note that custom schedules can be created by implementing the ISchedule interface. - - -### Regularization - -#### L1/L2 Regularization - -L1 and L2 regularization can easily be added to a network via the configuration: ```.l1(0.1).l2(0.2)```. -Note that ```.regularization(true)``` must be enabled on 0.9.1 also (this option has been removed after 0.9.1 was released). - -L1 and L2 regularization is applied by default on the weight parameters only. That is, .l1 and .l2 will not impact bias parameters - these can be regularized using ```.l1Bias(0.1).l2Bias(0.2)```. - - -#### Dropout - -All dropout types are applied at training time only. They are not applied at test time. - -* **Dropout** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/Dropout.java)) - Each input activation x is independently set to (0, with probability 1-p) or (x/p with probability p)
    -* **GaussianDropout** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/GaussianDropout.java)) - This is a multiplicative Gaussian noise (mean 1) on the input activations. Each input activation x is independently set to: ```x * y```, where ```y ~ N(1, stdev = sqrt((1-rate)/rate))``` -* **GaussianNoise** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/GaussianNoise.java)) - Applies additive, mean-zero Gaussian noise to the input - i.e., ```x = x + N(0,stddev)``` -* **AlphaDropout** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/AlphaDropout.java)) - AlphaDropout is a dropout technique proposed by [Klaumbauer et al. 2017 - Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515). Designed for self-normalizing neural networks (SELU activation, NORMAL weight init). Attempts to keep both the mean and variance of the post-dropout activations to the same (in expectation) as before alpha dropout was applied - -Note that (as of current master - but not 0.9.1) the dropout parameters can also be specified according to any of the schedule classes mentioned in the Learning Rate Schedules section. - -### Weight Noise - -As per dropout, dropconnect / weight noise is applied only at training time - -* **DropConnect** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/DropConnect.java)) - DropConnect is similar to dropout, but applied to the parameters of a network (instead of the input activations). 
[Reference](https://cs.nyu.edu/~wanli/dropc/dropc.pdf) -* **WeightNoise** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/WeightNoise.java)) - Apply noise of the specified distribution to the weights at training time. Both additive and multiplicative modes are supported - when additive, noise should be mean 0, when multiplicative, noise should be mean 1 - -### Constraints - -Constraints are deterministic limitations that are placed on a model's parameters at the end of each iteration (after the parameter update has occurred). They can be thought of as a type of regularization. - -* **MaxNormConstraint** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java)) - Constrain the maximum L2 norm of the incoming weights for each unit to be less than or equal to the specified value. If the L2 norm exceeds the specified value, the weights will be scaled down to satisfy the constraint. -* **MinMaxNormConstraint** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java)) - Constrain the minimum AND maximum L2 norm of the incoming weights for each unit to be between the specified values. Weights will be scaled up/down if required. -* **NonNegativeConstraint** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/NonNegativeConstraint.java)) - Constrain all parameters to be non-negative. Negative parameters will be replaced with 0. 
-* **UnitNormConstraint** - ([Source](https://github.com/eclipse/deeplearning4j/blob/b841c0f549194dbdf88b42836df662d9b8ea8c6d/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java)) - Constrain the L2 norm of the incoming weights for each unit to be 1.0. - - -## Data Classes - -### Iterators - -DataSetIterator is an abstraction that DL4J uses to iterate over minibatches of data, used for training. DataSetIterator returns DataSet objects, which are minibatches, and support a maximum of 1 input and 1 output array (INDArray). - -MultiDataSetIterator is similar to DataSetIterator, but returns MultiDataSet objects, which can have as many input and output arrays as required for the network. - -#### Iterators - Build-In (DL4J-Provided Data) - -These iterators download their data as required. The actual datasets they return are not customizable. - -* **MnistDataSetIterator** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-data/deeplearning4j-datasets/src/main/java/org/deeplearning4j/datasets/iterator/impl/MnistDataSetIterator.java)) - DataSetIterator for the well-known MNIST digits dataset. By default, returns a row vector (1x784), with values normalized to 0 to 1 range. Use ```.setInputType(InputType.convolutionalFlat())``` to use with CNNs. -* **EmnistDataSetIterator** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-data/deeplearning4j-datasets/src/main/java/org/deeplearning4j/datasets/iterator/impl/EmnistDataSetIterator.java)) - Similar to the MNIST digits dataset, but with more examples, and also letters. Includes multiple different splits (letters only, digits only, letters + digits, etc). Same 1x784 format as MNIST, hence (other than different number of labels for some splits) can be used as a drop-in replacement for MnistDataSetIterator. 
[Reference 1](https://www.nist.gov/itl/iad/image-group/emnist-dataset), [Reference 2](https://arxiv.org/abs/1702.05373) -* **IrisDataSetIterator** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-data/deeplearning4j-datasets/src/main/java/org/deeplearning4j/datasets/iterator/impl/IrisDataSetIterator.java)) - An iterator for the well known Iris dataset. 4 features, 3 output classes. -* **CifarDataSetIterator** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-data/deeplearning4j-datasets/src/main/java/org/deeplearning4j/datasets/iterator/impl/CifarDataSetIterator.java)) - An iterator for the CIFAR images dataset. 10 classes, 4d features/activations format for CNNs in DL4J: ```[minibatch,channels,height,width] = [minibatch,3,32,32]```. Features are *not* normalized - instead, are in the range 0 to 255. -* **LFWDataSetIterator** - ([Source]()) -* **TinyImageNetDataSetIterator** ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-data/deeplearning4j-datasets/src/main/java/org/deeplearning4j/datasets/iterator/impl/TinyImageNetDataSetIterator.java)) - A subset of the standard imagenet dataset; 200 classes, 500 images per class -* **UciSequenceDataSetIterator** ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-data/deeplearning4j-datasets/src/main/java/org/deeplearning4j/datasets/iterator/impl/UciSequenceDataSetIterator.java)) - UCI synthetic control time series dataset - -#### Iterators - User Provided Data - -The iterators in this subsection are used with user-provided data. 
- -* **RecordReaderDataSetIterator** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-data/deeplearning4j-datavec-iterators/src/main/java/org/deeplearning4j/datasets/datavec/RecordReaderDataSetIterator.java)) - an iterator that takes a DataVec record reader (such as CsvRecordReader or ImageRecordReader) and handles conversion to DataSets, batching, masking, etc. One of the most commonly used iterators in DL4J. Handles non-sequence data only, as input (i.e., RecordReader, no SequenceeRecordReader). -* **RecordReaderMultiDataSetIterator** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-data/deeplearning4j-datavec-iterators/src/main/java/org/deeplearning4j/datasets/datavec/RecordReaderMultiDataSetIterator.java)) - the MultiDataSet version of RecordReaderDataSetIterator, that supports multiple readers. Has a builder pattern for creating more complex data pipelines (such as different subsets of a reader's output to different input/output arrays, conversion to one-hot, etc). Handles both sequence and non-sequence data as input. -* **SequenceRecordReaderDataSetIterator** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-data/deeplearning4j-datavec-iterators/src/main/java/org/deeplearning4j/datasets/datavec/SequenceRecordReaderDataSetIterator.java)) - The sequence (SequenceRecordReader) version of RecordReaderDataSetIterator. 
Users may be better off using RecordReaderMultiDataSetIterator, in conjunction with sequence record readers, for greater flexibility.
-* **DoublesDataSetIterator** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-data/deeplearning4j-utility-iterators/src/main/java/org/deeplearning4j/datasets/iterator/DoublesDataSetIterator.java))
-* **FloatsDataSetIterator** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-data/deeplearning4j-utility-iterators/src/main/java/org/deeplearning4j/datasets/iterator/FloatsDataSetIterator.java))
-* **INDArrayDataSetIterator** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-data/deeplearning4j-utility-iterators/src/main/java/org/deeplearning4j/datasets/iterator/INDArrayDataSetIterator.java))
-
-
-#### Iterators - Adapter and Utility Iterators
-
-* **MultiDataSetIteratorAdapter** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/datasets/iterator/impl/MultiDataSetIteratorAdapter.java)) - Wrap a DataSetIterator to convert it to a MultiDataSetIterator
-* **SingletonMultiDataSetIterator** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-data/deeplearning4j-utility-iterators/src/main/java/org/deeplearning4j/datasets/iterator/impl/SingletonMultiDataSetIterator.java)) - Wrap a MultiDataSet into a MultiDataSetIterator that returns one MultiDataSet (i.e., the wrapped MultiDataSet is *not* split up)
-* **AsyncDataSetIterator** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-data/deeplearning4j-utility-iterators/src/main/java/org/deeplearning4j/datasets/iterator/AsyncDataSetIterator.java)) - Used automatically by MultiLayerNetwork and ComputationGraph where appropriate. Implements asynchronous prefetching of datasets to improve performance. 
-* **AsyncMultiDataSetIterator** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-data/deeplearning4j-utility-iterators/src/main/java/org/deeplearning4j/datasets/iterator/AsyncMultiDataSetIterator.java)) - Used automatically by ComputationGraph where appropriate. Implements asynchronous prefetching of MultiDataSets to improve performance. -* **AsyncShieldDataSetIterator** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-data/deeplearning4j-utility-iterators/src/main/java/org/deeplearning4j/datasets/iterator/AsyncShieldDataSetIterator.java)) - Generally used only for debugging. Stops MultiLayerNetwork and ComputationGraph from using an AsyncDataSetIterator. -* **AsyncShieldMultiDataSetIterator** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-data/deeplearning4j-utility-iterators/src/main/java/org/deeplearning4j/datasets/iterator/AsyncShieldMultiDataSetIterator.java)) - The MultiDataSetIterator version of AsyncShieldDataSetIterator -* **EarlyTerminationDataSetIterator** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-data/deeplearning4j-utility-iterators/src/main/java/org/deeplearning4j/datasets/iterator/EarlyTerminationDataSetIterator.java)) - Wraps another DataSetIterator, ensuring that only a specified (maximum) number of minibatches (DataSet) objects are returned between resets. Can be used to 'cut short' an iterator, returning only the first N DataSets. 
-* **EarlyTerminationMultiDataSetIterator** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-data/deeplearning4j-utility-iterators/src/main/java/org/deeplearning4j/datasets/iterator/EarlyTerminationMultiDataSetIterator.java)) - The MultiDataSetIterator version of EarlyTerminationDataSetIterator -* **ExistingDataSetIterator** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-data/deeplearning4j-utility-iterators/src/main/java/org/deeplearning4j/datasets/iterator/ExistingDataSetIterator.java)) - Convert an ```Iterator``` or ```Iterable``` to a DataSetIterator. Does not split the underlying DataSet objects -* **FileDataSetIterator** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-data/deeplearning4j-utility-iterators/src/main/java/org/deeplearning4j/datasets/iterator/file/FileDataSetIterator.java)) - An iterator that iterates over DataSet files that have been previously saved with ```DataSet.save(File)```. Supports randomization, filtering, different output batch size vs. saved DataSet batch size, etc. -* **FileMultiDataSetIterator** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-data/deeplearning4j-utility-iterators/src/main/java/org/deeplearning4j/datasets/iterator/file/FileMultiDataSetIterator.java)) - A MultiDataSet version of FileDataSetIterator -* **IteratorDataSetIterator** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-data/deeplearning4j-utility-iterators/src/main/java/org/deeplearning4j/datasets/iterator/IteratorDataSetIterator.java)) - Convert an ```Iterator``` to a DataSetIterator. Unlike ExistingDataSetIterator, the underlying DataSet objects may be split/combined - i.e., the minibatch size may differ for the output, vs. the input iterator. 
-* **IteratorMultiDataSetIterator** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-data/deeplearning4j-utility-iterators/src/main/java/org/deeplearning4j/datasets/iterator/IteratorMultiDataSetIterator.java)) - The ```Iterator``` version of IteratorDataSetIterator -* **MultiDataSetWrapperIterator** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-data/deeplearning4j-utility-iterators/src/main/java/org/deeplearning4j/datasets/iterator/MultiDataSetWrapperIterator.java)) - Convert a MultiDataSetIterator to a DataSetIterator. Note that this is only possible if the number of features and labels arrays is equal to 1. -* **MultipleEpochsIterator** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-data/deeplearning4j-utility-iterators/src/main/java/org/deeplearning4j/datasets/iterator/MultipleEpochsIterator.java)) - Treat multiple passes (epochs) of the underlying iterator as a single epoch, when training. -* **WorkspaceShieldDataSetIterator** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-data/deeplearning4j-utility-iterators/src/main/java/org/deeplearning4j/datasets/iterator/WorkspacesShieldDataSetIterator.java)) - Generally used only for debugging, and not usually by users. Detaches/migrates DataSets coming out of the underlying DataSetIterator. - - -### Data Normalization - -ND4J provides a number of classes for performing data normalization. These are implemented as DataSetPreProcessors. -The basic pattern for normalization: - -1. Create your (unnormalized) DataSetIterator or MultiDataSetIterator: ```DataSetIterator myTrainData = ...``` -2. Create the normalizer you want to use: ```NormalizerMinMaxScaler normalizer = new NormalizerMinMaxScaler();``` -3. Fit the normalizer: ```normalizer.fit(myTrainData)``` -4. 
Set the normalizer/preprocessor on the iterator: ```myTrainData.setPreProcessor(normalizer);``` -End result: the data that comes from your DataSetIterator will now be normalized. - -In general, you should fit *only* on the training data, and do ```trainData.setPreProcessor(normalizer)``` and ```testData.setPreProcessor(normalizer)``` with the same/single normalizer that has been fit on the training data only. - -Note that where appropriate (NormalizerStandardize, NormalizerMinMaxScaler) statistics such as mean/standard-deviation/min/max are shared across time (for time series) and across image x/y locations (but not depth/channels - for image data). - -Data normalization example: [link](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/dataexamples/PreprocessNormalizerExample.java) - -**Available normalizers: DataSet / DataSetIterator** - -* **ImagePreProcessingScaler** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/dataset/api/preprocessor/ImagePreProcessingScaler.java)) - Applies min-max scaling to image activations. Default settings do 0-255 input to 0-1 output (but is configurable). Note that unlike the other normalizers here, this one does not rely on statistics (mean/min/max etc) collected from the data, hence the ```normalizer.fit(trainData)``` step is unnecessary (is a no-op). 
-* **NormalizerStandardize** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/dataset/api/preprocessor/NormalizerStandardize.java)) - normalizes each feature value independently (and optionally label values) to have 0 mean and a standard deviation of 1 -* **NormalizerMinMaxScaler** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/dataset/api/preprocessor/NormalizerMinMaxScaler.java)) - normalizes each feature value independently (and optionally label values) to lie between a minimum and maximum value (by default between 0 and 1) -* **VGG16ImagePreProcessor** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/dataset/api/preprocessor/VGG16ImagePreProcessor.java)) - This is a preprocessor specifically for VGG16. It subtracts the mean RGB value, computed on the training set, from each pixel as reported in [Link](https://arxiv.org/pdf/1409.1556.pdf) - - -**Available normalizers: MultiDataSet / MultiDataSetIterator** - -* **ImageMultiPreProcessingScaler** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/dataset/api/preprocessor/ImageMultiPreProcessingScaler.java)) - A MultiDataSet/MultiDataSetIterator version of ImagePreProcessingScaler -* **MultiNormalizerStandardize** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/dataset/api/preprocessor/MultiNormalizerStandardize.java)) - MultiDataSet/MultiDataSetIterator version of NormalizerStandardize -* **MultiNormalizerMinMaxScaler** - 
([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/dataset/api/preprocessor/MultiNormalizerMinMaxScaler.java)) - MultiDataSet/MultiDataSetIterator version of NormalizerMinMaxScaler -* **MultiNormalizerHybrid** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/dataset/api/preprocessor/MultiNormalizerHybrid.java)) - A MultiDataSet normalizer that can combine different normalization types (standardize, min/max etc) for different input/feature and output/label arrays. - - -### Transfer Learning - -Deeplearning4j has classes/utilities for performing transfer learning - i.e., taking an existing network, and modifying some of the layers (optionally freezing others so their parameters don't change). For example, an image classifier could be trained on ImageNet, then applied to a new/different dataset. Both MultiLayerNetwork and ComputationGraph can be used with transfer learning - frequently starting from a pre-trained model from the model zoo (see next section), though any MultiLayerNetwork/ComputationGraph can be used. - -Link: [Transfer learning examples](https://github.com/eclipse/deeplearning4j-examples/tree/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/transferlearning/vgg16) - -The main class for transfer learning is [TransferLearning](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearning.java). This class has a builder pattern that can be used to add/remove layers, freeze layers, etc. -[FineTuneConfiguration](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/transferlearning/FineTuneConfiguration.java) can be used here to specify the learning rate and other settings for the non-frozen layers. 
- - -### Trained Model Library - Model Zoo - -Deeplearning4j provides a 'model zoo' - a set of pretrained models that can be downloaded and used either as-is (for image classification, for example) or often for transfer learning. - -Link: [Deeplearning4j Model Zoo](https://deeplearning4j.org/model-zoo) - -Models available in DL4J's model zoo: - -* **AlexNet** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/AlexNet.java)) -* **Darknet19** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/Darknet19.java)) -* **FaceNetNN4Small2** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/FaceNetNN4Small2.java)) -* **InceptionResNetV1** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/InceptionResNetV1.java)) -* **LeNet** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/LeNet.java)) -* **ResNet50** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/ResNet50.java)) -* **SimpleCNN** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/SimpleCNN.java)) -* **TextGenerationLSTM** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/TextGenerationLSTM.java)) -* **TinyYOLO** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/TinyYOLO.java)) -* **VGG16** - 
([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/VGG16.java)) -* **VGG19** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-zoo/src/main/java/org/deeplearning4j/zoo/model/VGG19.java)) - - -**Note**: Trained Keras models (not provided by DL4J) may also be imported, using Deeplearning4j's Keras model import functionality. - -## Cheat sheet code snippets - -The Eclipse Deeplearning4j libraries come with a lot of functionality, and we've put together this cheat sheet to help users assemble neural networks and use tensors faster. - -### Neural networks - -Code for configuring common parameters and layers for both `MultiLayerNetwork` and `ComputationGraph`. See [MultiLayerNetwork](/api/{{page.version}}/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.html) and [ComputationGraph](/api/{{page.version}}/org/deeplearning4j/nn/graph/ComputationGraph.html) for full API. - -**Sequential networks** - -Most network configurations can use the `MultiLayerNetwork` class if they are sequential and simple. 
- -```java -MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .seed(1234) - // parameters below are copied to every layer in the network - // for inputs like dropOut() or activation() you should do this per layer - // only specify the parameters you need - .updater(new AdaGrad()) - .activation(Activation.RELU) - .dropOut(0.8) - .l1(0.001) - .l2(1e-4) - .weightInit(WeightInit.XAVIER) - .weightInit(Distribution.TruncatedNormalDistribution) - .cudnnAlgoMode(ConvolutionLayer.AlgoMode.PREFER_FASTEST) - .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) - .gradientNormalizationThreshold(1e-3) - .list() - // layers in the network, added sequentially - // parameters set per-layer override the parameters above - .layer(new DenseLayer.Builder().nIn(numInputs).nOut(numHiddenNodes) - .weightInit(WeightInit.XAVIER) - .build()) - .layer(new ActivationLayer(Activation.RELU)) - .layer(new ConvolutionLayer.Builder(1,1) - .nIn(1024) - .nOut(2048) - .stride(1,1) - .convolutionMode(ConvolutionMode.Same) - .weightInit(WeightInit.XAVIER) - .activation(Activation.IDENTITY) - .build()) - .layer(new GravesLSTM.Builder() - .activation(Activation.TANH) - .nIn(inputNum) - .nOut(100) - .build()) - .layer(new OutputLayer.Builder(LossFunction.NEGATIVELOGLIKELIHOOD) - .weightInit(WeightInit.XAVIER) - .activation(Activation.SOFTMAX) - .nIn(numHiddenNodes).nOut(numOutputs).build()) - .pretrain(false).backprop(true) - .build(); - -MultiLayerNetwork neuralNetwork = new MultiLayerNetwork(conf); -``` - -**Complex networks** - -Networks that have complex graphs and "branching" such as *Inception* need to use `ComputationGraph`. 
- -```java -ComputationGraphConfiguration.GraphBuilder graph = new NeuralNetConfiguration.Builder() - .seed(seed) - // parameters below are copied to every layer in the network - // for inputs like dropOut() or activation() you should do this per layer - // only specify the parameters you need - .activation(Activation.IDENTITY) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .updater(updater) - .weightInit(WeightInit.RELU) - .l2(5e-5) - .miniBatch(true) - .cacheMode(cacheMode) - .trainingWorkspaceMode(workspaceMode) - .inferenceWorkspaceMode(workspaceMode) - .cudnnAlgoMode(cudnnAlgoMode) - .convolutionMode(ConvolutionMode.Same) - .graphBuilder() - // layers in the network, added sequentially - // parameters set per-layer override the parameters above - // note that you must name each layer and manually specify its input - .addInputs("input1") - .addLayer("stem-cnn1", new ConvolutionLayer.Builder(new int[] {7, 7}, new int[] {2, 2}, new int[] {3, 3}) - .nIn(inputShape[0]) - .nOut(64) - .cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE) - .build(),"input1") - .addLayer("stem-batch1", new BatchNormalization.Builder(false) - .nIn(64) - .nOut(64) - .build(), "stem-cnn1") - .addLayer("stem-activation1", new ActivationLayer.Builder() - .activation(Activation.RELU) - .build(), "stem-batch1") - .addLayer("lossLayer", new CenterLossOutputLayer.Builder() - .lossFunction(LossFunctions.LossFunction.SQUARED_LOSS) - .activation(Activation.SOFTMAX).nOut(numClasses).lambda(1e-4).alpha(0.9) - .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer).build(), - "stem-activation1") - .setOutputs("lossLayer") - .setInputTypes(InputType.convolutional(224, 224, 3)) - .backprop(true).pretrain(false).build(); - -ComputationGraph neuralNetwork = new ComputationGraph(graph); -``` - - -### Training - -The code snippet below creates a basic pipeline that loads images from disk, applies random transformations, and fits them to a neural network. 
It also sets up a UI instance so you can visualize progress, and uses early stopping to terminate training early. You can adapt this pipeline for many different use cases. - -```java -ParentPathLabelGenerator labelMaker = new ParentPathLabelGenerator(); -File mainPath = new File(System.getProperty("user.dir"), "dl4j-examples/src/main/resources/animals/"); -FileSplit fileSplit = new FileSplit(mainPath, NativeImageLoader.ALLOWED_FORMATS, rng); -int numExamples = Math.toIntExact(fileSplit.length()); -int numLabels = fileSplit.getRootDir().listFiles(File::isDirectory).length; //This only works if your root is clean: only label subdirs. -BalancedPathFilter pathFilter = new BalancedPathFilter(rng, labelMaker, numExamples, numLabels, maxPathsPerLabel); - -InputSplit[] inputSplit = fileSplit.sample(pathFilter, splitTrainTest, 1 - splitTrainTest); -InputSplit trainData = inputSplit[0]; -InputSplit testData = inputSplit[1]; - -boolean shuffle = false; -ImageTransform flipTransform1 = new FlipImageTransform(rng); -ImageTransform flipTransform2 = new FlipImageTransform(new Random(123)); -ImageTransform warpTransform = new WarpImageTransform(rng, 42); -List<Pair<ImageTransform,Double>> pipeline = Arrays.asList( - new Pair<>(flipTransform1,0.9), - new Pair<>(flipTransform2,0.8), - new Pair<>(warpTransform,0.5)); - -ImageTransform transform = new PipelineImageTransform(pipeline,shuffle); -DataNormalization scaler = new ImagePreProcessingScaler(0, 1); - -// training dataset -ImageRecordReader recordReaderTrain = new ImageRecordReader(height, width, channels, labelMaker); -recordReaderTrain.initialize(trainData, null); -DataSetIterator trainingIterator = new RecordReaderDataSetIterator(recordReaderTrain, batchSize, 1, numLabels); - -// testing dataset -ImageRecordReader recordReaderTest = new ImageRecordReader(height, width, channels, labelMaker); -recordReaderTest.initialize(testData, null); -DataSetIterator testingIterator = new RecordReaderDataSetIterator(recordReaderTest, batchSize, 1, numLabels); - -// early 
stopping configuration, model saver, and trainer -EarlyStoppingModelSaver saver = new LocalFileModelSaver(System.getProperty("user.dir")); -EarlyStoppingConfiguration esConf = new EarlyStoppingConfiguration.Builder() - .epochTerminationConditions(new MaxEpochsTerminationCondition(50)) //Max of 50 epochs - .evaluateEveryNEpochs(1) - .iterationTerminationConditions(new MaxTimeIterationTerminationCondition(20, TimeUnit.MINUTES)) //Max of 20 minutes - .scoreCalculator(new DataSetLossCalculator(testingIterator, true)) //Calculate test set score - .modelSaver(saver) - .build(); - -EarlyStoppingTrainer trainer = new EarlyStoppingTrainer(esConf, neuralNetwork, trainingIterator); - -// begin training -trainer.fit(); -``` - -### Complex Transformation - -DataVec comes with a portable `TransformProcess` class that allows for more complex data wrangling and data conversion. It works well with both 2D and sequence datasets. - -```java -Schema schema = new Schema.Builder() - .addColumnsDouble("Sepal length", "Sepal width", "Petal length", "Petal width") - .addColumnCategorical("Species", "Iris-setosa", "Iris-versicolor", "Iris-virginica") - .build(); - -TransformProcess tp = new TransformProcess.Builder(schema) - .categoricalToInteger("Species") - .build(); - -// do the transformation on spark -JavaRDD> processedData = SparkTransformExecutor.execute(parsedInputData, tp); -``` - -We recommend having a look at the [DataVec examples](https://github.com/eclipse/deeplearning4j-examples/tree/master/datavec-examples/src/main/java/org/datavec/transform) before creating more complex transformations. - - -### Evaluation - -Both `MultiLayerNetwork` and `ComputationGraph` come with built-in `.eval()` methods that allow you to pass a dataset iterator and return evaluation results. 
- -```java -// returns evaluation class with accuracy, precision, recall, and other class statistics -Evaluation eval = neuralNetwork.eval(testIterator); -System.out.println(eval.accuracy()); -System.out.println(eval.precision()); -System.out.println(eval.recall()); - -// ROC for Area Under Curve on multi-class datasets (not binary classes) -ROCMultiClass roc = neuralNetwork.doEvaluation(testIterator, new ROCMultiClass()); -System.out.println(roc.calculateAverageAuc()); -System.out.println(roc.calculateAverageAucPR()); -``` - -For advanced evaluation the code snippet below can be adapted into training pipelines. This is when the built-in `neuralNetwork.eval()` method outputs confusing results or if you need to examine raw data. - -```java -//Evaluate the model on the test set -Evaluation eval = new Evaluation(numClasses); -INDArray output = neuralNetwork.output(testData.getFeatures()); -eval.eval(testData.getLabels(), output, testMetaData); //Note we are passing in the test set metadata here - -//Get a list of prediction errors, from the Evaluation object -//Prediction errors like this are only available after calling iterator.setCollectMetaData(true) -List predictionErrors = eval.getPredictionErrors(); -System.out.println("\n\n+++++ Prediction Errors +++++"); -for(Prediction p : predictionErrors){ - System.out.println("Predicted class: " + p.getPredictedClass() + ", Actual class: " + p.getActualClass() - + "\t" + p.getRecordMetaData(RecordMetaData.class).getLocation()); -} - -//We can also load the raw data: -List predictionErrorRawData = recordReader.loadFromMetaData(predictionErrorMetaData); -for(int i=0; i rawData = predictionErrorRawData.get(i).getRecord(); - - INDArray networkPrediction = model.output(features); - - System.out.println(meta.getLocation() + ": " - + "\tRaw Data: " + rawData - + "\tNormalized: " + features - + "\tLabels: " + labels - + "\tPredictions: " + networkPrediction); -} - -//Some other useful evaluation methods: -List list1 = 
eval.getPredictions(1,2); //Predictions: actual class 1, predicted class 2 -List list2 = eval.getPredictionByPredictedClass(2); //All predictions for predicted class 2 -List list3 = eval.getPredictionsByActualClass(2); //All predictions for actual class 2 -``` \ No newline at end of file diff --git a/docs/deeplearning4j/templates/concepts.md b/docs/deeplearning4j/templates/concepts.md deleted file mode 100644 index f04d17782..000000000 --- a/docs/deeplearning4j/templates/concepts.md +++ /dev/null @@ -1,108 +0,0 @@ ---- -title: Core Concepts in Deeplearning4j -short_title: Core Concepts -description: Introduction to core Deeplearning4j concepts. -category: Get Started -weight: 1 ---- - -## Overview - -Every machine-learning workflow consists of at least two parts. The first is loading your data and preparing it to be used for learning. We refer to this part as the ETL (extract, transform, load) process. [DataVec](./datavec-overview) is the library we built to make building data pipelines easier. The second part is the actual learning system itself. That is the algorithmic core of DL4J. - -All deep learning is based on vectors and tensors, and DL4J relies on a tensor library called [ND4J](./nd4j-overview). It provides us with the ability to work with *n-dimensional arrays* (also called tensors). Thanks to its different backends, it even enables us to use both CPUs and GPUs. - -## Preparing Data for Learning and Prediction - -Unlike other machine learning or deep learning frameworks, DL4J treats the tasks of loading data and training algorithms as separate processes. You don't just point the model at data saved somewhere on disk, you load the data using DataVec. This gives you a lot more flexibility, and retains the convenience of simple data loading. - -Before the algorithm can start learning, you have to prepare the data, even if you already have a trained model. Preparing data means loading it and putting it in the right shape and value range (e.g. 
normalization, zero-mean and unit variance). Building these processes from scratch is error prone, so use DataVec wherever possible. - -Deeplearning4j works with a lot of different data types, such as images, CSV, plain text and, with [Apache Camel](https://camel.apache.org/) [integration](https://github.com/eclipse/deeplearning4j/tree/master/datavec/tree/master/datavec-camel), pretty much any other data type you can think of. - -To use DataVec, you will need one of the implementations of the [RecordReader](/api/{{page.version}}/org/datavec/api/records/reader/RecordReader.html) interface along with the [RecordReaderDataSetIterator](/api/{{page.version}}/org/deeplearning4j/datasets/datavec/RecordReaderDataSetIterator.html). - -Once you have a [DataSetIterator](/api/{{page.version}}/org/nd4j/linalg/dataset/api/iterator/DataSetIterator.html), which is just a pattern that describes sequential access to data, you can use it to retrieve the data in a format suited for training a neural net model. - -### Normalizing Data - -Neural networks work best when the data they're fed is normalized, constrained to a range between -1 and 1. There are several reasons for that. One is that nets are trained using [gradient descent](https://en.wikipedia.org/wiki/Gradient_descent), and their activation functions usually having an active range somewhere between -1 and 1. Even when using an activation function that doesn't saturate quickly, it is still good practice to constrain your values to this range to improve performance. - -Normalizing data is pretty easy in DL4J. Decide how you want to normalize your data, and set the corresponding [DataNormalization](./datavec-normalization) up as a preprocessor for your DataSetIterator. - -The `ImagePreProcessingScaler` is obviously a good choice for image data. 
The `NormalizerMinMaxScaler` is a good choice if you have a uniform range along all dimensions of your input data, and `NormalizerStandardize` is what you would usually use in other cases. - -If you need other types of normalization, you are also free to implement the `DataNormalization` interface. - -If you use `NormalizerStandardize`, note that this is a normalizer that depends on statistics that it extracts from the data. So you will have to save those statistics along with the model to restore them when you restore your model. - -## DataSets, INDArrays and Mini-Batches - -As the name suggests, a DataSetIterator returns [DataSet](/api/{{page.version}}/org/nd4j/linalg/dataset/DataSet.html) objects. DataSet objects are containers for the features and labels of your data. But they aren't constrained to holding just a single example at once. A DataSet can contain as many examples as needed. - -It does that by keeping the values in several instances of [INDArray](/api/{{page.version}}/org/nd4j/linalg/api/ndarray/INDArray.html): one for the features of your examples, one for the labels and two additional ones for masking, if you are using timeseries data (see [Using RNNs / Masking](./deeplearning4j-nn-recurrent) for more information). - -An INDArray is one of the n-dimensional arrays, or tensors, used in ND4J. In the case of the features, it is a matrix of the size `Number of Examples x Number of Features`. Even with only a single example, it will have this shape. - -Why doesn't it contain all of the data examples at once? - -This is another important concept for deep learning: mini-batching. In order to produce accurate results, a lot of real-world training data is often needed. Often that is more data than can fit in available memory, so storing it in a single `DataSet` sometimes isn't possible. But even if there is enough data storage, there is another important reason not to use all of your data at once. 
With mini-batches you can get more updates to your model in a single epoch. - -So why bother having more than one example in a DataSet? Since the model is trained using [gradient descent](https://en.wikipedia.org/wiki/Gradient_descent), it requires a good gradient to learn how to minimize error. Using only one example at a time will create a gradient that only takes errors produced with the current example into consideration. This would make the learning behavior erratic, slow down the learning, and may not even lead to a usable result. - -A mini-batch should be large enough to provide a representative sample of the real world (or at least your data). That means that it should always contain all of the classes that you want to predict and that the count of those classes should be distributed in approximately the same way as they are in your overall data. - -## Building a Neural Net Model - -DL4J gives data scientists and developers tools to build deep neural networks at a high level using concepts like `layer`. It employs a builder pattern in order to build the neural net declaratively, as you can see in this (simplified) example: - -```java -MultiLayerConfiguration conf = - new NeuralNetConfiguration.Builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .updater(new Nesterovs(learningRate, 0.9)) - .list( - new DenseLayer.Builder().nIn(numInputs).nOut(numHiddenNodes).activation("relu").build(), - new OutputLayer.Builder(LossFunction.NEGATIVELOGLIKELIHOOD).activation("softmax").nIn(numHiddenNodes).nOut(numOutputs).build() - ).backprop(true).build(); -``` - -If you are familiar with other deep learning frameworks, you will notice that this looks a bit like Keras. - -Unlike other frameworks, DL4J splits the optimization algorithm from the updater algorithm. This allows for flexibility as you seek a combination of optimizer and updater that works best for your data and problem. 
- -Besides the [DenseLayer](/api/{{page.version}}/org/deeplearning4j/nn/conf/layers/DenseLayer.html) -and [OutputLayer](/api/{{page.version}}/org/deeplearning4j/nn/conf/layers/OutputLayer.html) -that you have seen in the example above, there are several [other layer types](/api/{{page.version}}/org/deeplearning4j/nn/conf/layers/package-summary.html), like `GravesLSTM`, `ConvolutionLayer`, `RBM`, `EmbeddingLayer`, etc. Using those layers you can define not only simple neural networks, but also [recurrent](./deeplearning4j-nn-recurrent) and [convolutional](./deeplearning4j-nn-convolutional) networks. - -## Training a Model - -After configuring your neural network, you will have to train the model. The simplest case is to simply call the `.fit()` method on the model configuration with your `DataSetIterator` as an argument. This will train the model on all of your data -once. A single pass over the entire dataset is called an *epoch*. DL4J has several different methods for passing through the data more than just once. - -The simplest way is to reset your `DataSetIterator` and loop over the fit call as many times as you want. This way you can train your model for as many epochs as you think is a good fit. - -Yet another way would be to use an [EarlyStoppingTrainer](/api/{{page.version}}/org/deeplearning4j/earlystopping/trainer/EarlyStoppingTrainer.html). -You can configure this trainer to run for as many epochs as you like and -additionally for as long as you like. It will evaluate the performance of your -network after each epoch (or whatever you have configured) and save the best -performing version for later use. - -Also note that DL4J does not only support training just `MultiLayerNetworks`, but it also supports a more flexible [ComputationGraph](./deeplearning4j-nn-computationgraph). - -### Evaluating Model Performance - -As you train your model, you will want to test how well it performs. 
For that test, you will need a dedicated data set that will not be used for training but instead will only be used for evaluating your model. This data should have the same distribution as the real-world data you want to make predictions about with your model. The reason you can't simply use your training data for evaluation is because machine learning methods are prone to overfitting (getting good at making predictions about the training set, but not performing well on larger datasets). - -The [Evaluation](/api/{{page.version}}/org/deeplearning4j/eval/Evaluation.html) -class is used for evaluation. Slightly different methods apply to evaluating normal feed forward networks or recurrent networks. For more details on using it, take a look at the corresponding [examples](https://github.com/eclipse/deeplearning4j-examples). - -## Troubleshooting a Neural Net Model - -Building neural networks to solve problems is an empirical process. That is, it requires trial and error. So you will have to try different settings and architectures in order to find a neural net configuration that performs well. - -DL4J provides a listener facility to help you monitor your network's performance visually. You can set up listeners for your model that will be called after each mini-batch is processed. One of the most often used listeners that DL4J ships out of the box is [ScoreIterationListener](/api/{{page.version}}/org/deeplearning4j/optimize/listeners/ScoreIterationListener.html). Check out all [Listeners](./deeplearning4j-nn-listeners) for more. - -While `ScoreIterationListener` will simply print the current error score for your network, `HistogramIterationListener` will start up a web UI to provide you with a host of different information that you can use to fine tune your network configuration. See [Visualize, Monitor and Debug Network Learning](./deeplearning4j-nn-visualization) on how to interpret that data. 
- -See [Troubleshooting neural nets](./deeplearning4j-troubleshooting-training) for more information on how to improve results. \ No newline at end of file diff --git a/docs/deeplearning4j/templates/config-buildtools.md b/docs/deeplearning4j/templates/config-buildtools.md deleted file mode 100644 index f393df0c6..000000000 --- a/docs/deeplearning4j/templates/config-buildtools.md +++ /dev/null @@ -1,56 +0,0 @@ ---- -title: Configuration for Gradle, SBT, and More -short_title: SBT, Gradle, & Others -description: Configure the build tools for Deeplearning4j. -category: Configuration -weight: 3 ---- - -## Configuring your build tool - -While we encourage Deeplearning4j, ND4J and DataVec users to employ Maven, it's worthwhile documenting how to configure build files for other tools, like Ivy, Gradle and SBT -- particularly since Google prefers Gradle over Maven for Android projects. - -The instructions below apply to all DL4J and ND4J submodules, such as deeplearning4j-api, deeplearning4j-scaleout, and ND4J backends. - -## Gradle - -You can use Deeplearning4j with Gradle by adding the following to your build.gradle in the dependencies block: - - compile "org.deeplearning4j:deeplearning4j-core:{{ page.version }}" - -Add a backend by adding the following: - - compile "org.nd4j:nd4j-native-platform:{{ page.version }}" - -You can also swap the standard CPU implementation for [GPUs](./deeplearning4j-config-gpu-cpu). - -## SBT - -You can use Deeplearning4j with SBT by adding the following to your build.sbt: - - libraryDependencies += "org.deeplearning4j" % "deeplearning4j-core" % "{{ page.version }}" - -Add a backend by adding the following: - - libraryDependencies += "org.nd4j" % "nd4j-native-platform" % "{{ page.version }}" - -You can also swap the standard CPU implementation for [GPUs](./deeplearning4j-config-gpu-cpu). 
- -## Ivy - -You can use Deeplearning4j with ivy by adding the following to your ivy.xml: - - - - -Add a backend by adding the following: - - - -You can also swap the standard CPU implementation for [GPUs](./deeplearning4j-config-gpu-cpu). - -## Leiningen - -Clojure programmers may want to use [Leiningen](https://github.com/technomancy/leiningen/) or [Boot](http://boot-clj.com/) to work with Maven. A [Leiningen tutorial is here](https://github.com/technomancy/leiningen/blob/master/doc/TUTORIAL.md). - -NOTE: You'll still need to download ND4J, DataVec and Deeplearning4j, or double-click on their respective JAR files downloaded by Maven / Ivy / Gradle, to install them in your Eclipse installation. \ No newline at end of file diff --git a/docs/deeplearning4j/templates/config-cudnn.md b/docs/deeplearning4j/templates/config-cudnn.md deleted file mode 100644 index 64d248fe3..000000000 --- a/docs/deeplearning4j/templates/config-cudnn.md +++ /dev/null @@ -1,80 +0,0 @@ ---- -title: Using Deeplearning4j with cuDNN -short_title: cuDNN -description: Using the NVIDIA cuDNN library with DL4J. -category: Configuration -weight: 3 ---- - -## Using Deeplearning4j with cuDNN - -Deeplearning4j supports CUDA but can be further accelerated with cuDNN. Most 2D CNN layers (such as ConvolutionLayer, SubsamplingLayer, etc), and also LSTM and BatchNormalization layers support CuDNN. 
- -The only thing we need to do to have DL4J load cuDNN is to add a dependency on `deeplearning4j-cuda-10.0`, `deeplearning4j-cuda-10.1`, or `deeplearning4j-cuda-10.2` for example: - -```xml - - org.deeplearning4j - deeplearning4j-cuda-10.0 - {{page.version}} - -``` - -or -```xml - - org.deeplearning4j - deeplearning4j-cuda-10.1 - {{page.version}} - -``` - -or -```xml - - org.deeplearning4j - deeplearning4j-cuda-10.2 - {{page.version}} - -``` - - -The actual library for cuDNN is not bundled, so be sure to download and install the appropriate package for your platform from NVIDIA: - -* [NVIDIA cuDNN](https://developer.nvidia.com/cudnn) - -Note there are multiple combinations of cuDNN and CUDA supported. At this time the following combinations are supported by Deeplearning4j: - - - - - - - - -
    CUDA VersioncuDNN Version
    10.07.4
    10.17.6
    10.27.6
    - - - To install, simply extract the library to a directory found in the system path used by native libraries. The easiest way is to place it alongside other libraries from CUDA in the default directory (`/usr/local/cuda/lib64/` on Linux, `/usr/local/cuda/lib/` on Mac OS X, and `C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0\bin\`, `C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\bin\`, or `C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\bin\` on Windows). - -Alternatively, in the case of CUDA 10.2, cuDNN comes bundled with the "redist" package of the [JavaCPP Presets for CUDA](https://github.com/bytedeco/javacpp-presets/tree/master/cuda). [After agreeing to the license](https://github.com/bytedeco/javacpp-presets/tree/master/cuda#license-agreements), we can add the following dependencies instead of installing CUDA and cuDNN: - - - org.bytedeco - cuda-platform-redist - 10.2-7.6-1.5.3 - - -Also note that, by default, Deeplearning4j will use the fastest algorithms available according to cuDNN, but memory usage may be excessive, causing strange launch errors. When this happens, try to reduce memory usage by using the [`NO_WORKSPACE` mode settable via the network configuration](/api/{{page.version}}/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.Builder.html#cudnnAlgoMode-org.deeplearning4j.nn.conf.layers.ConvolutionLayer.AlgoMode-), instead of the default of `ConvolutionLayer.AlgoMode.PREFER_FASTEST`, for example: - -```java - // for the whole network - new NeuralNetConfiguration.Builder() - .cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE) - // ... - // or separately for each layer - new ConvolutionLayer.Builder(h, w) - .cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE) - // ... 
- -``` diff --git a/docs/deeplearning4j/templates/config-gpu-cpu.md b/docs/deeplearning4j/templates/config-gpu-cpu.md deleted file mode 100644 index 29b0a6c7b..000000000 --- a/docs/deeplearning4j/templates/config-gpu-cpu.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -title: Deeplearning4j Hardware and CPU/GPU Setup -short_title: GPU/CPU Setup -description: Hardware setup for Eclipse Deeplearning4j, including GPUs and CUDA. -category: Configuration -weight: 1 ---- - -## ND4J backends for GPUs and CPUs - -You can choose GPUs or native CPUs for your backend linear algebra operations by changing the dependencies in ND4J's POM.xml file. Your selection will affect both ND4J and DL4J being used in your application. - -If you have CUDA v9.2+ installed and NVIDIA-compatible hardware, then your dependency declaration will look like: - -```xml - - org.nd4j - nd4j-cuda-{{ page.cudaVersion }} - {{ page.version }} - -``` -As of now, the `artifactId` for the CUDA versions can be one of `nd4j-cuda-9.0`, `nd4j-cuda-9.2` or `nd4j-cuda-10.0`. - -You can also find the available CUDA versions via [Maven Central search](https://search.maven.org/search?q=nd4j-cuda) or in the [Release Notes](https://deeplearning4j.org/release-notes.html). - -Otherwise you will need to use the native implementation of ND4J as a CPU backend: - -```xml - - org.nd4j - nd4j-native - {{ page.version }} - -``` - -## System architectures - -If you are developing your project on multiple operating systems/system architectures, you can add `-platform` to the end of your `artifactId` which will download binaries for most major systems. - -```xml - - ... - nd4j-native-platform - ... - -``` - -## Multiple GPUs - -If you have several GPUs, but your system is forcing you to use just one, you can use the helper `CudaEnvironment.getInstance().getConfiguration().allowMultiGPU(true);` as first line of your `main()` method. - -## CuDNN - -See our page on [CuDNN](./deeplearning4j-config-cudnn). 
- - -## CUDA Installation - -Check the NVIDIA guides for instructions on setting up CUDA on the NVIDIA [website](http://docs.nvidia.com/cuda/). diff --git a/docs/deeplearning4j/templates/config-maven.md b/docs/deeplearning4j/templates/config-maven.md deleted file mode 100644 index fd3d772a6..000000000 --- a/docs/deeplearning4j/templates/config-maven.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -title: Configuration for Maven -short_title: Maven -description: Configure the Maven build tool for Deeplearning4j. -category: Configuration -weight: 2 ---- - -## Configuring the Maven build tool - -You can use Deeplearning4j with Maven by adding the following to your `pom.xml`: -```xml - - - org.deeplearning4j - deeplearning4j-core - {{ page.version }} - - -``` - -The instructions below apply to all DL4J and ND4J submodules, such as `deeplearning4j-api`, `deeplearning4j-scaleout`, and ND4J backends. - -## Add a backend - -DL4J relies on ND4J for hardware-specific implementations and tensor operations. Add a backend by pasting the following snippet into your `pom.xml`: - -```xml - - - org.nd4j - nd4j-native-platform - {{ page.version }} - - -``` - -You can also swap the standard CPU implementation for [GPUs](./deeplearning4j-config-gpu-cpu). diff --git a/docs/deeplearning4j/templates/config-memory.md b/docs/deeplearning4j/templates/config-memory.md deleted file mode 100644 index 4660baaf8..000000000 --- a/docs/deeplearning4j/templates/config-memory.md +++ /dev/null @@ -1,101 +0,0 @@ ---- -title: Memory management in DL4J and ND4J -short_title: Memory Management -description: Setting available Memory/RAM for a DL4J application -category: Configuration -weight: 1 ---- - -## Memory Management for ND4J/DL4J: How does it work? - -ND4J uses off-heap memory to store NDArrays, to provide better performance while working with NDArrays from native code such as BLAS and CUDA libraries. 
- -"Off-heap" means that the memory is allocated outside of the JVM (Java Virtual Machine) and hence isn't managed by the JVM's garbage collection (GC). On the Java/JVM side, we only hold pointers to the off-heap memory, which can be passed to the underlying C++ code via JNI for use in ND4J operations. - -To manage memory allocations, we use two approaches: - -- JVM Garbage Collector (GC) and WeakReference tracking -- MemoryWorkspaces - see [Workspaces guide](https://deeplearning4j.org/workspaces) for details - -Despite the differences between these two approaches, the idea is the same: once an NDArray is no longer required on the Java side, the off-heap associated with it should be released so that it can be reused later. The difference between the GC and `MemoryWorkspaces` approaches is in when and how the memory is released. - -- For JVM/GC memory: whenever an INDArray is collected by the garbage collector, its off-heap memory will be deallocated, assuming it is not used elsewhere. -- For `MemoryWorkspaces`: whenever an INDArray leaves the workspace scope - for example, when a layer finished forward pass/predictions - its memory may be reused without deallocation and reallocation. This results in better performance for cyclical workloads like neural network training and inference. - -## Configuring Memory Limits - -With DL4J/ND4J, there are two types of memory limits to be aware of and configure: The on-heap JVM memory limit, and the off-heap memory limit, where NDArrays live. Both limits are controlled via Java command-line arguments: - -* `-Xms` - this defines how much memory JVM heap will use at application start. - -* `-Xmx` - this allows you to specify JVM heap memory limit (maximum, at any point). Only allocated up to this amount (at the discretion of the JVM) if required. - -* `-Dorg.bytedeco.javacpp.maxbytes` - this allows you to specify the off-heap memory limit. This can also be a percentage, in which case it would apply to maxMemory. 
- -* `-Dorg.bytedeco.javacpp.maxphysicalbytes` - this specifies the maximum bytes for the entire process - usually set to `maxbytes` plus Xmx plus a bit extra, in case other libraries require some off-heap memory also. This can also be a percentage (>100%), in which case it would apply to maxMemory. Unlike `maxbytes`, setting `maxphysicalbytes` is optional - -Example: Configuring 1GB initial on-heap, 2GB max on-heap, 8GB off-heap, 10GB maximum for process: - -```shell --Xms1G -Xmx2G -Dorg.bytedeco.javacpp.maxbytes=8G -Dorg.bytedeco.javacpp.maxphysicalbytes=10G -``` - -## Gotchas: A few things to watch out for - -* With GPU systems, the maxbytes and maxphysicalbytes settings currently also effectively define the memory limit for the GPU, since the off-heap memory is mapped (via NDArrays) to the GPU - read more about this in the GPU-section below. - -* For many applications, you want less RAM to be used in JVM heap, and more RAM to be used in off-heap, since all NDArrays are stored there. If you allocate too much to the JVM heap, there will not be enough memory left for the off-heap memory. - -* If you get a "RuntimeException: Can't allocate [HOST] memory: xxx; threadId: yyy", you have run out of off-heap memory. You should most often use a WorkspaceConfiguration to handle your NDArrays allocation, in particular in e.g. training or evaluation/inference loops - if you do not, the NDArrays and their off-heap (and GPU) resources are reclaimed using the JVM GC, which might introduce severe latency and possible out of memory situations. - -* If you don't specify JVM heap limit, it will use 1/4 of your total system RAM as the limit, by default. - -* If you don't specify off-heap memory limit, the JVM heap limit (Xmx) will be used by default. i.e. `-Xmx8G` will mean that 8GB can be used by JVM heap, and an additional 8GB can be used by ND4j in off-heap. - -* In limited memory environments, it's usually a bad idea to use high `-Xmx` value together with `-Xms` option. 
That is because doing so won't leave enough off-heap memory. Consider a 16GB system in which you set `-Xms14G`: 14GB of 16GB would be allocated to the JVM, leaving only 2GB for the off-heap memory, the OS and all other programs. - -# Memory-mapped files - -ND4J supports the use of a memory-mapped file instead of RAM when using the `nd4j-native` backend. On one hand, it's slower than RAM, but on the other hand, it allows you to allocate memory chunks in a manner impossible otherwise. - -Here's sample code: - -```java -WorkspaceConfiguration mmap = WorkspaceConfiguration.builder() - .initialSize(1000000000) - .policyLocation(LocationPolicy.MMAP) - .build(); - -try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().getAndActivateWorkspace(mmap, "M2")) { - INDArray x = Nd4j.create(10000); -} -``` -In this case, a 1GB temporary file will be created and mmap'ed, and NDArray `x` will be created in that space. Obviously, this option is mostly viable for cases when you need NDArrays that can't fit into your RAM. - -## GPUs - -When using GPUs, oftentimes your CPU RAM will be greater than GPU RAM. When GPU RAM is less than CPU RAM, you need to monitor how much RAM is being used off-heap. You can check this based on the JavaCPP options specified above. - -We allocate memory on the GPU equivalent to the amount of off-heap memory you specify. We don't use any more of your GPU than that. You are also allowed to specify heap space greater than your GPU (that's not encouraged, but it's possible). If you do so, your GPU will run out of RAM when trying to run jobs. - -We also allocate off-heap memory on the CPU RAM as well. This is for efficient communication of CPU to GPU, and CPU accessing data from an NDArray without having to fetch data from the GPU each time you call for it. 
- -If JavaCPP or your GPU throw an out-of-memory error (OOM), or even if your compute slows down due to GPU memory being limited, then you may want to either decrease batch size or increase the amount of off-heap memory that JavaCPP is allowed to allocate, if that's possible. - -Try to run with an off-heap memory equal to your GPU's RAM. Also, always remember to set up a small JVM heap space using the `Xmx` option. - -Note that if your GPU has < 2g of RAM, it's probably not usable for deep learning. You should consider using your CPU if this is the case. Typical deep-learning workloads should have 4GB of RAM *at minimum*. Even that is small. 8GB of RAM on a GPU is recommended for deep learning workloads. - -It is possible to use HOST-only memory with a CUDA backend. That can be done using workspaces. - -Example: -```java -WorkspaceConfiguration basicConfig = WorkspaceConfiguration.builder() - .policyAllocation(AllocationPolicy.STRICT) - .policyLearning(LearningPolicy.FIRST_LOOP) - .policyMirroring(MirroringPolicy.HOST_ONLY) // <--- this option does this trick - .policySpill(SpillPolicy.EXTERNAL) - .build(); -``` - -It's not recommended to use HOST-only arrays directly, since they will dramatically reduce performance. But they might be useful as in-memory cache pairs with the `INDArray.unsafeDuplication()` method. 
diff --git a/docs/deeplearning4j/templates/config-performance-debugging.md b/docs/deeplearning4j/templates/config-performance-debugging.md deleted file mode 100644 index 04b92ba23..000000000 --- a/docs/deeplearning4j/templates/config-performance-debugging.md +++ /dev/null @@ -1,445 +0,0 @@ ---- -title: Deeplearning4j and ND4J - Debugging Performance Issues -short_title: Performance Issues Debugging -description: How to debug performance issues in Deeplearning4j and ND4J -category: Configuration -weight: 11 ---- - -# DL4J and ND4J: How to Debug Performance Issues - -This page is a how-to guide for debugging performance issues encountered when training neural networks with Deeplearning4j. -Much of the information also applies to debugging performance issues encountered when using ND4J. - -Deeplearning4j and ND4J provide excellent performance in most cases (utilizing optimized c++ code for all numerical operations as well as high performance libraries such as NVIDIA cuDNN and Intel MKL). However, sometimes bottlenecks or misconfiguration issues may limit performance to well below the maximum. This page is intended to be a guide to help users identify the cause of poor performance, and provide steps to fix these issues. - -Performance issues may include: -1. Poor CPU/GPU utilization -2. Slower than expected training or operation execution - -To start, here's a summary of some possible causes of performance issues: -1. Wrong ND4J backend is used (for example, CPU backend when GPU backend is expected) -2. Not using cuDNN when using CUDA GPUs -3. ETL (data loading) bottlenecks -4. Garbage collection overheads -5. Small batch sizes -6. Multi-threaded use of MultiLayerNetwork/ComputationGraph for inference (not thread safe) -7. Double precision floating point data type used when single precision should be used -8. Not using workspaces for memory management (enabled by default) -9. Poorly configured network -10. Layer or operation is CPU-only -11. 
CPU: Lack of hardware support for modern AVX etc extensions -12. Other processes using CPU or GPU resources -13. CPU: Lack of configuration of OMP_NUM_THREADS when using many models/threads simultaneously - -Finally, this page has a short section on [Debugging Performance Issues with JVM Profiling](#profiling) - -## Step 1: Check if correct backend is used - -ND4J (and by extension, Deeplearning4j) can perform computation on either the CPU or GPU. -The device used for computation is determined by your project dependencies - you include ```nd4j-native-platform``` to use CPUs for computation or ```nd4j-cuda-x.x-platform``` to use GPUs for computation (where ```x.x``` is your CUDA version - such as 9.2, 10.0 etc). - -It is straightforward to check which backend is used. ND4J will log the backend upon initialization. - -For CPU execution, you will expect output that looks something like: -``` -o.n.l.f.Nd4jBackend - Loaded [CpuBackend] backend -o.n.n.NativeOpsHolder - Number of threads used for NativeOps: 8 -o.n.n.Nd4jBlas - Number of threads used for BLAS: 8 -o.n.l.a.o.e.DefaultOpExecutioner - Backend used: [CPU]; OS: [Windows 10] -o.n.l.a.o.e.DefaultOpExecutioner - Cores: [16]; Memory: [7.1GB]; -o.n.l.a.o.e.DefaultOpExecutioner - Blas vendor: [MKL] -``` - -For CUDA execution, you would expect the output to look something like: -``` -13:08:09,042 INFO ~ Loaded [JCublasBackend] backend -13:08:13,061 INFO ~ Number of threads used for NativeOps: 32 -13:08:14,265 INFO ~ Number of threads used for BLAS: 0 -13:08:14,274 INFO ~ Backend used: [CUDA]; OS: [Windows 10] -13:08:14,274 INFO ~ Cores: [16]; Memory: [7.1GB]; -13:08:14,274 INFO ~ Blas vendor: [CUBLAS] -13:08:14,274 INFO ~ Device Name: [TITAN X (Pascal)]; CC: [6.1]; Total/free memory: [12884901888] -``` - -Pay attention to the ```Loaded [X] backend``` and ```Backend used: [X]``` messages to confirm that the correct backend is used. 
-If the incorrect backend is being used, check your program dependencies to ensure the correct backend has been included. - - -## Step 2: Check for cuDNN - -If you are using CPUs only (nd4j-native backend) then you can skip to step 3 as cuDNN only applies when using NVIDIA GPUs (```nd4j-cuda-x.x-platform``` dependency). - -cuDNN is NVIDIA's library for accelerating neural network training on NVIDIA GPUs. -Deeplearning4j can make use of cuDNN to accelerate a number of layers - including ConvolutionLayer, SubsamplingLayer, BatchNormalization, Dropout, LocalResponseNormalization and LSTM. When training on GPUs, cuDNN should always be used if possible as it is usually much faster than the built-in layer implementations. - -Instructions for configuring CuDNN can be found [here](https://deeplearning4j.org/docs/latest/deeplearning4j-config-cudnn). -In summary, include the ```deeplearning4j-cuda-x.x``` dependency (where ```x.x``` is your CUDA version - such as 9.2 or 10.0). The network configuration does not need to change to utilize cuDNN - cuDNN simply needs to be available along with the deeplearning4j-cuda module. - - -**How to determine if CuDNN is used or not** - -Not all DL4J layer types are supported in cuDNN. DL4J layers with cuDNN support include ConvolutionLayer, SubsamplingLayer, BatchNormalization, Dropout, LocalResponseNormalization and LSTM. - -To check if cuDNN is being used, the simplest approach is to look at the log output when running inference or training: -If cuDNN is NOT available when you are using a layer that supports it, you will see a message such as: -``` -o.d.n.l.c.ConvolutionLayer - cuDNN not found: use cuDNN for better GPU performance by including the deeplearning4j-cuda module. 
For more information, please refer to: https://deeplearning4j.org/docs/latest/deeplearning4j-config-cudnn -java.lang.ClassNotFoundException: org.deeplearning4j.nn.layers.convolution.CudnnConvolutionHelper - at java.net.URLClassLoader.findClass(URLClassLoader.java:381) - at java.lang.ClassLoader.loadClass(ClassLoader.java:424) - at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:335) - at java.lang.ClassLoader.loadClass(ClassLoader.java:357) - at java.lang.Class.forName0(Native Method) -``` - -If cuDNN is available and was loaded successfully, no message will be logged. - -Alternatively, you can confirm that cuDNN is used by using the following code: -``` -MultiLayerNetwork net = ... -LayerHelper h = net.getLayer(0).getHelper(); //Index 0: assume layer 0 is a ConvolutionLayer in this example -System.out.println("Layer helper: " + (h == null ? null : h.getClass().getName())); -``` -Note that you will need to do at least one forward pass or fit call to initialize the cuDNN layer helper. - -If cuDNN is available and was loaded successfully, you will see the following printed: -``` -Layer helper: org.deeplearning4j.nn.layers.convolution.CudnnConvolutionHelper -``` -whereas if cuDNN is not available or could not be loaded successfully (you will get a warning or error logged also): -``` -Layer helper: null -``` - - - -## Step 3: Check for ETL (Data Loading) Bottlenecks - -Neural network training requires data to be in memory before training can proceed. If the data is not loaded fast enough, the network will have to wait until data is available. -DL4J uses asynchronous prefetch of data to improve performance by default. Under normal circumstances, this asynchronous prefetching means the network should never be waiting around for data (except on the very first iteration) - the next minibatch is loaded in another thread while training is proceeding in the main thread. - -However, when data loading takes longer than the iteration time, data can be a bottleneck. 
For example, if a network takes 100ms to perform fitting on a single minibatch, but data loading takes 200ms, then we have a bottleneck: the network will have to wait 100ms per iteration (200ms loading - 100ms loading in parallel with training) before continuing the next iteration. -Conversely, if the network fit operation was 100ms and data loading was 50ms, then no data loading bottleneck will occur, as the 50ms loading time can be completed asynchronously within one iteration. - -**How to check for ETL / data loading bottlenecks** - -The way to identify ETL bottlenecks is simple: add PerformanceListener to your network, and train as normal. -For example: -``` -MultiLayerNetwork net = ... -net.setListeners(new PerformanceListener(1)); //Logs ETL and iteration speed on each iteration -``` -When training, you will see output such as: -``` -.d.o.l.PerformanceListener - ETL: 0 ms; iteration 16; iteration time: 65 ms; samples/sec: 492.308; batches/sec: 15.384; -``` -The above output shows that there is no ETL bottleneck (i.e., ```ETL: 0 ms```). However, if ETL time is greater than 0 consistently (after the first iteration), an ETL bottleneck is present. - -**How to identify the cause of an ETL bottleneck** - -There are a number of possible causes of ETL bottlenecks. These include (but are not limited to): -* Slow hard drives -* Network latency or throughput issues (when reading from remote or network storage) -* Computationally intensive or inefficient ETL (especially for custom ETL pipelines) - -One useful way to get more information is to perform profiling, as described in the [profiling section](#profiling) later in this page. -For custom ETL pipelines, adding logging for the various stages can help. Finally, another approach is to use a process of elimination - for example, measuring the latency and throughput of reading raw files from disk or from remote storage vs. measuring the time to actually process the data from its raw format. 
- -## Step 4: Check for Garbage Collection Overhead - -Java uses garbage collection for management of on-heap memory (see [this link](https://stackify.com/what-is-java-garbage-collection/) for example for an explanation). -Note that DL4J and ND4J use off-heap memory for storage of all INDArrays (see the [memory page](https://deeplearning4j.org/docs/latest/deeplearning4j-config-memory) for details). - -Even though DL4J/ND4J array memory is off-heap, garbage collection can still cause performance issues. - -In summary: -* Garbage collection will sometimes (temporarily and briefly) pause/stop application execution ("stop the world") -* These GC pauses slow down program execution -* The overall performance impact of GC pauses depends on both the frequency of GC pauses, and the duration of GC pauses -* The frequency is controllable (in part) by ND4J, using ```Nd4j.getMemoryManager().setAutoGcWindow(10000);``` and ```Nd4j.getMemoryManager().togglePeriodicGc(false);``` -* Not every GC event is caused by or controlled by the above ND4J configuration. - -In our experience, garbage collection time depends strongly on the number of objects in the JVM heap memory. -As a rough guide: -* Less than 100,000 objects in heap memory: short GC events (usually not a performance problem) -* 100,000-500,000 objects: GC overhead becomes noticeable, often in the 50-250ms range per full GC event -* 500,000 or more objects: GC can be a bottleneck if performed frequently. Performance may still be good if GC events are infrequent (for example, every 10 seconds or less). 
-* 10 million or more objects: GC is a major bottleneck even if infrequently called, with each full GC taking multiple seconds - -**How to configure ND4J garbage collection settings** - -In simple terms, there are two settings of note: -``` -Nd4j.getMemoryManager().setAutoGcWindow(10000); //Set to 10 seconds (10000ms) between System.gc() calls -Nd4j.getMemoryManager().togglePeriodicGc(false); //Disable periodic GC calls -``` - -If you suspect garbage collection overhead is having an impact on performance, try changing these settings. -The main downside to reducing the frequency or disabling periodic GC entirely is when you are not using [workspaces](https://deeplearning4j.org/docs/latest/deeplearning4j-config-workspaces), though workspaces are enabled by default for all neural networks in Deeplearning4j. - - -Side note: if you are using DL4J for training on Spark, setting these values on the master/driver will not impact the settings on the worker. Instead, see [this guide](https://deeplearning4j.org/docs/latest/deeplearning4j-scaleout-howto#gc). 
- -**How to determine GC impact using PerformanceListener** - -*NOTE: this feature was added after 1.0.0-beta3 and will be available in future releases* -To determine the impact of garbage collection using PerformanceListener, you can use the following: - -``` -int listenerFrequency = 1; -boolean reportScore = true; -boolean reportGC = true; -net.setListeners(new PerformanceListener(listenerFrequency, reportScore, reportGC)); -``` - -This will report GC activity: -``` -o.d.o.l.PerformanceListener - ETL: 0 ms; iteration 30; iteration time: 17 ms; samples/sec: 588.235; batches/sec: 58.824; score: 0.7229335801186025; GC: [PS Scavenge: 2 (1ms)], [PS MarkSweep: 2 (24ms)]; -``` -The garbage collection activity is reported for all available garbage collectors - the ```GC: [PS Scavenge: 2 (1ms)], [PS MarkSweep: 2 (24ms)]``` means that garbage collection was performed 2 times since the last PerformanceListener reporting, and took 1ms and 24ms total respectively for the two GC algorithms, respectively. - -Keep in mind: PerformanceListener reports GC events every N iterations (as configured by the user). Thus, if PerformanceListener is configured to report statistics every 10 iterations, the garbage collection stats would be for the period of time corresponding to the last 10 iterations. - -**How to determine GC impact using ```-verbose:gc```** - -Another useful tool is the ```-verbose:gc```, ```-XX:+PrintGCDetails``` ```-XX:+PrintGCTimeStamps``` command line options. 
-For more details, see [Oracle Command Line Options](https://www.oracle.com/technetwork/java/javase/clopts-139448.html#gbmpt) and [Oracle GC Portal Documentation](https://www.oracle.com/technetwork/articles/javase/gcportal-136937.html) - -These options can be passed to the JVM on launch (when using ```java -jar``` or ```java -cp```) or can be added to IDE launch options (for example, in IntelliJ: these should be placed in the "VM Options" field in Run/Debug Configurations - see [Setting Configuration Options](https://www.jetbrains.com/help/idea/setting-configuration-options.html)) - -When these options are enabled, you will have information reported on each GC event, such as: -``` -5.938: [GC (System.gc()) [PSYoungGen: 5578K->96K(153088K)] 9499K->4016K(502784K), 0.0006252 secs] [Times: user=0.00 sys=0.00, real=0.00 secs] -5.939: [Full GC (System.gc()) [PSYoungGen: 96K->0K(153088K)] [ParOldGen: 3920K->3911K(349696K)] 4016K->3911K(502784K), [Metaspace: 22598K->22598K(1069056K)], 0.0117132 secs] [Times: user=0.02 sys=0.00, real=0.01 secs] -``` - -This information can be used to determine the frequency, cause (System.gc() calls, allocation failure, etc) and duration of GC events. - - -**How to determine GC impact using a profiler** - -An alternative approach is to use a profiler to collect garbage collection information. - -For example, [YourKit Java Profiler](https://www.yourkit.com) can be used to determine both the frequency and duration of garbage collection - see [Garbage collection telemetry](https://www.yourkit.com/docs/java/help/garbage_collection.jsp) for more details. - -[Other tools](https://www.cubrid.org/blog/how-to-monitor-java-garbage-collection/), such as VisualVM can also be used to monitor GC activity. - - -**How to determine number (and type) of JVM heap objects using memory dumps** - -If you determine that garbage collection is a problem, and suspect that this is due to the number of objects in memory, you can perform a heap dump. 
- -To perform a heap dump: -* Step 1: Run your program -* Step 2: While running, determine the process ID - - One approach is to use jps: - - For basic details, run ```jps``` on the command line. If jps is not on the system PATH, it can be found (on Windows) at ```C:\Program Files\Java\jdk\bin\jps.exe``` - - For more details on each process, run ```jps -lv``` instead - - Alternatively, you can use the ```top``` command on Linux or Task Manager (Windows) to find the PID (on Windows, the PID column may not be enabled by default) -* Step 3: Create a heap dump using ```jmap -dump:format=b,file=file_name.hprof 123``` where ```123``` is the process id (PID) to create the heap dump for - -A number of alternatives for generating heap dumps can be found [here](https://www.yourkit.com/docs/java/help/hprof_snapshots.jsp). - -After a memory dump has been collected, it can be opened in tools such as YourKit profiler and VisualVM to determine the number, type and size of objects. -With this information, you should be able to pinpoint the cause of the large number of objects and make changes to your code to reduce or eliminate the objects that are causing the garbage collection overhead. - -## Step 5: Check Minibatch Size - -Another common cause of performance issues is a poorly chosen minibatch size. -A minibatch is a number of examples used together for one step of inference and training. Minibatch sizes of 32 to 128 are commonly used, though smaller or larger are sometimes used. - -In summary: -* If minibatch size is too small (for example, training or inference with 1 example at a time), poor hardware utilization and lower overall throughput is expected -* If minibatch size is too large - - Hardware utilization will usually be good - - Iteration times will slow down - - Memory utilization may be too high (leading to out-of-memory errors) - -For inference, avoid using minibatch size of 1, as throughput will suffer. 
Unless there are strict latency requirements, you should use larger minibatch sizes as this will give you the best hardware utilization and hence throughput, and is especially important for GPUs. - -For training, you should never use a minibatch size of 1 as overall performance and hardware utilization will be reduced. Network convergence may also suffer. Start with a minibatch size of 32-128, if memory will allow this to be used. - -For serving predictions in multi-threaded applications (such as a web server), [ParallelInference](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/ParallelInference.java) should be used. - - -## Step 6: Ensure you are not using a single MultiLayerNetwork/ComputationGraph for inference from multiple threads - -MultiLayerNetwork and ComputationGraph are not considered thread-safe, and should not be used from multiple threads. -That said, most operations such as fit, output, etc use synchronized blocks. These synchronized methods should avoid hard-to-understand exceptions (race conditions due to concurrent use), but they will limit throughput to a single thread (though, note that native operation parallelism will still be parallelized as normal). -In summary, using one network from multiple threads should be avoided as it is not thread safe and can be a performance bottleneck. - - -For inference from multiple threads, you should use one model per thread (as this avoids locks) or for serving predictions in multi-threaded applications (such as a web server), use [ParallelInference](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper/src/main/java/org/deeplearning4j/parallelism/ParallelInference.java).
- -## Step 7: Check Data Types - -As of 1.0.0-beta3 and earlier, ND4J has a global datatype setting that determines the datatype of all arrays. -The default value is 32-bit floating point. The data type can be set using ```Nd4j.setDataType(DataBuffer.Type.FLOAT);``` for example. - -For best performance, this value should be left as its default. If 64-bit floating point precision (double precision) is used instead, performance can be significantly reduced, especially on GPUs - most consumer NVIDIA GPUs have very poor double precision performance (and half precision/FP16). On Tesla series cards, double precision performance is usually much better than for consumer (GeForce) cards, though is still usually half or less of the single precision performance. -Wikipedia has a summary of the single and double precision performance of NVIDIA GPUs [here](https://en.wikipedia.org/wiki/List_of_Nvidia_graphics_processing_units). - -Performance on CPUs can also be reduced for double precision due to the additional memory bandwidth requirements vs. float precision. - -You can check the data type setting using: -``` -System.out.println("ND4J Data Type Setting: " + Nd4j.dataType()); -``` - -## Step 8: Check workspace configuration for memory management (enabled by default) - -For details on workspaces, see the [workspaces page](https://deeplearning4j.org/docs/latest/deeplearning4j-config-workspaces). - -In summary, workspaces are enabled by default for all Deeplearning4j networks, and enabling them improves performance and reduces memory requirements. -There are very few reasons to disable workspaces.
- -You can check that workspaces are enabled for your MultiLayerNetwork using: -``` -System.out.println("Training workspace config: " + net.getLayerWiseConfigurations().getTrainingWorkspaceMode()); -System.out.println("Inference workspace config: " + net.getLayerWiseConfigurations().getInferenceWorkspaceMode()); -``` -or for a ComputationGraph using: -``` -System.out.println("Training workspace config: " + cg.getConfiguration().getTrainingWorkspaceMode()); -System.out.println("Inference workspace config: " + cg.getConfiguration().getInferenceWorkspaceMode()); -``` - -You want to see ```ENABLED``` output for both training and inference. -To change the workspace configuration, use the setter methods, for example: ```net.getLayerWiseConfigurations().setTrainingWorkspaceMode(WorkspaceMode.ENABLED);``` - - -## Step 9: Check for a badly configured network or network with layer bottlenecks - -Another possible cause (especially for newer users) is a poorly designed network. -A network may be poorly designed if: -* It has too many layers. A rough guideline: - - More than about 100 layers for a CNN may be too many - - More than about 10 layers for an RNN/LSTM network may be too many - - More than about 20 feed-forward layers may be too many for an MLP -* The input/activations are too large - - For CNNs, inputs in the range of 224x224 (for image classification) to 600x600 (for object detection and segmentation) are used. Large image sizes (such as 500x500) are computationally demanding, and much larger than this should be considered too large in most cases. - - For RNNs, the sequence length matters. If you are using sequences longer than a few hundred steps, you should use [truncated backpropagation through time](https://deeplearning4j.org/docs/latest/deeplearning4j-nn-recurrent#tbptt) if possible.
-* The output number of classes is too large - - Classification with more than about 10,000 classes can become a performance bottleneck with standard softmax output layers -* The layers are too large - - For CNNs, most layers have kernel sizes in the range 2x2 to 7x7, with channels equal to 32 to 1024 (with larger number of channels appearing later in the network). Much larger than this may cause a performance bottleneck. - - For MLPs, most layers have at most 2048 units/neurons (often much smaller). Much larger than this may be too large. - - For RNNs such as LSTMs, layers are typically in the range of 128 to 512, though the largest RNNs may use around 1024 units per layer. -* The network has too many parameters - - This is usually a consequence of the other issues already mentioned - too many layers, too large input, too many output classes - - For comparison, less than 1 million parameters would be considered small, and more than about 100 million parameters would be considered very large. - - You can check the number of parameters using ```MultiLayerNetwork/ComputationGraph.numParams()``` or ```MultiLayerNetwork/ComputationGraph.summary()``` - -Note that these are guidelines only, and some reasonable network may exceed the numbers specified here. Some networks can become very large, such as those commonly used for imagenet classification or object detection. However, in these cases, the network is usually carefully designed to provide a good tradeoff between accuracy and computation time. - -If your network architecture is significantly outside of the guidelines specified here, you may want to reconsider the design to improve performance. - - -## Step 10: Check for CPU-only ops (when using GPUs) - -If you are using CPUs only (nd4j-native backend), you can skip this step, as it only applies when using the GPU (nd4j-cuda) backend. - -As of 1.0.0-beta3, a handful of recently added operations do not yet have GPU implementations. 
Thus, when these layers are used in a network, they will execute on CPU only, irrespective of the nd4j-backend used. GPU support for these layers will be added in an upcoming release. - -The layers without GPU support as of 1.0.0-beta3 include: -* Convolution3D -* Upsampling1D/2D/3D -* Deconvolution2D -* LocallyConnected1D/2D -* SpaceToBatch -* SpaceToDepth - -Unfortunately, there is no workaround or fix for now, until these operations have GPU implementations completed. - - -## Step 11: Check CPU support for hardware extensions (AVX etc) - -If you are running on a GPU, this section does not apply. - -When running on older CPUs or those that lack modern AVX extensions such as AVX2 and AVX512, performance will be reduced compared to running on CPUs with these features. -Though there is not much you can do about the lack of such features, it is worth knowing about if you are comparing performance between different CPU models. - -In summary, CPU models with AVX2 support will perform better than those without it; similarly, AVX512 is an improvement over AVX2. - -For more details on AVX, see the [Wikipedia AVX article](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions) - - -## Step 12: Check other processes using CPU or GPU resources - -Another obvious cause of performance issues is other processes using CPU or GPU resources. - -For CPU, it is straightforward to see if other processes are using resources using tools such as ```top``` (for Linux) or Task Manager (for Windows). - -For NVIDIA CUDA GPUs, nvidia-smi can be used. nvidia-smi is usually installed with the NVIDIA display drivers, and (when run) shows the overall GPU and memory utilization, as well as the GPU utilization of programs running on the system. - -On Linux, this is usually on the system path by default.
-On Windows, it may be found at ```C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi``` - - -## Step 13: Check OMP_NUM_THREADS when performing concurrent inference using CPU in multiple threads simultaneously - -If you are using GPUs (nd4j-cuda backend), you can skip this section. - -One issue to be aware of when running multiple DL4J networks (or ND4J operations generally) concurrently in multiple threads is the OpenMP number of threads setting. -In summary, in ND4J we use OpenMP parallelism at the C++ level to increase operation performance. By default, ND4J will use a value equal to the number of physical CPU cores (*not logical cores*) as this will give optimal performance. - -This also applies if the CPU resources are shared with other computationally demanding processes. - -In either case, you may see better overall throughput by reducing the number of OpenMP threads by setting the OMP_NUM_THREADS environment variable - see [ND4JEnvironmentVars](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-common/src/main/java/org/nd4j/config/ND4JEnvironmentVars.java) for details. - -One reason that reducing OMP_NUM_THREADS can improve overall performance is reduced [cache thrashing](https://en.wikipedia.org/wiki/Thrashing_(computer_science)). - - -# Debugging Performance Issues with JVM Profiling - -Profiling is a process whereby you can trace how long each method in your code takes to execute, to identify and debug performance bottlenecks. - -A full guide to profiling is beyond the scope of this page, but the summary is that you can trace how long each method takes to execute (and where it is being called from) using a profiling tool. This information can then be used to identify bottlenecks (and their causes) in your program. - - -## How to Perform Profiling - -Multiple options are available for performing profiling locally.
-We suggest using either [YourKit Java Profiler](https://www.yourkit.com/java/profiler/features/) or [VisualVM](https://visualvm.github.io/) for profiling. - -The YourKit profiling documentation is quite good. To perform profiling with YourKit: -* Install and start YourKit Profiler -* Start your application with the profiler enabled. For details, see [Running applications with the profiler](https://www.yourkit.com/docs/java/help/running_with_profiler.jsp) and [Local profiling](https://www.yourkit.com/docs/java/help/local_profiling.jsp) - - Note that IDE integrations are available - see [IDE integration](https://www.yourkit.com/docs/java/help/ide_integration.jsp) -* Collect a snapshot and analyze - -Note that YourKit provides multiple different types of profiling: Sampling, tracing, and call counting. -Each type of profiling has different pros and cons, such as accuracy vs. overhead. For more details, see [Sampling, tracing, call counting](https://www.yourkit.com/docs/java/help/cpu_intro.jsp) - -VisualVM also supports profiling - see the Profiling Applications section of the [VisualVM documentation](https://visualvm.github.io/documentation.html) for more details. - -## Profiling on Spark - -When debugging performance issues for Spark training or inference jobs, it can often be useful to perform profiling here also. - -One approach that we have used internally is to combine manual profiling settings (```-agentpath``` JVM argument) with spark-submit arguments for YourKit profiler. - -To perform profiling in this manner, 5 steps are required: -1. Download YourKit profiler to a location on each worker (must be the same location on each worker) and (optionally) the driver -2. [Optional] Copy the profiling configuration onto each worker (must be the same location on each worker) -3. Create a local output directory for storing the profiling result files on each worker -4. Launch the Spark job with the appropriate configuration (see example below) -5. 
The snapshots will be saved when the Spark job completes (or is cancelled) to the specified directories. - -For example, to perform tracing on both the driver and the workers, -``` -spark-submit - --conf 'spark.executor.extraJavaOptions=-agentpath:/home/user/YourKit-JavaProfiler-2018.04/bin/linux-x86-64/libyjpagent.so=tracing,port=10001,dir=/home/user/yourkit_snapshots/executor/,tracing_settings_path=/home/user/yourkitconf.txt' - --conf 'spark.driver.extraJavaOptions=-agentpath:/home/user/YourKit-JavaProfiler-2018.04/bin/linux-x86-64/libyjpagent.so=tracing,port=10001,dir=/home/user/yourkit_snapshots/driver/,tracing_settings_path=/home/user/yourkitconf.txt' - -``` - -The configuration (tracing_settings_path) is optional. A sample tracing settings file is provided below: -``` -walltime=* -adaptive=true -adaptive_min_method_invocation_count=1000 -adaptive_max_average_method_time_ns=100000 -``` - diff --git a/docs/deeplearning4j/templates/config-snapshots.md b/docs/deeplearning4j/templates/config-snapshots.md deleted file mode 100644 index dfc6e6b66..000000000 --- a/docs/deeplearning4j/templates/config-snapshots.md +++ /dev/null @@ -1,123 +0,0 @@ ---- -title: Snapshots and daily builds -short_title: Snapshots -description: Using daily builds for access to latest Eclipse Deeplearning4j features. -category: Configuration -weight: 10 ---- - -## Contents - -* [Introduction to Snapshots](#Introduction) -* [Setup Instructions](#Setup_Instructions) -* [Limitations](#Limitations) -* [Configuration of ND4J Backend](#ND4J_Backend) -* [Note to Gradle Users](#Note_to_gradle_users) - -## Overview/Introduction - -We provide automated daily builds of repositories such as ND4J, DataVec, DeepLearning4j, RL4J etc. So all the newest functionality and most recent bug fixes are released daily. - -Snapshots work like any other Maven dependency. The only difference is that they are served from a custom repository rather than from Maven Central. 
- -**Due to ongoing development, snapshots should be considered less stable than releases: breaking changes or bugs can in principle be introduced at any point during the course of normal development. Typically, releases (not snapshots) should be used when possible, unless a bug fix or new feature is required.** - -## Setup Instructions - -**Step 1:** -To use snapshots in your project, you should add snapshot repository information like this to your `pom.xml` file: - -``` - - - snapshots-repo - https://oss.sonatype.org/content/repositories/snapshots - - false - - - true - daily - - - -``` - -**Step 2:** -Make sure to specify the snapshot version. We follow a simple rule: If the latest stable release version is `A.B.C`, the snapshot version will be `A.B.(C+1)-SNAPSHOT`. The current snapshot version is `1.0.0-SNAPSHOT`. -For more details on the repositories section of the pom.xml file, see [Maven documentation](https://maven.apache.org/settings.html#Repositories) - -If using properties like the DL4J examples, change: -From version: -``` -1.0.0-beta2 -1.0.0-beta2 -``` -To version: -``` -1.0.0-SNAPSHOT -1.0.0-SNAPSHOT -``` - -**Sample pom.xml using Snapshots** - -A sample pom.xml is provided here: [sample pom.xml using snapshots](https://gist.github.com/AlexDBlack/28b0c9a72bce562c8782be326a6e2aaa) -This has been taken from the DL4J standalone sample project and modified using step 1 and 2 above. The original (using the last release) can be found [here](https://github.com/eclipse/deeplearning4j-examples/blob/master/standalone-sample-project/pom.xml) - - -## Limitations - -Both `-platform` (all operating systems) and single OS (non-platform) snapshot dependencies are released. -Due to the multi-platform build nature of snapshots, it is possible (though rare) for the `-platform` artifacts to temporarily get out of sync, which can cause build issues. 
- -If you are building and deploying on just one platform, it is safter use the non-platform artifacts, such as: -``` - - org.nd4j - nd4j-native - ${nd4j.version} - -``` - - -## Useful Maven Commands for Snapshots - -Two commands that might be useful when using snapshot dependencies in Maven is as follows: -1. ```-U``` - for example, in ```mvn package -U```. This ```-U``` option forces Maven to check (and if necessary, download) of new snapshot releases. This can be useful if you need the be sure you have the absolute latest snapshot release. -2. ```-nsu``` - for example, in ```mvn package -nsu```. This ```-nsu``` option stops Maven from checking for snapshot releases. Note however your build will only succeed with this option if you have some snapshot dependencies already downloaded into your local Maven cache (.m2 directory) - -An alternative approach to (1) is to set ```always``` in the `````` section found earlier in this page. -An alternative approach to (2) is to set ```never``` in the `````` section found earlier in this page. - -## Note to Gradle users - -Snapshots will not work with Gradle. You must use Maven to download the files. After that, you may try using your local Maven repository with `mavenLocal()`. - -A bare minimum file like this: - -```Gradle -version '1.0-SNAPSHOT' - -apply plugin: 'java' - -sourceCompatibility = 1.8 - -repositories { - maven { url "https://oss.sonatype.org/content/repositories/snapshots" } - mavenCentral() -} - -dependencies { - compile group: 'org.deeplearning4j', name: 'deeplearning4j-core', version: '1.0.0-SNAPSHOT' - compile group: 'org.deeplearning4j', name: 'deeplearning4j-modelimport', version: '1.0.0-SNAPSHOT' - compile "org.nd4j:nd4j-native:1.0.0-SNAPSHOT" - // Use windows-x86_64 or linux-x86_64 if you are not on macos - compile "org.nd4j:nd4j-native:1.0.0-SNAPSHOT:macosx-x86_64" - testCompile group: 'junit', name: 'junit', version: '4.12' - -} -``` - -should work in theory, but it does not. 
This is due to [a bug in Gradle](https://github.com/gradle/gradle/issues/2882). Gradle with snapshots *and* Maven classifiers appears to be a problem. - - Of note when using the nd4j-native backend on Gradle (and SBT - but not Maven), you need to add openblas as a dependency. We do this for you in the -platform pom. Reference the -platform pom [here](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native-platform/pom.xml#L19) to double check your dependencies. Note that these are version properties. See the `````` section of the pom for current versions of the openblas and javacpp presets required to run nd4j-native. diff --git a/docs/deeplearning4j/templates/config-workspaces.md b/docs/deeplearning4j/templates/config-workspaces.md deleted file mode 100644 index db9179ecb..000000000 --- a/docs/deeplearning4j/templates/config-workspaces.md +++ /dev/null @@ -1,132 +0,0 @@ ---- -title: Workspaces for Memory Management -short_title: Memory Workspaces -description: Workspaces are an efficient model for memory paging in DL4J. -category: Configuration -weight: 10 ---- - -## What are workspaces? - -ND4J offers an additional memory-management model: workspaces. That allows you to reuse memory for cyclic workloads without the JVM Garbage Collector for off-heap memory tracking. In other words, at the end of the workspace loop, all `INDArray`s' memory content is invalidated. Workspaces are integrated into DL4J for training and inference. - -The basic idea is simple: You can do what you need within a workspace (or spaces), and if you want to get an INDArray out of it (i.e. to move result out of the workspace), you just call `INDArray.detach()` and you'll get an independent `INDArray` copy. - -## Neural Networks - -For DL4J users, workspaces provide better performance out of the box, and are enabled by default from 1.0.0-alpha onwards. -Thus for most users, no explicit worspaces configuration is required. 
- -To benefit from workspaces, they need to be enabled. You can configure the workspace mode using: - - `.trainingWorkspaceMode(WorkspaceMode.SEPARATE)` and/or `.inferenceWorkspaceMode(WorkspaceMode.SINGLE)` in your neural network configuration. - -The difference between **SEPARATE** and **SINGLE** workspaces is a tradeoff between the performance & memory footprint: - -* **SEPARATE** is slightly slower, but uses less memory. -* **SINGLE** is slightly faster, but uses more memory. - -That said, it’s fine to use different modes for training & inference (i.e. use SEPARATE for training, and use SINGLE for inference, since inference only involves a feed-forward loop without backpropagation or updaters involved). - -With workspaces enabled, all memory used during training will be reusable and tracked without the JVM GC interference. -The only exclusion is the `output()` method that uses workspaces (if enabled) internally for the feed-forward loop. Subsequently, it detaches the resulting `INDArray` from the workspaces, thus providing you with an independent `INDArray` which will be handled by the JVM GC. - -***Please note***: After the 1.0.0-alpha release, workspaces in DL4J were refactored - SEPARATE/SINGLE modes have been deprecated, and users should use ENABLED instead. - -## Garbage Collector - -If your training process uses workspaces, we recommend that you disable (or reduce the frequency of) periodic GC calls. That can be done like so: - -```java -// this will limit frequency of gc calls to 5000 milliseconds -Nd4j.getMemoryManager().setAutoGcWindow(5000) - -// OR you could totally disable it -Nd4j.getMemoryManager().togglePeriodicGc(false); -``` - -Put that somewhere before your `model.fit(...)` call. - -## ParallelWrapper & ParallelInference - -For `ParallelWrapper`, the workspace-mode configuration option was also added. As such, each of the trainer threads will use a separate workspace attached to the designated device.
- - -```java -ParallelWrapper wrapper = new ParallelWrapper.Builder(model) - // DataSets prefetching options. Buffer size per worker. - .prefetchBuffer(8) - - // set number of workers equal to number of GPUs. - .workers(2) - - // rare averaging improves performance but might reduce model accuracy - .averagingFrequency(5) - - // if set to TRUE, on every averaging model score will be reported - .reportScoreAfterAveraging(false) - - // 3 options here: NONE, SINGLE, SEPARATE - .workspaceMode(WorkspaceMode.SINGLE) - - .build(); -``` - -## Iterators - -We provide asynchronous prefetch iterators, `AsyncDataSetIterator` and `AsyncMultiDataSetIterator`, which are usually used internally. - -These iterators optionally use a special, cyclic workspace mode to obtain a smaller memory footprint. The size of the workspace, in this case, will be determined by the memory requirements of the first `DataSet` coming out of the underlying iterator, whereas the buffer size is defined by the user. The workspace will be adjusted if memory requirements change over time (e.g. if you’re using variable-length time series). - -***Caution***: If you’re using a custom iterator or the `RecordReader`, please make sure you’re not initializing something huge within the first `next()` call. Do that in your constructor to avoid undesired workspace growth. - -***Caution***: With `AsyncDataSetIterator` being used, `DataSets` are supposed to be used before calling the `next()` DataSet. You are not supposed to store them, in any way, without the `detach()` call. Otherwise, the memory used for `INDArrays` within DataSet will be overwritten within `AsyncDataSetIterator` eventually. - -If for some reason you don’t want your iterator to be wrapped into an asynchronous prefetch (e.g. for debugging purposes), special wrappers are provided: `AsyncShieldDataSetIterator` and `AsyncShieldMultiDataSetIterator`. Basically, those are just thin wrappers that prevent prefetch. 
- -## Evaluation - -Usually, evaluation assumes use of the `model.output()` method, which essentially returns an `INDArray` detached from the workspace. In the case of regular evaluations during training, it might be better to use the built-in methods for evaluation. For example: - -``` -Evaluation eval = new Evaluation(outputNum); -ROC roceval = new ROC(outputNum); -model.doEvaluation(iteratorTest, eval, roceval); -``` - -This piece of code will run a single cycle over `iteratorTest`, and it will update both (or less/more if required by your needs) `IEvaluation` implementations without any additional `INDArray` allocation. - -## Workspace Destruction - -There are also some situations, say, where you're short on RAM, and might want do release all workspaces created out of your control; e.g. during evaluation or training. - -That could be done like so: `Nd4j.getWorkspaceManager().destroyAllWorkspacesForCurrentThread();` - -This method will destroy all workspaces that were created within the calling thread. If you've created workspaces in some external threads on your own, you can use the same method in that thread, after the workspaces are no longer needed. - -## Workspace Exceptions - -If workspaces are used incorrectly (such as a bug in a custom layer or data pipeline, for example), you may see an error message such as: -``` -org.nd4j.linalg.exception.ND4JIllegalStateException: Op [set] Y argument uses leaked workspace pointer from workspace [LOOP_EXTERNAL] -For more details, see the ND4J User Guide: nd4j.org/userguide#workspaces-panic -``` - - -## DL4J's LayerWorkspaceMgr - -DL4J's Layer API includes the concept of a "layer workspace manager". - -The idea with this class is that it allows us to easily and precisely control the location of a given array, given different possible configurations for the workspaces. 
-For example, the activations out of a layer may be placed in one workspace during inference, and another during training; this is for performance reasons. -However, with the LayerWorkspaceMgr design, implementers of layers don't need to worry about this. - -What does this mean in practice? Usually it's quite simple... -* When returning activations (`activate(boolean training, LayerWorkspaceMgr workspaceMgr)` method), make sure the returned array is defined in `ArrayType.ACTIVATIONS` (i.e., use LayerWorkspaceMgr.create(ArrayType.ACTIVATIONS, ...) or similar) -* When returning activation gradients (`backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr)`), similarly return an array defined in `ArrayType.ACTIVATION_GRAD` - -You can also leverage an array defined in any workspace to the appropriate workspace using, for example, `LayerWorkspaceMgr.leverageTo(ArrayType.ACTIVATIONS, myArray)` - - -Note that if you are *not* implementing a custom layer (and instead just want to perform forward pass for a layer outside of a MultiLayerNetwork/ComputationGraph) you can use `LayerWorkspaceMgr.noWorkspaces()`. - diff --git a/docs/deeplearning4j/templates/contribute.md b/docs/deeplearning4j/templates/contribute.md deleted file mode 100644 index 735d58c36..000000000 --- a/docs/deeplearning4j/templates/contribute.md +++ /dev/null @@ -1,63 +0,0 @@ ---- -title: Contributor's Guide -short_title: Contribute -description: How to contribute to the Eclipse Deeplearning4j source code. -category: Get Started -weight: 10 ---- - -## Prerequisites - -Before contributing, make sure you know the structure of all of the Eclipse Deeplearning4j libraries. As of early 2018, all libraries now live in the Deeplearning4j [monorepo](https://github.com/eclipse/deeplearning4j). These include: - -- DeepLearning4J: Contains all of the code for learning neural networks, both on a single machine and distributed. -- ND4J: “N-Dimensional Arrays for Java”. 
ND4J is the mathematical backend upon which DL4J is built. All of DL4J’s neural networks are built using the operations (matrix multiplications, vector operations, etc) in ND4J. ND4J is how DL4J supports both CPU and GPU training of networks, without any changes to the networks themselves. Without ND4J, there would be no DL4J. -- DataVec: DataVec handles the data import and conversion side of the pipeline. If you want to import images, video, audio or simply CSV data into DL4J: you probably want to use DataVec to do this. -- Arbiter: Arbiter is a package for (amongst other things) hyperparameter optimization of neural networks. Hyperparameter optimization refers to the process of automating the selection of network hyperparameters (learning rate, number of layers, etc) in order to obtain good performance. - -We also have an extensive examples repository at [dl4j-examples](https://github.com/eclipse/deeplearning4j-examples). - - -## Ways to contribute - -There are numerous ways to contribute to DeepLearning4J (and related projects), depending on your interests and experience. Here are some ideas: - -- Add new types of neural network layers (for example: different types of RNNs, locally connected networks, etc) -- Add a new training feature -- Bug fixes -- DL4J examples: Is there an application or network architecture that we don’t have examples for? -- Testing performance and identifying bottlenecks or areas to improve -- Improve website documentation (or write tutorials, etc) -- Improve the JavaDocs - - -There are a number of different ways to find things to work on.
These include: - -- Looking at the issue trackers: -https://github.com/eclipse/deeplearning4j/issues -https://github.com/eclipse/deeplearning4j-examples/issues -- Reviewing our Roadmap -- Talking to the developers on Gitter, especially our early adopters channel -- Reviewing recent papers and blog posts on training features, network architectures and applications -- Reviewing the website and examples - what seems missing, incomplete, or would simply be useful (or cool) to have? - -## General guidelines - -Before you dive in, there’s a few things you need to know. In particular, the tools we use: - -- Maven: a dependency management and build tool, used for all of our projects. See this for details on Maven. -- Git: the version control system we use -- Project Lombok: Project Lombok is a code generation/annotation tool that is aimed to reduce the amount of ‘boilerplate’ code (i.e., standard repeated code) needed in Java. To work with source, you’ll need to install the Project Lombok plugin for your IDE -- VisualVM: A profiling tool, most useful to identify performance issues and bottlenecks. -- IntelliJ IDEA: This is our IDE of choice, though you may of course use alternatives such as Eclipse and NetBeans. You may find it easier to use the same IDE as the developers in case you run into any issues. But this is up to you. - -Things to keep in mind: - -- Code should be Java 7 compliant -- If you are adding a new method or class: add JavaDocs -- You are welcome to add an author tag for significant additions of functionality. This can also help future contributors, in case they need to ask questions of the original author. If multiple authors are present for a class: provide details on who did what (“original implementation”, “added feature x” etc) -- Provide informative comments throughout your code. This helps to keep all code maintainable. -- Any new functionality should include unit tests (using JUnit) to test your code. This should include edge cases. 
-* If you add a new layer type, you must include numerical gradient checks, as per these unit tests. These are necessary to confirm that the calculated gradients are correct -- If you are adding significant new functionality, consider also updating the relevant section(s) of the website, and providing an example. After all, functionality that nobody knows about (or nobody knows how to use) isn’t that helpful. Adding documentation is definitely encouraged when appropriate, but strictly not required. -- If you are unsure about something - ask us on Gitter! \ No newline at end of file diff --git a/docs/deeplearning4j/templates/examples-tour.md b/docs/deeplearning4j/templates/examples-tour.md deleted file mode 100644 index ee6c049ab..000000000 --- a/docs/deeplearning4j/templates/examples-tour.md +++ /dev/null @@ -1,285 +0,0 @@ ---- -title: Tour of Eclipse Deeplearning4j Examples -short_title: Examples Tour -description: Brief tour of available examples in DL4J. -category: Get Started -weight: 10 ---- - -## Survey of DeepLearning4j Examples - -Deeplearning4j's GitHub repository has many examples to cover its functionality. The [Quick Start Guide](./deeplearning4j-quickstart) shows you how to set up IntelliJ and clone the repository. This page provides an overview of some of those examples. - -## DataVec examples - -Most of the examples make use of DataVec, a toolkit for preprocessing and cleaning data through normalization, standardization, search and replace, column shuffles and vectorization. Reading raw data and transforming it into a DataSet object for your Neural Network is often the first step toward training that network. If you're unfamiliar with DataVec, here is a description and some links to useful examples. - -### IrisAnalysis.java - -This example takes the canonical Iris dataset of the flower species of the same name, whose relevant measurements are sepal length, sepal width, petal length and petal width.
It builds a Spark RDD from the relatively small dataset and runs an analysis against it. - -[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/datavec-examples/src/main/java/org/datavec/transform/analysis/IrisAnalysis.java) - -### BasicDataVecExample.java - -This example loads data into a Spark RDD. All DataVec transform operations use Spark RDDs. Here, we use DataVec to filter data, apply time transformations and remove columns. - -[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/datavec-examples/src/main/java/org/datavec/transform/basic/BasicDataVecExample.java) - -### PrintSchemasAtEachStep.java - -This example shows the print Schema tools that are useful to visualize and to ensure that the code for the transform is behaving as expected. - -[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/datavec-examples/src/main/java/org/datavec/transform/debugging/PrintSchemasAtEachStep.java) - -### JoinExample.java - -You may need to join datasets before passing to a neural network. You can do that in DataVec, and this example shows you how. - -[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/datavec-examples/src/main/java/org/datavec/transform/join/JoinExample.java) - -### LogDataExample.java - -This is an example of parsing log data using DataVec. The obvious use cases are cybersecurity and customer relationship management. - -[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/datavec-examples/src/main/java/org/datavec/transform/logdata/LogDataExample.java) - -### MnistImagePipelineExample.java - -This example is from the video below, which demonstrates the ParentPathLabelGenerator and ImagePreProcessing scaler. 
- - - -[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/dataExamples/MnistImagePipelineExample.java) - -### PreprocessNormalizerExample.java - -This example demonstrates preprocessing features available in DataVec. - -[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/dataexamples/PreprocessNormalizerExample.java) - -### CSVExampleEvaluationMetaData.java - -DataMeta data tracking - i.e. seeing where data for each example comes from - is useful when tracking down malformed data that causes errors and other issues. This example demostrates the functionality in the RecordMetaData class. - -[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/dataexamples/CSVExampleEvaluationMetaData.java) - ---- - -## DeepLearning4J Examples - -To build a neural net, you will use either `MultiLayerNetwork` or `ComputationGraph`. Both options work using a Builder interface. A few highlights from the examples are described below. - -### MNIST dataset of handwritten digits - -MNIST is the "Hello World" of deep learning. Simple, straightforward, and focussed on image recognition, a task that Neural Networks do well. - -### MLPMnistSingleLayerExample.java - -This is a Single Layer Perceptron for recognizing digits. Note that this pulls the images from a binary package containing the dataset, a rather special case for data ingestion. - -[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/feedforward/mnist/MLPMnistSingleLayerExample.java) - -### MLPMnistTwoLayerExample.java - -A two-layer perceptron for MNIST, showing there is more than one useful network for a given dataset. 
- -[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/feedforward/mnist/MLPMnistTwoLayerExample.java) - -### Feedforward Examples - -Data flows through feed-forward neural networks in a single pass from input via hidden layers to output. - -These networks can be used for a wide range of tasks depending on they are configured. Along with image classification over MNIST data, this directory has examples demonstrating regression, classification, and anomoly detection. - -[Show me the code](https://github.com/eclipse/deeplearning4j-examples/tree/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/feedforward) - -### Convolutional Neural Networks - -Convolutional Neural Networks are mainly used for image recognition, although they apply to sound and text as well. - -### AnimalsClassification.java - -This example can be run using either LeNet or AlexNet. - -[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/convolution/AnimalsClassification.java) - ---- - -## Saving and Loading Models - -Training a network over a large volume of training data takes time. Fortunately, you can save a trained model and -load the model for later training or inference. - -### SaveLoadComputationGraph.java - -This demonstrates saving and loading a network build using the class ComputationGraph. - -[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/misc/modelsaving/SaveLoadComputationGraph.java) - -### SaveLoadMultiLayerNetwork.java - -Demonstrates saving and loading a Neural Network built with the class MultiLayerNetwork. - -### Saving/loading a trained model and passing it new input - -Our video series shows code that includes saving and loading models, as well as inference. 
- -[Our YouTube channel](https://www.youtube.com/channel/UCa-HKBJwkfzs4AgZtdUuBXQ) - ---- - -## Custom Loss Functions and Layers - -Do you need to add a Loss Function that is not available or prebuilt yet? Check out these examples. - -### CustomLossExample.java - -[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/misc/lossfunctions/CustomLossExample.java) - -### CustomLossL1L2.java - -[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/misc/lossfunctions/CustomLossL1L2.java) - -### Custom Layer - -Do you need to add a layer with features that aren't available in DeepLearning4J core? This example show where to begin. - -### CustomLayerExample.java - -[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/misc/customlayers/CustomLayerExample.java) - ---- - -## Natural Language Processing - -Neural Networks for NLP? We have those, too. - -### GloVe - -Global Vectors for Word Representation are useful for detecting relationships between words. - -[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/glove/GloVeExample.java) - -### Paragraph Vectors - -A vectorized representation of words. Described [here](https://cs.stanford.edu/~quocle/paragraph_vector.pdf) - -[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/paragraphvectors/ParagraphVectorsClassifierExample.java) - -### Sequence Vectors - -One way to represent sentences is as a sequence of words. 
- -[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/sequencevectors/SequenceVectorsTextExample.java) - -### Word2Vec - -Described [here](https://deeplearning4j.org/word2vec.html) - -[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/word2vec/Word2VecRawTextExample.java) - ---- - -## Data Visualization - -t-Distributed Stochastic Neighbor Embedding (t-SNE) is useful for data visualization. We include an example in the NLP section since word similarity visualization is a common use. - -[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/tsne/TSNEStandardExample.java) - ---- - -## Recurrent Neural Networks - -Recurrent Neural Networks are useful for processing time series data or other sequentially fed data like video. - -The examples folder for Recurrent Neural Networks has the following: - -### BasicRNNExample.java - -An RNN learns a string of characters. - -[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/recurrent/basic/BasicRNNExample.java) - -### GravesLSTMCharModellingExample.java - -Takes the complete works of Shakespeare as a sequence of characters and Trains a Neural Net to generate "Shakespeare" one character at a time. - -[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/recurrent/character/GravesLSTMCharModellingExample.java) - -### SingleTimestepRegressionExample.java - -Regression with an LSTM (Long Short Term Memory) Recurrent Neural Network. 
- -[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/recurrent/regression/SingleTimestepRegressionExample.java) - -### AdditionRNN.java - -This example trains a neural network to do addition. - -[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/recurrent/seq2seq/AdditionRNN.java) - -### RegressionMathFunctions.java - -This example trains a neural network to perform various math operations. - -[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/feedforward/regression/RegressionMathFunctions.java) - -### UCISequenceClassificationExample.java - -A publicly available dataset of time series data of six classes, cyclic, up-trending, etc. Example of an RNN learning to classify the time series. - -[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/recurrent/seqclassification/UCISequenceClassificationExample.java) - -### VideoClassificationExample.java - -How do autonomous vehicles distinguish between a pedestrian, a stop sign and a green light? A complex neural net using Convolutional and Recurrent layers is trained on a set of training videos. The trained network is passed live onboard video and decisions based on object detection from the Neural Net determine the vehicles actions. - -This example is similar, but simplified. It combines convolutional, max pooling, dense (feed forward) and recurrent (LSTM) layers to classify frames in a video. 
- -[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/recurrent/video/VideoClassificationExample.java) - -### SentimentExampleIterator.java - -This sentiment analysis example classifies sentiment as positive or negative using word vectors and a Recurrent Neural Network. - -[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/recurrent/word2vecsentiment/Word2VecSentimentRNN.java) - ---- - -## Distributed Training on Spark - -DeepLearning4j supports using a Spark Cluster for network training. Here are the examples. - -### MnistMLPExample.java - -This is an example of a Multi-Layer Perceptron training on the Mnist data set of handwritten digits. -[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-spark-examples/dl4j-spark/src/main/java/org/deeplearning4j/mlp/MnistMLPExample.java) - -### SparkLSTMCharacterExample.java - -An LSTM recurrent Network in Spark. - -[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-spark-examples/dl4j-spark/src/main/java/org/deeplearning4j/rnn/SparkLSTMCharacterExample.java) - ---- - -## ND4J Examples - -ND4J is a tensor processing library. It can be thought of as Numpy for the JVM. Neural Networks work by processing and updating MultiDimensional arrays of numeric values. In a typical Neural Net application you use DataVec to ingest and convert the data to numeric. Classes used would be RecordReader. Once you need to pass data into a Neural Network, you typically use RecordReaderDataSetIterator. RecordReaderDataSetIterator returns a DataSet object. DataSet consists of an NDArray of the input features and an NDArray of the labels. - -The learning algorithms and loss functions are executed as ND4J operations. 
- -### Basic ND4J examples - -This is a directory with examples for creating and manipulating NDArrays. - -[Show me the code](https://github.com/eclipse/deeplearning4j-examples/tree/master/nd4j-examples/src/main/java/org/nd4j/examples) - ---- - -## Reinforcement Learning Examples - -Deep learning algorithms have learned to play Space Invaders and Doom using reinforcement learning. DeepLearning4J/RL4J examples of Reinforcement Learning are available here: - -[Show me the code](https://github.com/eclipse/deeplearning4j-examples/tree/master/rl4j-examples) \ No newline at end of file diff --git a/docs/deeplearning4j/templates/quickstart.md b/docs/deeplearning4j/templates/quickstart.md deleted file mode 100644 index 25f4216ff..000000000 --- a/docs/deeplearning4j/templates/quickstart.md +++ /dev/null @@ -1,251 +0,0 @@ ---- -title: Deeplearning4j Quickstart -short_title: Quickstart -description: Quickstart for Java using Maven -category: Get Started -weight: 1 ---- - -## Get started - -This is everything you need to run DL4J examples and begin your own projects. - -We recommend that you join our [Gitter Live Chat](https://gitter.im/deeplearning4j/deeplearning4j). Gitter is where you can request help and give feedback, but please do use this guide before asking questions we've answered below. If you are new to deep learning, we've included [a road map for beginners](./deeplearning4j-beginners) with links to courses, readings and other resources. - -### A Taste of Code - -Deeplearning4j is a domain-specific language to configure deep neural networks, which are made of multiple layers. Everything starts with a `MultiLayerConfiguration`, which organizes those layers and their hyperparameters. - -Hyperparameters are variables that determine how a neural network learns. 
They include how many times to update the weights of the model, how to initialize those weights, which activation function to attach to the nodes, which optimization algorithm to use, and how fast the model should learn. This is what one configuration would look like: - -```java - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .weightInit(WeightInit.XAVIER) - .activation("relu") - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .updater(new Sgd(0.05)) - // ... other hyperparameters - .list() - .backprop(true) - .build(); -``` - -With Deeplearning4j, you add a layer by calling `layer` on the `NeuralNetConfiguration.Builder()`, specifying its place in the order of layers (the zero-indexed layer below is the input layer), the number of input and output nodes, `nIn` and `nOut`, as well as the type: `DenseLayer`. - -```java - .layer(0, new DenseLayer.Builder().nIn(784).nOut(250) - .build()) -``` - -Once you've configured your net, you train the model with `model.fit`. - -## Prerequisites - -* [Java (developer version)](#Java) 1.7 or later (**Only 64-Bit versions supported**) -* [Apache Maven](#Maven) (automated build and dependency manager) -* [IntelliJ IDEA](#IntelliJ) or Eclipse -* [Git](#Git) - -You should have these installed to use this QuickStart guide. DL4J targets professional Java developers who are familiar with production deployments, IDEs and automated build tools. Working with DL4J will be easiest if you already have experience with these. - -If you are new to Java or unfamiliar with these tools, read the details below for help with installation and setup. Otherwise, **skip to DL4J Examples**. - -#### Java - -If you don't have Java 1.7 or later, download the current [Java Development Kit (JDK) here](http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html). 
To check if you have a compatible version of Java installed, use the following command: - -```shell -java -version -``` - -Please make sure you have a 64-Bit version of java installed, as you will see an error telling you `no jnind4j in java.library.path` if you decide to try to use a 32-Bit version instead. Make sure the JAVA_HOME environment variable is set. - -#### Apache Maven - -Maven is a dependency management and automated build tool for Java projects. It works well with IDEs such as IntelliJ and lets you install DL4J project libraries easily. [Install or update Maven](https://maven.apache.org/download.cgi) to the latest release following [their instructions](https://maven.apache.org/install.html) for your system. To check if you have the most recent version of Maven installed, enter the following: - -```shell -mvn --version -``` - -If you are working on a Mac, you can simply enter the following into the command line: - -```shell -brew install maven -``` - -Maven is widely used among Java developers and it's pretty much mandatory for working with DL4J. If you come from a different background, and Maven is new to you, check out [Apache's Maven overview](http://maven.apache.org/what-is-maven.html) and our [introduction to Maven for non-Java programmers](./deeplearning4j-config-maven), which includes some additional troubleshooting tips. [Other build tools](./deeplearning4j-config-buildtools) such as Ivy and Gradle can also work, but we support Maven best. - -* [Paul Dubs' guide to maven](http://www.dubs.tech/guides/maven-essentials/) - -* [Maven In Five Minutes](http://maven.apache.org/guides/getting-started/maven-in-five-minutes.html) - -#### IntelliJ IDEA - -An Integrated Development Environment ([IDE](http://encyclopedia.thefreedictionary.com/integrated+development+environment)) allows you to work with our API and configure neural networks in a few steps. 
We strongly recommend using [IntelliJ](https://www.jetbrains.com/idea/download/), which communicates with Maven to handle dependencies. The [community edition of IntelliJ](https://www.jetbrains.com/idea/download/) is free. - -There are other popular IDEs such as [Eclipse](http://books.sonatype.com/m2eclipse-book/reference/creating-sect-importing-projects.html) and [Netbeans](http://wiki.netbeans.org/MavenBestPractices). However, IntelliJ is preferred, and using it will make finding help on [Gitter Live Chat](https://gitter.im/deeplearning4j/deeplearning4j) easier if you need it. - -#### Git - -Install the [latest version of Git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git). If you already have Git, you can update to the latest version using Git itself: - -```shell -$ git clone git://git.kernel.org/pub/scm/git/git.git -``` - -The latest version of Mac's Mojave OS breaks git, producing the following error message: - -```xcrun: error: invalid active developer path (/Library/Developer/CommandLineTools), missing xcrun at: /Library/Developer/CommandLineTools/usr/bin/xcrun -``` - -This can be fixed by running: - -```xcode-select --install -``` - -## DL4J Examples in a Few Easy Steps - -1. Use the command line to enter the following: - -```shell -$ git clone https://github.com/eclipse/deeplearning4j-examples.git -$ cd dl4j-examples/ -$ mvn clean install -``` - -2. Open IntelliJ and choose Import Project. Then select the main 'dl4j-examples' directory. (Note: the example in the illustration below refers to an outdated repository named dl4j-0.4-examples. However, the repository that you will download and install will be called dl4j-examples). - -![select directory](/images/guide/Install_IntJ_1.png) - -3. Choose 'Import project from external model' and ensure that Maven is selected. -![import project](/images/guide/Install_IntJ_2.png) - -4. Continue through the wizard's options. Select the SDK that begins with `jdk`. 
(You may need to click on a plus sign to see your options...) Then click Finish. Wait a moment for IntelliJ to download all the dependencies. You'll see the horizontal bar working on the lower right. - -5. Pick an example from the file tree on the left. -![run IntelliJ example](/images/guide/Install_IntJ_3.png) -Right-click the file to run. - -## Using DL4J In Your Own Projects: Configuring the POM.xml File - -To run DL4J in your own projects, we highly recommend using Maven for Java users, or a tool such as SBT for [Scala](https://github.com/SkymindIO/SKIL_Examples/blob/master/skil_example_notebooks/scala/uci_quickstart_notebook.scala). The basic set of dependencies and their versions are shown below. This includes: - -- `deeplearning4j-core`, which contains the neural network implementations -- `nd4j-native-platform`, the CPU version of the ND4J library that powers DL4J -- `datavec-api` - Datavec is our library vectorizing and loading data - -Every Maven project has a POM file. Here is [how the POM file should appear](https://github.com/eclipse/deeplearning4j-examples/blob/master/pom.xml) when you run your examples. - -Within IntelliJ, you will need to choose the first Deeplearning4j example you're going to run. We suggest `MLPClassifierLinear`, as you will almost immediately see the network classify two groups of data in our UI. The file on [Github can be found here](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/feedforward/classification/MLPClassifierLinear.java). - -To run the example, right click on it and select the green button in the drop-down menu. You will see, in IntelliJ's bottom window, a series of scores. The rightmost number is the error score for the network's classifications. If your network is learning, then that number will decrease over time with each batch it processes. 
At the end, this window will tell you how accurate your neural-network model has become: - -![mlp classifier results](/images/guide/mlp_classifier_results.png) - -In another window, a graph will appear, showing you how the multilayer perceptron (MLP) has classified the data in the example. It will look like this: - -![mlp classifier viz](/images/guide/mlp_classifier_viz.png) - -Congratulations! You just trained your first neural network with Deeplearning4j. - -## Next Steps - -1. Join us on Gitter. We have three big community channels. - * [DL4J Live Chat](https://gitter.im/deeplearning4j/deeplearning4j) is the main channel for all things DL4J. Most people hang out here. - * [Tuning Help](https://gitter.im/deeplearning4j/deeplearning4j/tuninghelp) is for people just getting started with neural networks. Beginners please visit us here! - * [Early Adopters](https://gitter.im/deeplearning4j/deeplearning4j/earlyadopters) is for those who are helping us vet and improve the next release. WARNING: This is for more experienced folks. -2. Read the [introduction to deep neural networks](https://skymind.ai/wiki/neural-network). -3. Check out the more detailed [Comprehensive Setup Guide](./deeplearning4j-quickstart). -4. Browse the [DL4J documentation](./). -5. **Python folks**: If you plan to run benchmarks on Deeplearning4j comparing it to well-known Python framework [x], please read [these instructions](./deeplearning4j-benchmark) on how to optimize heap space, garbage collection and ETL on the JVM. By following them, you will see at least a *10x speedup in training time*. 
- -### Additional links - -- [Deeplearning4j artifacts on Maven Central](http://search.maven.org/#search%7Cga%7C1%7Cdeeplearning4j) -- [ND4J artifacts on Maven Central](http://search.maven.org/#search%7Cga%7C1%7Cnd4j) -- [Datavec artifacts on Maven Central](http://search.maven.org/#search%7Cga%7C1%7Cdatavec) -- [Scala code for UCI notebook](https://github.com/SkymindIO/SKIL_Examples/blob/master/skil_example_notebooks/scala/uci_quickstart_notebook.scala) - -### Troubleshooting - -**Q:** I'm using a 64-Bit Java on Windows and still get the `no jnind4j in java.library.path` error - -**A:** You may have incompatible DLLs on your PATH. To tell DL4J to ignore those, you have to add the following as a VM parameter (Run -> Edit Configurations -> VM Options in IntelliJ): - -``` --Djava.library.path="" -``` -**Q:** **SPARK ISSUES** I am running the examples and having issues with the Spark based examples such as distributed training or datavec transform options. - - -**A:** You may be missing some dependencies that Spark requires. See this [Stack Overflow discussion](https://stackoverflow.com/a/38735202/3892515) for a discussion of potential dependency issues. Windows users may need the winutils.exe from Hadoop. - -Download winutils.exe from https://github.com/steveloughran/winutils and put it into the null/bin/winutils.exe (or create a hadoop folder and add that to HADOOP_HOME) - -### Troubleshooting: Debugging UnsatisfiedLinkError on Windows - -Windows users might be seeing something like: - -``` -Exception in thread "main" java.lang.ExceptionInInitializerError -at org.deeplearning4j.nn.conf.NeuralNetConfiguration$Builder.seed(NeuralNetConfiguration.java:624) -at org.deeplearning4j.examples.feedforward.anomalydetection.MNISTAnomalyExample.main(MNISTAnomalyExample.java:46) -Caused by: java.lang.RuntimeException: org.nd4j.linalg.factory.Nd4jBackend$NoAvailableBackendException: Please ensure that you have an nd4j backend on your classpath. 
Please see: http://nd4j.org/getstarted.html -at org.nd4j.linalg.factory.Nd4j.initContext(Nd4j.java:5556) -at org.nd4j.linalg.factory.Nd4j.(Nd4j.java:189) -... 2 more -Caused by: org.nd4j.linalg.factory.Nd4jBackend$NoAvailableBackendException: Please ensure that you have an nd4j backend on your classpath. Please see: http://nd4j.org/getstarted.html -at org.nd4j.linalg.factory.Nd4jBackend.load(Nd4jBackend.java:259) -at org.nd4j.linalg.factory.Nd4j.initContext(Nd4j.java:5553) -... 3 more -``` - -If that is the issue, see [this page](https://github.com/bytedeco/javacpp-presets/wiki/Debugging-UnsatisfiedLinkError-on-Windows#using-dependency-walker). In this case replace with "Nd4jCpu". - -### Eclipse setup without Maven - -We recommend and use Maven and Intellij. If you prefer Eclipse and dislike Maven here is a nice [blog post](http://electronsfree.blogspot.com/2016/10/how-to-setup-dl4j-project-with-eclipse.html) to walk you through an Eclipse configuration. - -## Quickstart template - -Now that you've learned how to run the different examples, we've made a template available for you that has a basic EMNIST trainer with early stopping and evaluation code. - -The Quickstart template is available at [https://github.com/deeplearning4j/dl4j-quickstart](https://github.com/deeplearning4j/dl4j-quickstart). - -To use the template: - -1. Clone to your local machine `git clone https://github.com/deeplearning4j/dl4j-quickstart.git` -2. Import the `dl4j-quickstart` main folder into IntelliJ. -3. Start coding! - -## More about Eclipse Deeplearning4j - -Deeplearning4j is a framework that lets you pick and choose with everything available from the beginning. We're not Tensorflow (a low-level numerical computing library with automatic differentiation) or Pytorch. Deeplearning4j has several subprojects that make it easy-ish to build end-to-end applications. - -If you'd like to deploy models to production, you might like our [model import from Keras](./keras-import-get-started). 
- -Deeplearning4j has several submodules. These range from a visualization UI to distributed training on Spark. For an overview of these modules, please look at the [**Deeplearning4j examples on Github**](https://github.com/eclipse/deeplearning4j-examples). - -To get started with a simple desktop app, you need two things: An [nd4j backend](http://nd4j.org/backend.html) and `deeplearning4j-core`. For more code, see the [simpler examples submodule](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/pom.xml#L64). - -If you want a flexible deep-learning API, there are two ways to go. You can use nd4j standalone See our [nd4j examples](https://github.com/eclipse/deeplearning4j-examples/tree/master/nd4j-examples) or the [computation graph API](http://deeplearning4j.org/compgraph). - -If you want distributed training on Spark, you can see our [Spark page](http://deeplearning4j.org/spark) -Keep in mind that we cannot setup Spark for you. If you want to set up distributed Spark and GPUs, that is largely up to you. Deeplearning4j simply deploys as a JAR file on an existing Spark cluster. - -If you want Spark with GPUs, we recommend [Spark with Mesos](https://spark.apache.org/docs/latest/running-on-mesos.html). - -If you want to deploy on mobile, you can see our [Android page](./deeplearning4j-android). - -We deploy optimized code for various hardware architectures natively. We use C++ based for loops just like everybody else. -For that, please see our [C++ framework libnd4j](https://github.com/eclipse/deeplearning4j/tree/master/libnd4j). - -Deeplearning4j has two other notable components: - -* [Arbiter: hyperparameter optimization and model evaluation](./arbiter-overview) -* [DataVec: built-in ETL for machine-learning data pipelines](./datavec-overview) - -Deeplearning4j is meant to be an end-to-end platform for building real applications, not just a tensor library with automatic differentiation. 
If you want a tensor library with autodiff, please see ND4J and [samediff](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff). Samediff is still in alpha, but if you want to contribute, please join our [live chat on Gitter](https://gitter.im/deeplearning4j/deeplearning4j). - -Lastly, if you are benchmarking Deeplearnin4j, please consider coming in to our live chat and asking for tips. Deeplearning4j has [all the knobs](./deeplearning4j-config-gpu-cpu), but some may not work exactly like the Python frameworks to do. You have to build Deeplearning4j from source for some applications. diff --git a/docs/deeplearning4j/templates/troubleshooting-training.md b/docs/deeplearning4j/templates/troubleshooting-training.md deleted file mode 100644 index caa689c01..000000000 --- a/docs/deeplearning4j/templates/troubleshooting-training.md +++ /dev/null @@ -1,140 +0,0 @@ ---- -title: Troubleshooting -short_title: Troubleshooting -description: Understanding common errors like NaNs and tuning hyperparameters. -category: Tuning & Training -weight: 0 ---- - -## Troubleshooting Neural Net Training - -Neural networks can be difficult to tune. If the network hyperparameters are poorly chosen, the network may learn slowly, or perhaps not at all. This page aims to provide some baseline steps you should take when tuning your network. - -Many of these tips have already been discussed in the academic literature. Our purpose is to consolidate them in one site and express them as clearly as possible. - -## Contents - -* Data Normalization -* Weight Initialization -* Epochs and Iterations -* Learning Rate -* Activation Function -* Loss Function -* Regularization -* Minibatch Size -* Updater and Optimization Algorithm -* Gradient Normalization -* Recurrent Neural Networks -* Deep Belief Network -* Restricted Boltzmann Machines -* NaN, Not a Number issues - - -## Data Normalization - -What's distribution of your data? 
Are you scaling it properly? As a general rule: - -- For continuous values: you want these to be in the range of -1 to 1, 0 to 1 or ditributed normally with mean 0 and standard deviation 1. This does not have to be exact, but ensuring your inputs are approximately in this range can help during training. Scale down large inputs, and scale up small inputs. -- For discrete classes (and, for classification problems for the output), generally use a one-hot representation. That is, if you have 3 classes, then your data will be represeted as [1,0,0], [0,1,0] or [0,0,1] for each of the 3 classes respectively. - -Note that it's very important to use the exact same normalization method for both the training data and testing data. - -## Weight Initialization - -Deeplearning4j supports several different kinds of weight initializations with the weightInit parameter. These are set using the .weightInit(WeightInit) method in your configuration. - -You need to make sure your weights are neither too big nor too small. Xavier weight initialization is usually a good choice for this. For networks with rectified linear (relu) or leaky relu activations, RELU weight initialization is a sensible choice. - -## Number of Epochs and Number of Iterations - -An epoch is defined as a full pass of the data set. - -Too few epochs don't give your network enough time to learn good parameters; too many and you might overfit the training data. One way to choose the number of epochs is to use early stopping. [Early stopping](http://deeplearning4j.org/earlystopping) can also help to prevent the neural network from overfitting (i.e., can help the net generalize better to unseen data). - -## Learning Rate - -The learning rate is one of, if not the most important hyperparameter. If this is too large or too small, your network may learn very poorly, very slowly, or not at all. 
Typical values for the learning rate are in the range of 0.1 to 1e-6, though the optimal learning rate is usually data (and network architecture) specific. Some simple advice is to start by trying three different learning rates – 1e-1, 1e-3, and 1e-6 – to get a rough idea of what it should be, before further tuning this. Ideally, they run models with different learning rates simultaneously to save time. - -The usual approach to selecting an appropriate learning rate is to use [DL4J's visualization interface](http://deeplearning4j.org/visualization) to visualize the progress of training. You want to pay attention to both the loss over time, and the ratio of update magnitudes to parameter magnitudes (a ratio of approximately 1:1000 is a good place to start). For more information on tuning the learning rate, see [this link](http://cs231n.github.io/neural-networks-3/#baby). - -For training neural networks in a distributed manner, you may need a different (frequently higher) learning rate compared to training the same network on a single machine. - -### Policies and Scheduling - -You can optionally define a learning rate policy for your neural network. A policy will change the learning rate over time, achieving better results since the learning rate can "slow down" to find closer local minima for convergence. A common policy used is scheduling. See the [LeNet example](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/convolution/LenetMnistExample.java) for a learning rate schedule used in practice. - -Note that if you're using multiple GPUs, this will affect your scheduling. For example, if you have 2x GPUs, then you will need to divide the iterations in your schedule by 2, since the throughput of your training process will be double, and the learning rate schedule is only applicable to the local GPU. 
- -## Activation Function - -There are two aspects to be aware of, with regard to the choice of activation function. - -First, the activation function of the hidden (non-output) layers. As a general rule, 'relu' or 'leakyrelu' activations are good choices for this. Some other activation functions (tanh, sigmoid, etc) are more prone to vanishing gradient problems, which can make learning much harder in deep neural networks. However, for LSTM layers, the tanh activation function is still commonly used. - -Second, regarding the activation function for the output layer: this is usually application specific. For classification problems, you generally want to use the softmax activation function, combined with the negative log likelihood / MCXENT (multi-class cross entropy). The softmax activation function gives you a probability distribution over classes (i.e., outputs sum to 1.0). For regression problems, the "identity" activation function is frequently a good choice, in conjunction with the MSE (mean squared error) loss function. - -## Loss Function - -Loss functions for each neural network layer can either be used in pretraining, to learn better weights, or in classification (on the output layer) for achieving some result. (In the example above, classification happens in the override section.) - -Your net's purpose will determine the loss function you use. For pretraining, choose reconstruction entropy. For classification, use multiclass cross entropy. - -## Regularization - -Regularization methods can help to avoid overfitting during training. Overfitting occurs when the network predicts the training set very well, but makes poor predictions on data the network has never seen. One way to think about overfitting is that the network memorizes the training data (instead of learning the general relationships in it). - -Common types of regularization include: - -- l1 and l2 regularization penalizes large network weights, and avoids weights becoming too large. 
Some level of l2 regularization is commonly used in practice. However, note that if the l1 or l2 regularization coefficients are too high, they may over-penalize the network, and stop it from learning. Common values for l2 regularization are 1e-3 to 1e-6. -- [Dropout](./glossary.html#dropout), is a frequently used regularization method can be very effective. Dropout is most commoly used with a dropout rate of 0.5. -- Dropconnect (conceptually similar to dropout, but used much less frequently) -- Restricting the total number of network size (i.e., limit the number of layers and size of each layer) -- [Early stopping](http://deeplearning4j.org/earlystopping) - -To use l1/l2/dropout regularization, use .regularization(true) followed by .l1(x), .l2(y), .dropout(z) respectively. Note that z in dropout(z) is the probability of retaining an activation. - -## Minibatch Size - -A minibatch refers to the number of examples used at a time, when computing gradients and parameter updates. In practice (for all but the smallest data sets), it is standard to break your data set up into a number of minibatches. - -The ideal minibatch size will vary. For example, a minibatch size of 10 is frequently too small for GPUs, but can work on CPUs. A minibatch size of 1 will allow a network to train, but will not reap the benefits of parallelism. 32 may be a sensible starting point to try, with minibatches in the range of 16-128 (sometimes smaller or larger, depending on the application and type of network) being common. - -## Updater and Optimization Algorithm - -In DL4J, the term 'updater' refers to training mechanisms such as momentum, RMSProp, adagrad, and others. Using one of these methods can result in much faster network training companed to 'vanilla' stochastic gradient descent. You can set the updater using the .updater(Updater) configuration option. - -The optimization algorithm is how updates are made, given the gradient. 
The simplest (and most commonly used) method is stochastic gradient descent (SGD), however DL4J also provides SGD with line search, conjugate gradient and LBFGS optimization algorithms. These latter algorithms are more powerful compared to SGD, but considerably more costly per parameter update due to a line search component, and aren't used as much in practice. Note that you can in principle combine any updater with any optimization algorithm. - -A good default choice in most cases is to use the stochastic gradient descent optimization algorithm combined with one of the momentum/rmsprop/adagrad updaters, with momentum frequently being used in practice. Note that for momentum, the updater is called NESTEROVS (a reference to the Nesterovs variant of momentum), and the momentum rate can be set by the .momentum(double) option. - -## Gradient Normalization - -When training a neural network, it can sometimes be helpful to apply gradient normalization, to avoid the gradients being too large (the so-called exploding gradient problem, common in recurrent neural networks) or too small. This can be applied using the .gradientNormalization(GradientNormalization) and .gradientNormalizationThreshould(double) methods. For an example of gradient normalization see, [GradientNormalization.java](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/GradientNormalization.java). The test code for that example is [here](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/updater/TestGradientNormalization.java). - -## Recurrent Neural Networks: Truncated Backpropagation through Time - -When training recurrent networks with long time series, it is generally advisable to use truncated backpropagation through time. With 'standard' backpropagation through time (the default in DL4J) the cost per parameter update can become prohibative. 
For more details, see [this page](http://deeplearning4j.org/usingrnns) and [this glossary entry](./glossary.html#backprop). - -## Visible/Hidden Unit - -When using a deep-belief network, pay close attention here. An RBM (the component of the DBN used for feature extraction) is stochastic and will sample from different probability distributions relative to the visible or hidden units specified. - -See Geoff Hinton's definitive work, [A Practical Guide to Training Restricted Boltzmann Machines](https://www.cs.toronto.edu/~hinton/absps/guideTR.pdf), for a list of all of the different probability distributions. - -## Restricted Boltzmann Machines (RBMs) - -When creating hidden layers for autoencoders that perform compression, give them fewer neurons than your input data. If the hidden-layer nodes are too close to the number of input nodes, you risk reconstructing the identity function. Too many hidden-layer neurons increase the likelihood of noise and overfitting. For an input layer of 784, you might choose an initial hidden layer of 500, and a second hidden layer of 250. No hidden layer should be less than a quarter of the input layer’s nodes. And the output layer will simply be the number of labels. - -Larger datasets require more hidden layers. Facebook’s Deep Face uses nine hidden layers on what we can only presume to be an immense corpus. Many smaller datasets might only require three or four hidden layers, with their accuracy decreasing beyond that depth. As a rule: larger data sets contain more variation, which require more features/neurons for the net to obtain accurate results. Typical machine learning, of course, has one hidden layer, and those shallow nets are called Perceptrons. - -Large datasets require that you pretrain your RBM several times. Only with multiple pretrainings will the algorithm learn to correctly weight features in the context of the dataset. That said, you can run the data in parallel or through a cluster to speed up the pretraining. 
- -## NaN, Not a Number Errors - -Q. Why is my Neural Network throwing nan values? - -A. Backpropagation involves the multiplication of very small gradients, due to limited precision when representing real numbers values very close to zero can not be represented. The term for this issue is Arithmetic Underflow. If your Neural Network is throwing nan's then the solution is to retune your network to avoid the very small gradients. This is more likely an issue with deeper Neural Networks. - -You can try using double data type but it's usually recommended to retune the net first. - -Following the basic tuning tips and monitoring the results is the way to ensure NAN doesn't show up anymore. \ No newline at end of file diff --git a/docs/doc_generator.py b/docs/doc_generator.py deleted file mode 100644 index b3dfd5377..000000000 --- a/docs/doc_generator.py +++ /dev/null @@ -1,292 +0,0 @@ -# -*- coding: utf-8 -*- - -################################################################################ -# Copyright (c) 2015-2019 Skymind, Inc. -# -# This program and the accompanying materials are made available under the -# terms of the Apache License, Version 2.0 which is available at -# https://www.apache.org/licenses/LICENSE-2.0. -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# SPDX-License-Identifier: Apache-2.0 -################################################################################ - -import abc -import re -import os -import shutil -import json -import sys - - -"""Abstract base class for document generators. 
Implementations for various programming languages -need to implement the following six methods: - -- process_main_docstring -- process_docstring -- render -- get_main_doc_string -- get_constructor_data -- get_public_method_data -""" -class BaseDocumentationGenerator: - - __metaclass__ = abc.ABCMeta - - def __init__(self, args): - reload(sys) - sys.setdefaultencoding('utf8') - - self.out_language = args.out_language - self.template_dir = args.templates if self.out_language == 'en' else args.templates + '_' + self.out_language - self.project_name = args.project + '/' - self.validate_templates() - - self.target_dir = args.sources if self.out_language == 'en' else args.sources + '_' + self.out_language - self.language = args.language - self.docs_root = args.docs_root - self.source_code_path = args.code - self.github_root = ('https://github.com/deeplearning4j/deeplearning4j/tree/master/' - + self.source_code_path[3:]) - - with open(self.project_name + 'pages.json', 'r') as f: - json_pages = f.read() - site = json.loads(json_pages) - self.pages = site.get('pages', []) - self.indices = site.get('indices', []) - self.excludes = site.get('excludes', []) - - """Process top class docstring - """ - @abc.abstractmethod - def process_main_docstring(self, doc_string): - raise NotImplementedError - - """Process method and other docstrings - """ - @abc.abstractmethod - def process_docstring(self, doc_string): - raise NotImplementedError - - """Takes unformatted signatures and doc strings and returns a properly - rendered piece that fits into our markdown layout. - """ - @abc.abstractmethod - def render(self, signature, doc_string, class_name, is_method): - raise NotImplementedError - - - """Returns main doc string of class/object in question. - """ - @abc.abstractmethod - def get_main_doc_string(self, class_string, class_name): - raise NotImplementedError - - - """Returns doc string and signature data for constructors. 
- """ - @abc.abstractmethod - def get_constructor_data(self, class_string, class_name, use_contructor): - raise NotImplementedError - - - """Returns doc string and signature data for methods - in the public API of an object - """ - @abc.abstractmethod - def get_public_method_data(self, class_string, includes, excludes): - raise NotImplementedError - - - """Validate language templates - """ - def validate_templates(self): - assert os.path.exists(self.project_name + self.template_dir), \ - 'No template folder for language ' + self.out_language - # TODO: check if folder structure for 'templates' and 'templates_XX' aligns - # TODO: do additional sanity checks to assure different languages are in sync - - """Generate links within documentation. - """ - def class_to_docs_link(self, module_name, class_name): - return self.docs_root + module_name.replace('.', '/') + '#' + class_name - - """Generate links to source code. - """ - def class_to_source_link(self, module_name, cls_name): - return '[[source]](' + self.github_root + module_name + '/' + cls_name + '.' + self.language + ')' - - """Returns code string as markdown snippet of the respective language. - """ - def to_code_snippet(self, code): - return '```' + self.language + '\n' + code + '\n```\n' - - """Returns source code of a class in a module as string. - """ - def inspect_class_string(self, module, cls): - return self.read_file(self.source_code_path + module + '/' + cls) - - """Searches for file names within a module to generate an index. The result - of this is used to create index.md files for each module in question so as - to easily navigate documentation. 
- """ - def read_index_data(self, data): - module_index = data.get('module_index', "") - modules = os.listdir(self.project_name + self.target_dir + '/' + module_index) - modules = [mod.replace('.md', '') for mod in modules if mod != 'index.md'] - index_string = ''.join('- [{}](./{})\n'.format(mod.title().replace('-', ' '), mod) for mod in modules if mod) - print(index_string) - return ['', index_string] - - - """Grabs page data for each class and allows for iteration in modules and specific classes. - """ - def organize_page_data(self, module, cls, tag, use_constructors, includes, excludes): - class_string = self.inspect_class_string(module, cls) - class_string = self.get_tag_data(class_string, tag) - class_string = class_string.replace('

    ', '').replace('

    ', '') - class_name = cls.replace('.' + self.language, '') - doc_string, class_string = self.get_main_doc_string(class_string, class_name) - constructors, class_string = self.get_constructor_data(class_string, class_name, use_constructors) - methods = self.get_public_method_data(class_string, includes, excludes) - return module, class_name, doc_string, constructors, methods - - - """Main workhorse of this script. Inspects source files per class or module and reads - - class names - - doc strings of classes / objects - - doc strings and signatures of methods - - doc strings and signatures of methods - Values are returned as nested list, picked up in the main program to write documentation blocks. - """ - def read_page_data(self, data): - if data.get('module_index', ""): # indices are created after pages - return [] - page_data = [] - classes = [] - - includes = data.get('include', []) - excludes = data.get('exclude', []) - - use_constructors = data.get('constructors', True) - tag = data.get('autogen_tag', '') - - modules = data.get('module', "") - if modules: - for module in modules: - module_files = os.listdir(self.source_code_path + module) - print(module_files) - for cls in module_files: - if '.' in cls: - module, class_name, doc_string, constructors, methods = self.organize_page_data(module, cls, tag, use_constructors, includes, excludes) - page_data.append([module, class_name, doc_string, constructors, methods]) - - - class_files = data.get('class', "") - if class_files: - for cls in class_files: - classes.append(cls) - - for cls in sorted(classes): - module = "" - module, class_name, doc_string, constructors, methods = self.organize_page_data(module, cls, tag, use_constructors, includes, excludes) - page_data.append([module, class_name, doc_string, constructors, methods]) - - return page_data - - """If a tag is present in a source code string, extract everything between - tag::::start and tag::::end. 
- """ - def get_tag_data(self, class_string, tag): - start_tag = r'tag::' + tag + '::start' - end_tag = r'tag::' + tag + '::end' - if not tag: - return class_string - elif tag and start_tag in class_string and end_tag not in class_string: - print("Warning: Start tag, but no end tag found for tag: ", tag) - elif tag and start_tag in class_string and end_tag not in class_string: - print("Warning: End tag, but no start tag found for tag: ", tag) - else: - start = re.search(start_tag, class_string) - end = re.search(end_tag, class_string) - return class_string[start.end():end.start()] - - """Before generating new docs into target folder, clean up old files. - """ - def clean_target(self): - if os.path.exists(self.project_name + self.target_dir): - shutil.rmtree(self.project_name + self.target_dir) - - for subdir, dirs, file_names in os.walk(self.project_name + self.template_dir): - for file_name in file_names: - new_subdir = subdir.replace(self.project_name + self.template_dir, self.project_name + self.target_dir) - if not os.path.exists(new_subdir): - os.makedirs(new_subdir) - if file_name[-3:] == '.md': - file_path = os.path.join(subdir, file_name) - new_file_path = self.project_name + self.target_dir + '/' + self.project_name.replace('/','') + '-' + file_name - # print(new_file_path) - shutil.copy(file_path, new_file_path) - - - """Given a file path, read content and return string value. 
- """ - def read_file(self, path): - with open(path) as f: - return f.read() - - - """Create main index.md page for a project by parsing README.md - and appending it to the template version of index.md - """ - def create_index_page(self): - readme = self.read_file(self.project_name + 'README.md') - index = self.read_file(self.project_name + self.template_dir + '/index.md') - # if readme has a '##' tag, append it to index - index = index.replace('{{autogenerated}}', readme[readme.find('##'):]) - with open(self.project_name + self.target_dir + '/index.md', 'w') as f: - f.write(index) - - - """Write blocks of content (arrays of strings) as markdown to - the file name provided in page_data. - """ - def write_content(self, blocks, page_data): - #assert blocks, 'No content for page ' + page_data['page'] # unsure if necessary - - markdown = '\n\n\n'.join(blocks) - exp_name = self.project_name.replace('/','') + '-' + page_data['page'] - path = os.path.join(self.project_name + self.target_dir, exp_name) - - if os.path.exists(path): - template = self.read_file(path) - #assert '{{autogenerated}}' in template, 'Template found for {} but missing {{autogenerated}} tag.'.format(path) # unsure if needed - markdown = template.replace('{{autogenerated}}', markdown) - print('Auto-generating docs for {}'.format(path)) - markdown = markdown - subdir = os.path.dirname(path) - if not os.path.exists(subdir): - os.makedirs(subdir) - with open(path, 'w') as f: - f.write(markdown) - - - """Prepend headers for jekyll, i.e. provide "default" layout and a - title for the post. 
- """ - def prepend_headers(self): - for subdir, dirs, file_names in os.walk(self.project_name + self.target_dir): - for file_name in file_names: - if file_name[-3:] == '.md': - file_path = os.path.join(subdir, file_name) - header = '---\ntitle: {}\n---\n'.format(file_name.replace('.md', '')) - with open(file_path, 'r+') as f: - content = f.read() - f.seek(0, 0) - if not content.startswith('---'): - f.write(header.rstrip('\r\n') + '\n' + content) diff --git a/docs/gen_all_docs.sh b/docs/gen_all_docs.sh deleted file mode 100755 index acf886be7..000000000 --- a/docs/gen_all_docs.sh +++ /dev/null @@ -1,92 +0,0 @@ -#!/usr/bin/env bash -set -eu - -################################################################################ -# Copyright (c) 2015-2018 Skymind, Inc. -# -# This program and the accompanying materials are made available under the -# terms of the Apache License, Version 2.0 which is available at -# https://www.apache.org/licenses/LICENSE-2.0. -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-# -# SPDX-License-Identifier: Apache-2.0 -################################################################################ - - -python generate_docs.py \ - --project deeplearning4j \ - --language java \ - --code ../deeplearning4j \ - --out_language en - -python generate_docs.py \ - --project deeplearning4j-nn \ - --language java \ - --code ../deeplearning4j \ - --out_language en - -python generate_docs.py \ - --project deeplearning4j-nlp \ - --language java \ - --code ../deeplearning4j \ - --out_language en - -python generate_docs.py \ - --project deeplearning4j-scaleout \ - --language java \ - --code ../deeplearning4j \ - --out_language en - -python generate_docs.py \ - --project deeplearning4j-zoo \ - --language java \ - --code ../deeplearning4j \ - --out_language en - -python generate_docs.py \ - --project datavec \ - --language java \ - --code ../datavec \ - --out_language en - -python generate_docs.py \ - --project nd4j \ - --language java \ - --code ../nd4j \ - --out_language en - -python generate_docs.py \ - --project nd4j-nn \ - --language java \ - --code ../nd4j \ - --out_language en - -python generate_docs.py \ - --project arbiter \ - --language java \ - --code ../arbiter \ - --out_language en - -python generate_docs.py \ - --project keras-import \ - --language java \ - --code ../deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/ \ - --docs_root deeplarning4j.org/keras \ - --out_language en - -# python generate_docs.py \ -# --project scalnet \ -# --language scala \ -# --code ../scalnet/src/main/scala/org/deeplearning4j/scalnet/ \ -# --docs_root deeplarning4j.org/scalnet - -# python generate_docs.py \ -# --project samediff \ -# --language java \ -# --code ../nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/ \ -# --docs_root deeplarning4j.org/samediff \ No newline at end of file diff --git a/docs/generate_docs.py b/docs/generate_docs.py deleted file mode 100644 index 
0da868815..000000000 --- a/docs/generate_docs.py +++ /dev/null @@ -1,93 +0,0 @@ -# -*- coding: utf-8 -*- - -################################################################################ -# Copyright (c) 2015-2018 Skymind, Inc. -# -# This program and the accompanying materials are made available under the -# terms of the Apache License, Version 2.0 which is available at -# https://www.apache.org/licenses/LICENSE-2.0. -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# SPDX-License-Identifier: Apache-2.0 -################################################################################ - -import argparse -from java_doc import JavaDocumentationGenerator -from python_doc import PythonDocumentationGenerator -from scala_doc import ScalaDocumentationGenerator - -SUPPORTED_LANGUAGES = ["java", "scala", "python"] - -if __name__ == '__main__': - - parser = argparse.ArgumentParser() - parser.add_argument('--project', '-p', type=str, required=True) # e.g. 
keras-import - parser.add_argument('--code', '-c', type=str, required=True) # relative path to source code for this project - - parser.add_argument('--language', '-l', type=str, required=False, default='java') - parser.add_argument('--docs_root', '-d', type=str, required=False, default='http://deeplearning4j.org') - parser.add_argument('--templates', '-t', type=str, required=False, default='templates') - parser.add_argument('--sources', '-s', type=str, required=False, default='doc_sources') - parser.add_argument('--out_language', '-o', type=str, required=False, default='en') - - args = parser.parse_args() - - language = args.language - if language not in SUPPORTED_LANGUAGES: - raise ValueError("provided language not supported: {}".format(language)) - - if language == "python": - doc_generator = PythonDocumentationGenerator(args) - elif language == "scala": - doc_generator = ScalaDocumentationGenerator(args) - else: - doc_generator = JavaDocumentationGenerator(args) - - doc_generator.clean_target() - #doc_generator.create_index_page() # not necessary for now - - for page_data in doc_generator.pages: - data = doc_generator.read_page_data(page_data) - blocks = [] - for module_name, class_name, doc_string, constructors, methods in data: - class_string = doc_generator.inspect_class_string(module_name, class_name + '.' 
+ doc_generator.language) - # skip class if it contains any exclude keywords - if not any(ex in class_string for ex in doc_generator.excludes): - sub_blocks = [] - link = doc_generator.class_to_source_link(module_name, class_name) - try: - class_name = class_name.rsplit('/',1)[1] - except: - print('Skipping split on '+class_name) - # if module_name: - # sub_blocks.append('### {}'.format(module_name)) - # sub_blocks.append(' {} \n'.format(link)) - - if doc_string: - sub_blocks.append('\n---\n') - sub_blocks.append('### {}'.format(class_name)) - sub_blocks.append(' {} \n'.format(link)) - sub_blocks.append(doc_string) - - if constructors: - sub_blocks.append("".join([doc_generator.render(cs, cd, class_name, False) for (cs, cd) in constructors])) - - if methods: - # sub_blocks.append('') - # sub_blocks.append('
    \n') - sub_blocks.append("".join([doc_generator.render(ms, md, class_name, True) for (ms, md) in methods])) - # sub_blocks.append('
    ') - blocks.append('\n'.join(sub_blocks)) - - doc_generator.write_content(blocks, page_data) - - for index_data in doc_generator.indices: - index = doc_generator.read_index_data(index_data) - doc_generator.write_content(index, index_data) - -doc_generator.prepend_headers() diff --git a/docs/java_doc.py b/docs/java_doc.py deleted file mode 100644 index 065454b46..000000000 --- a/docs/java_doc.py +++ /dev/null @@ -1,130 +0,0 @@ -# -*- coding: utf-8 -*- - -################################################################################ -# Copyright (c) 2015-2018 Skymind, Inc. -# -# This program and the accompanying materials are made available under the -# terms of the Apache License, Version 2.0 which is available at -# https://www.apache.org/licenses/LICENSE-2.0. -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# SPDX-License-Identifier: Apache-2.0 -################################################################################ - -import re -import sys -from doc_generator import BaseDocumentationGenerator - - -class JavaDocumentationGenerator(BaseDocumentationGenerator): - - def __init__(self, args): - reload(sys) - sys.setdefaultencoding('utf8') - - super(JavaDocumentationGenerator, self).__init__(args) - - '''Doc strings (in Java/Scala) need to be stripped of all '*' values. - Convert '@param' to '- param'. Strip line with author as well. - - TODO can be vastly improved. - ''' - def process_main_docstring(self, doc_string): - lines = doc_string.split('\n') - doc = [line.replace('*', '').lstrip(' ').rstrip('/') for line in lines[1:-1] if not '@' in line] - return '\n'.join(doc) - - - '''Doc strings (in Java/Scala) need to be stripped of all '*' values. - Convert '@param' to '- param'. 
TODO can be vastly improved. - ''' - def process_docstring(self, doc_string): - lines = doc_string.split('\n') - doc = [line.replace('*', '').lstrip(' ').replace('@', '- ') for line in lines] - return '\n'.join(doc) - - - '''Takes unformatted signatures and doc strings and returns a properly - rendered piece that fits into our markdown layout. - ''' - def render(self, signature, doc_string, class_name, is_method): - if is_method: # Method name from signature - method_regex = r'public (?:static )?[a-zA-Z0-9]* ([a-zA-Z0-9]*)\(' - name = re.findall(method_regex, signature)[0] - else: # Constructor takes class name - name = class_name - sub_blocks = ['##### {} \n{}'.format(name, self.to_code_snippet(signature))] - if doc_string: - sub_blocks.append(doc_string + '\n') - return '\n\n'.join(sub_blocks) - - - '''Returns main doc string of class/object in question. - ''' - def get_main_doc_string(self, class_string, class_name): - print(class_name) - doc_regex = r'\/\*\*\n([\S\s]*?.*)\*\/\n' # match "/** ... */" at the top - doc_string = re.search(doc_regex, class_string) - try: - doc_match = doc_string.group(); - except: - doc_match = '' - doc = self.process_main_docstring(doc_match) - if not doc_string: - print('Warning, no doc string found for class {}'.format(class_name)) - doc_index = 0 if not doc_match else doc_string.end() - return doc, class_string[doc_index:] - - - '''Returns doc string and signature data for constructors. 
- ''' - def get_constructor_data(self, class_string, class_name, use_contructor): - constructors = [] - if 'public ' + class_name in class_string and use_contructor: - doc_regex = r'\/\*\*\n([\S\s]*?.*)\*\/\n[\S\s]*?(public ' \ - + class_name + '.[\S\s]*?){' - result = re.search(doc_regex, class_string) - if result: - doc_string, signature = result.groups() - doc = self.process_docstring(doc_string) - class_string = class_string[result.end():] - constructors.append((signature, doc)) - else: - print("Warning, no doc string found for constructor {}".format(class_name)) - return constructors, class_string - - - '''Returns doc string and signature data for methods - in the public API of an object - ''' - def get_public_method_data(self, class_string, includes, excludes): - method_regex = r'public (?:static )?[a-zA-Z0-9]* ([a-zA-Z0-9]*)\(' - - # Either use all methods or use include methods that can be found - method_strings = re.findall(method_regex, class_string) - if includes: - method_strings = [i for i in includes if i in method_strings] - - # Exclude all 'exclude' methods - method_strings = [m for m in method_strings if m not in excludes] - - methods = [] - for method in method_strings: - # print("Processing doc string for method {}".format(method)) - doc_regex = r'\/\*\*\n([\S\s]*?.*)\*\/\n[\S\s]*?' + \ - '(public (?:static )?[a-zA-Z0-9]* ' + method + '[\S\s]*?){' - # TODO: this will sometimes run forever. 
fix regex - result = re.search(doc_regex, class_string) - if result: - doc_string, signature = result.groups() - doc = self.process_docstring(doc_string) - class_string = class_string[result.end():] - methods.append((signature, doc)) - else: - print("Warning, no doc string found for method {}".format(method)) - return methods diff --git a/docs/keras-import/README.md b/docs/keras-import/README.md deleted file mode 100644 index 705744378..000000000 --- a/docs/keras-import/README.md +++ /dev/null @@ -1,10 +0,0 @@ -# DL4J Keras model-import documentation - -To generate docs into the`keras-import/doc_sources` folder, run - -``` -python generate_docs.py \ - --project keras-import \ - --code ../deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/ - -``` \ No newline at end of file diff --git a/docs/keras-import/mkdocs.yml b/docs/keras-import/mkdocs.yml deleted file mode 100644 index e92e8aaed..000000000 --- a/docs/keras-import/mkdocs.yml +++ /dev/null @@ -1,37 +0,0 @@ -site_name: DL4J Keras model-import documentation -theme: readthedocs -docs_dir: doc_sources -repo_url: https://github.com/deeplearning4j/deeplearning4j -site_url: http://deeplearning4j.org -site_description: 'DL4J Keras model-import documentation' - -dev_addr: '0.0.0.0:8000' - -pages: -- Home: index.md -- Overview of supported features: supported-features.md -- Getting started: - - Guide to KerasSequentialModel: getting-started/keras-sequential-guide.md - - Guide to KerasModel: getting-started/keras-model-guide.md -- Models: - - KerasSequentialModel: models/sequential.md - - KerasModel (functional API): models/model.md -- Layers: - - About Keras import layers: layers/about-importing-layers.md - - Core Layers: layers/core.md - - Convolutional Layers: layers/convolutional.md - - Pooling Layers: layers/pooling.md - - Recurrent Layers: layers/recurrent.md - - Embedding Layers: layers/embeddings.md - - Advanced Activations Layers: layers/advanced-activations.md - - 
Normalization Layers: layers/normalization.md - - Noise layers: layers/noise.md - - Layer wrappers: layers/wrappers.md - - Writing custom import layers: layers/writing-custom-import-layers.md -- Losses: losses.md -- Optimizers: optimizers.md -- Activations: activations.md -- Backend: backend.md -- Initializers: initializers.md -- Regularizers: regularizers.md -- Constraints: constraints.md diff --git a/docs/keras-import/pages.json b/docs/keras-import/pages.json deleted file mode 100644 index 8d8ce5322..000000000 --- a/docs/keras-import/pages.json +++ /dev/null @@ -1,68 +0,0 @@ -{ - "excludes": [ - "abstract" - ], - "indices": [ - ], - "pages": [ - { - "page": "model-import.md", - "class": [ - "KerasModelImport.java" - ] - }, - { - "page": "model-sequential.md", - "class": [ - "KerasSequentialModel.java" - ] - }, - { - "page": "model-functional.md", - "class": [ - "KerasModel.java" - ] - }, - { - "page": "layers-core.md", - "module": ["layers/core"] - }, - { - "page": "layers-convolutional.md", - "module": ["layers/convolutional"] - }, - { - "page": "layers-pooling.md", - "module": ["layers/pooling"] - }, - { - "page": "layers-local.md", - "module": ["layers/local"] - }, - { - "page": "layers-recurrent.md", - "module": ["layers/recurrent"] - }, - { - "page": "layers-embeddings.md", - "module": ["layers/embeddings"] - }, - { - "page": "layers-normalization.md", - "module": ["layers/normalization"] - }, - { - "page": "layers-advanced-activations.md", - "module": ["layers/advanced/activations"] - }, - { - "page": "layers-noise.md", - "module": ["layers/noise"] - }, - { - "page": "layers-wrappers.md", - "module": ["layers/wrappers"] - } - ] -} - diff --git a/docs/keras-import/templates/activations.md b/docs/keras-import/templates/activations.md deleted file mode 100644 index b4b589389..000000000 --- a/docs/keras-import/templates/activations.md +++ /dev/null @@ -1,24 +0,0 @@ ---- -title: Keras Activations -short_title: Activations -description: Supported Keras 
activations. -category: Keras Import -weight: 4 ---- - -## Available activations - -We support all [Keras activation functions](https://keras.io/activations), namely: - -* softmax -* elu -* selu -* softplus -* softsign -* relu -* tanh -* sigmoid -* hard_sigmoid -* linear - -The mapping of Keras to DL4J activation functions is defined in [KerasActivationUtils](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasActivationUtils.java) diff --git a/docs/keras-import/templates/backend.md b/docs/keras-import/templates/backend.md deleted file mode 100644 index f4d8ac528..000000000 --- a/docs/keras-import/templates/backend.md +++ /dev/null @@ -1,12 +0,0 @@ ---- -title: Keras Backends -short_title: Backends -description: Supported Keras backends. -category: Keras Import -weight: 4 ---- - -## Supported Keras backends - -DL4J Keras model import is backend agnostic. No matter which backend you choose (TensorFlow, Theano, CNTK), your models -can be imported into DL4J. \ No newline at end of file diff --git a/docs/keras-import/templates/constraints.md b/docs/keras-import/templates/constraints.md deleted file mode 100644 index 2a1dc7936..000000000 --- a/docs/keras-import/templates/constraints.md +++ /dev/null @@ -1,18 +0,0 @@ ---- -title: Keras Constraints -short_title: Constraints -description: Supported Keras constraints. -category: Keras Import -weight: 4 ---- - -## Supported constraints - -All [Keras constraints](https://keras.io/constraints) are supported: - -* max_norm -* non_neg -* unit_norm -* min_max_norm - -Mapping Keras to DL4J constraints happens in [KerasConstraintUtils](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasConstraintUtils.java). 
diff --git a/docs/keras-import/templates/get-started.md b/docs/keras-import/templates/get-started.md deleted file mode 100644 index 2cb53eae6..000000000 --- a/docs/keras-import/templates/get-started.md +++ /dev/null @@ -1,19 +0,0 @@ ---- -title: Keras Model Import Get Started -short_title: Get Started -description: Getting started with model import. -category: Keras Import -weight: 1 ---- - -## Getting started with Keras model import - -Below is a [video tutorial](https://www.youtube.com/embed/bI1aR1Tj2DM) demonstrating -working code to load a Keras model into Deeplearning4j and validating the working network. -Instructor Tom Hanlon provides an overview of a simple classifier over Iris data built -in Keras with a Theano backend, and exported and loaded into Deeplearning4j: - - - -If you have trouble viewing the video, please click here to -[view it on YouTube](https://www.youtube.com/embed/bI1aR1Tj2DM). \ No newline at end of file diff --git a/docs/keras-import/templates/initializers.md b/docs/keras-import/templates/initializers.md deleted file mode 100644 index 780cd1601..000000000 --- a/docs/keras-import/templates/initializers.md +++ /dev/null @@ -1,29 +0,0 @@ ---- -title: Keras Initializers -short_title: Initializers -description: Supported Keras weight initializers. -category: Keras Import -weight: 4 ---- - -## Supported initializers - -DL4J supports all available [Keras initializers](https://keras.io/initializers), namely: - -* Zeros -* Ones -* Constant -* RandomNormal -* RandomUniform -* TruncatedNormal -* VarianceScaling -* Orthogonal -* Identity -* lecun_uniform -* lecun_normal -* glorot_normal -* glorot_uniform -* he_normal -* he_uniform - -The mapping of Keras to DL4J initializers can be found in [KerasInitilizationUtils](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasInitilizationUtils.java). 
\ No newline at end of file diff --git a/docs/keras-import/templates/layers-advanced-activations.md b/docs/keras-import/templates/layers-advanced-activations.md deleted file mode 100644 index 76e782d54..000000000 --- a/docs/keras-import/templates/layers-advanced-activations.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Keras Advanced Activations -short_title: Advanced Activations -description: Supported Keras advanced layer activations. -category: Keras Import -weight: 4 ---- - -## Keras advanced activations - -{{autogenerated}} \ No newline at end of file diff --git a/docs/keras-import/templates/layers-convolutional.md b/docs/keras-import/templates/layers-convolutional.md deleted file mode 100644 index 0beadefeb..000000000 --- a/docs/keras-import/templates/layers-convolutional.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Keras Import Convolutional Layers -short_title: Convolutional Layers -description: Supported Keras convolutional layers. -category: Keras Import -weight: 4 ---- - -## Keras layers - -{{autogenerated}} \ No newline at end of file diff --git a/docs/keras-import/templates/layers-core.md b/docs/keras-import/templates/layers-core.md deleted file mode 100644 index e9258b8c1..000000000 --- a/docs/keras-import/templates/layers-core.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Keras Import Core Layers -short_title: Core Layers -description: Supported Keras layers. -category: Keras Import -weight: 4 ---- - -## Keras core layers - -{{autogenerated}} \ No newline at end of file diff --git a/docs/keras-import/templates/layers-embeddings.md b/docs/keras-import/templates/layers-embeddings.md deleted file mode 100644 index 677ba155b..000000000 --- a/docs/keras-import/templates/layers-embeddings.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Keras Import Embedding Layers -short_title: Embedding Layers -description: Supported Keras layers. 
-category: Keras Import -weight: 4 ---- - -## Keras layers - -{{autogenerated}} \ No newline at end of file diff --git a/docs/keras-import/templates/layers-local.md b/docs/keras-import/templates/layers-local.md deleted file mode 100644 index 28a5120b5..000000000 --- a/docs/keras-import/templates/layers-local.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Keras Import Local Layers -short_title: Local Layers -description: Supported Keras local layers. -category: Keras Import -weight: 4 ---- - -## Keras local layers - -{{autogenerated}} \ No newline at end of file diff --git a/docs/keras-import/templates/layers-noise.md b/docs/keras-import/templates/layers-noise.md deleted file mode 100644 index 293495734..000000000 --- a/docs/keras-import/templates/layers-noise.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Keras Import Noise Layers -short_title: Noise Layers -description: Supported Keras noise layers. -category: Keras Import -weight: 4 ---- - -## Keras layers - -{{autogenerated}} \ No newline at end of file diff --git a/docs/keras-import/templates/layers-normalization.md b/docs/keras-import/templates/layers-normalization.md deleted file mode 100644 index ce3a408f9..000000000 --- a/docs/keras-import/templates/layers-normalization.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Keras Import Normalization Layers -short_title: Normalization Layers -description: Supported Keras normalization layers. -category: Keras Import -weight: 4 ---- - -## Keras layers - -{{autogenerated}} \ No newline at end of file diff --git a/docs/keras-import/templates/layers-pooling.md b/docs/keras-import/templates/layers-pooling.md deleted file mode 100644 index e0a66fcbe..000000000 --- a/docs/keras-import/templates/layers-pooling.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Keras Import Pooling Layers -short_title: Pooling Layers -description: Supported Keras pooling layers. 
-category: Keras Import -weight: 4 ---- - -## Keras pooling layers - -{{autogenerated}} \ No newline at end of file diff --git a/docs/keras-import/templates/layers-recurrent.md b/docs/keras-import/templates/layers-recurrent.md deleted file mode 100644 index ed8fc347a..000000000 --- a/docs/keras-import/templates/layers-recurrent.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Keras Import Recurrent Layers -short_title: Recurrent Layers -description: Supported Keras recurrent layers. -category: Keras Import -weight: 4 ---- - -## Keras recurrent layers - -{{autogenerated}} \ No newline at end of file diff --git a/docs/keras-import/templates/layers-wrappers.md b/docs/keras-import/templates/layers-wrappers.md deleted file mode 100644 index 30473c8d9..000000000 --- a/docs/keras-import/templates/layers-wrappers.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Keras Import Wrapper Layers -short_title: Wrapper Layers -description: Supported Keras wrapper layers. -category: Keras Import -weight: 4 ---- - -## Keras wrapper layers - -{{autogenerated}} \ No newline at end of file diff --git a/docs/keras-import/templates/losses.md b/docs/keras-import/templates/losses.md deleted file mode 100644 index c86298129..000000000 --- a/docs/keras-import/templates/losses.md +++ /dev/null @@ -1,28 +0,0 @@ ---- -title: Keras Import Loss Functions -short_title: Losses -description: Supported Keras loss functions. 
-category: Keras Import -weight: 4 ---- - -## Supported loss functions - -DL4J supports all available [Keras losses](https://keras.io/losses) (except for `logcosh`), namely: - -* mean_squared_error -* mean_absolute_error -* mean_absolute_percentage_error -* mean_squared_logarithmic_error -* squared_hinge -* hinge -* categorical_hinge -* logcosh -* categorical_crossentropy -* sparse_categorical_crossentropy -* binary_crossentropy -* kullback_leibler_divergence -* poisson -* cosine_proximity - -The mapping of Keras loss functions can be found in [KerasLossUtils](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasLossUtils.java). \ No newline at end of file diff --git a/docs/keras-import/templates/model-functional.md b/docs/keras-import/templates/model-functional.md deleted file mode 100644 index 406135b15..000000000 --- a/docs/keras-import/templates/model-functional.md +++ /dev/null @@ -1,79 +0,0 @@ ---- -title: Keras Import Functional Model -short_title: Functional Model -description: Importing the functional model. -category: Keras Import -weight: 2 ---- - -## Getting started with importing Keras functional Models - -Let's say you start with defining a simple MLP using Keras' functional API: - -```python -from keras.models import Model -from keras.layers import Dense, Input - -inputs = Input(shape=(100,)) -x = Dense(64, activation='relu')(inputs) -predictions = Dense(10, activation='softmax')(x) -model = Model(inputs=inputs, outputs=predictions) -model.compile(loss='categorical_crossentropy',optimizer='sgd', metrics=['accuracy']) -``` - -In Keras there are several ways to save a model. You can store the whole model -(model definition, weights and training configuration) as HDF5 file, just the -model configuration (as JSON or YAML file) or just the weights (as HDF5 file). 
-Here's how you do each: - -```python -model.save('full_model.h5') # save everything in HDF5 format - -model_json = model.to_json() # save just the config. replace with "to_yaml" for YAML serialization -with open("model_config.json", "w") as f: - f.write(model_json) - -model.save_weights('model_weights.h5') # save just the weights. -``` - -If you decide to save the full model, you will have access to the training configuration of -the model, otherwise you don't. So if you want to further train your model in DL4J after import, -keep that in mind and use `model.save(...)` to persist your model. - -## Loading your Keras model - -Let's start with the recommended way, loading the full model back into DL4J (we assume it's -on your class path): - -```java -String fullModel = new ClassPathResource("full_model.h5").getFile().getPath(); -ComputationGraph model = KerasModelImport.importKerasModelAndWeights(fullModel); - -``` - -In case you didn't compile your Keras model, it will not come with a training configuration. -In that case you need to explicitly tell model import to ignore training configuration by -setting the `enforceTrainingConfig` flag to false like this: - -```java -ComputationGraph model = KerasModelImport.importKerasModelAndWeights(fullModel, false); - -``` - -To load just the model configuration from JSON, you use `KerasModelImport` as follows: - -```java -String modelJson = new ClassPathResource("model_config.json").getFile().getPath(); -ComputationGraphConfiguration modelConfig = KerasModelImport.importKerasModelConfiguration(modelJson) -``` - -If additionally you also want to load the model weights with the configuration, here's what you do: - -```java -String modelWeights = new ClassPathResource("model_weights.h5").getFile().getPath(); -MultiLayerNetwork network = KerasModelImport.importKerasModelAndWeights(modelJson, modelWeights) -``` - -In the latter two cases no training configuration will be read. 
- -{{autogenerated}} diff --git a/docs/keras-import/templates/model-import.md b/docs/keras-import/templates/model-import.md deleted file mode 100644 index 72f79f4cd..000000000 --- a/docs/keras-import/templates/model-import.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Keras Import API -short_title: Import -description: Importing API. -category: Keras Import -weight: 1 ---- - -## Keras model import API - -{{autogenerated}} \ No newline at end of file diff --git a/docs/keras-import/templates/model-sequential.md b/docs/keras-import/templates/model-sequential.md deleted file mode 100644 index 3cc5c4bb6..000000000 --- a/docs/keras-import/templates/model-sequential.md +++ /dev/null @@ -1,78 +0,0 @@ ---- -title: Keras Import Sequential Model -short_title: Sequential Model -description: Importing the functional model. -category: Keras Import -weight: 2 ---- - -## Getting started with importing Keras Sequential models - -Let's say you start with defining a simple MLP using Keras: - -```python -from keras.models import Sequential -from keras.layers import Dense - -model = Sequential() -model.add(Dense(units=64, activation='relu', input_dim=100)) -model.add(Dense(units=10, activation='softmax')) -model.compile(loss='categorical_crossentropy',optimizer='sgd', metrics=['accuracy']) -``` - -In Keras there are several ways to save a model. You can store the whole model -(model definition, weights and training configuration) as HDF5 file, just the -model configuration (as JSON or YAML file) or just the weights (as HDF5 file). -Here's how you do each: - -```python -model.save('full_model.h5') # save everything in HDF5 format - -model_json = model.to_json() # save just the config. replace with "to_yaml" for YAML serialization -with open("model_config.json", "w") as f: - f.write(model_json) - -model.save_weights('model_weights.h5') # save just the weights. 
-``` - -If you decide to save the full model, you will have access to the training configuration of -the model, otherwise you don't. So if you want to further train your model in DL4J after import, -keep that in mind and use `model.save(...)` to persist your model. - -## Loading your Keras model - -Let's start with the recommended way, loading the full model back into DL4J (we assume it's -on your class path): - -```java -String fullModel = new ClassPathResource("full_model.h5").getFile().getPath(); -MultiLayerNetwork model = KerasModelImport.importKerasSequentialModelAndWeights(fullModel); - -``` - -In case you didn't compile your Keras model, it will not come with a training configuration. -In that case you need to explicitly tell model import to ignore training configuration by -setting the `enforceTrainingConfig` flag to false like this: - -```java -MultiLayerNetwork model = KerasModelImport.importKerasSequentialModelAndWeights(fullModel, false); - -``` - -To load just the model configuration from JSON, you use `KerasModelImport` as follows: - -```java -String modelJson = new ClassPathResource("model_config.json").getFile().getPath(); -MultiLayerNetworkConfiguration modelConfig = KerasModelImport.importKerasSequentialConfiguration(modelJson) -``` - -If additionally you also want to load the model weights with the configuration, here's what you do: - -```java -String modelWeights = new ClassPathResource("model_weights.h5").getFile().getPath(); -MultiLayerNetwork network = KerasModelImport.importKerasSequentialModelAndWeights(modelJson, modelWeights) -``` - -In the latter two cases no training configuration will be read. 
- - diff --git a/docs/keras-import/templates/optimizers.md b/docs/keras-import/templates/optimizers.md deleted file mode 100644 index ccc55aea9..000000000 --- a/docs/keras-import/templates/optimizers.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -title: Keras Import Optimizers -short_title: Optimizers -description: Supported Keras optimizers -category: Keras Import -weight: 2 ---- - -## Supported optimizers - -All standard Keras optimizers are supported, but importing custom TensorFlow optimizers won't work: - -* SGD -* RMSprop -* Adagrad -* Adadelta -* Adam -* Adamax -* Nadam -* TFOptimizer \ No newline at end of file diff --git a/docs/keras-import/templates/overview.md b/docs/keras-import/templates/overview.md deleted file mode 100644 index 098d21917..000000000 --- a/docs/keras-import/templates/overview.md +++ /dev/null @@ -1,135 +0,0 @@ ---- -title: Keras Import Overview -short_title: Overview -description: Overview of model import. -category: Keras Import -weight: 0 ---- - -## Deeplearing4j: Keras model import - -[Keras model import](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras) -provides routines for importing neural network models originally configured and trained -using [Keras](https://keras.io/), a popular Python deep learning library. - -Once you have imported your model into DL4J, our full production stack is at your disposal. -We support import of all Keras model types, most layers and practically all utility functionality. -Please check [here](./keras-import-supported-features) for a complete list of supported Keras features. - - -## Getting started: Import a Keras model in 60 seconds - -To import a Keras model, you need to create and [serialize](https://keras.io/getting-started/faq/#how-can-i-save-a-keras-model) -such a model first. Here's a simple example that you can use. 
The model is a simple MLP that takes -mini-batches of vectors of length 100, has two Dense layers and predicts a total of 10 -categories. After defining the model, we serialize it in HDF5 format. - -```python -from keras.models import Sequential -from keras.layers import Dense - -model = Sequential() -model.add(Dense(units=64, activation='relu', input_dim=100)) -model.add(Dense(units=10, activation='softmax')) -model.compile(loss='categorical_crossentropy',optimizer='sgd', metrics=['accuracy']) - -model.save('simple_mlp.h5') -``` - -If you put this model file (`simple_mlp.h5`) into the base of your resource folder of your -project, you can load the Keras model as DL4J `MultiLayerNetwork` as follows - -```java -String simpleMlp = new ClassPathResource("simple_mlp.h5").getFile().getPath(); -MultiLayerNetwork model = KerasModelImport.importKerasSequentialModelAndWeights(simpleMlp); -``` - -That's it! The `KerasModelImport` is your main entry point to model import and class takes -care of mapping Keras to DL4J concepts internally. As user you just have to provide your model -file, see our [Getting started guide](./getting-started) for more details and options to load -Keras models into DL4J. - -You can now use your imported model for inference (here with dummy data for simplicity) -```java -INDArray input = Nd4j.create(256, 100); -INDArray output = model.output(input); -``` - -Here's how you do training in DL4J for your imported model: - -```java -model.fit(input, output); -``` - -The full example just shown can be found in our [DL4J examples](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/modelimport/keras/basic/SimpleSequentialMlpImport.java). - - -## Project setup - -To use Keras model import in your existing project, all you need to do is add the following -dependency to your pom.xml. 
- -``` - - org.deeplearning4j - deeplearning4j-modelimport - 1.0.0-beta // This version should match that of your other DL4J project dependencies. - -``` - -If you need a project to get started in the first place, consider cloning -[DL4J examples](https://github.com/eclipse/deeplearning4j-examples) and follow -the instructions in the repository to build the project. - -## Popular models and applications - -We support import for a growing number of applications, check [here](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasModelEndToEndTest.java) -for a full list of currently covered models. These applications include - -- Deep convolutional and Wasserstein GANs -- UNET -- ResNet50 -- SqueezeNet -- MobileNet -- Inception -- Xception - -## Troubleshooting and support - -An `IncompatibleKerasConfigurationException` message indicates that you are attempting to -import a Keras model configuration that is not currently supported in Deeplearning4j -(either because model import does not cover it, or DL4J does not implement the layer, -or feature). - -Once you have imported your model, we recommend our own `ModelSerializer` class for further -saving and reloading of your model. - -You can inquire further by visiting the [DL4J gitter channel](https://gitter.im/deeplearning4j/deeplearning4j). You might consider filing -a [feature request via Github](https://github.com/eclipse/deeplearning4j/issues) -so that this missing functionality can be placed on the DL4J development roadmap or even -sending us a pull request with the necessary changes! - - -## Why Keras model import? - -Keras is a popular and user-friendly deep learning library written in Python. -The intuitive API of Keras makes defining and running your deep learning -models in Python easy. Keras allows you to choose which lower-level -library it runs on, but provides a unified API for each such backend. 
Currently, -Keras supports Tensorflow, CNTK and Theano backends, but Skymind is -working on an [ND4J backend](https://github.com/deeplearning4j/keras/tree/inference_only/nd4j_examples) -for Keras as well. - -There is often a gap between the production system of a company and the -experimental setup of its data scientists. Keras model import -allows data scientists to write their models in Python, but still -seamlessly integrates with the production stack. - -Keras model import is targeted at users mainly familiar with writing -their models in Python with Keras. With model import you can bring your -Python models to production by allowing users to import their models -into the DL4J ecosphere for either further training or evaluation purposes. - -You should use this module when the experimentation phase of your -project is completed and you need to ship your models to production. [Skymind](https://skymind.ai) -commercial support for Keras implementations in enterprise. diff --git a/docs/keras-import/templates/regularizers.md b/docs/keras-import/templates/regularizers.md deleted file mode 100644 index 4f3cb7d1a..000000000 --- a/docs/keras-import/templates/regularizers.md +++ /dev/null @@ -1,17 +0,0 @@ ---- -title: Keras Import Regularizers -short_title: Regularizers -description: Supported Keras regularizers. -category: Keras Import -weight: 4 ---- - -## Supported regularizers - -All [Keras regularizers] are supported by DL4J model import: - -* l1 -* l2 -* l1_l2 - -Mapping of regularizers can be found in [KerasRegularizerUtils](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasRegularizerUtils.java). 
diff --git a/docs/keras-import/templates/supported-features.md b/docs/keras-import/templates/supported-features.md deleted file mode 100644 index 9438b64cd..000000000 --- a/docs/keras-import/templates/supported-features.md +++ /dev/null @@ -1,190 +0,0 @@ ---- -title: Keras Import Supported Features -short_title: Supported Features -description: Supported Keras features. -category: Keras Import -weight: 2 ---- - -## Keras Model Import: Supported Features - -Little-known fact: Deeplearning4j's creator, Skymind, has two of the top -five [Keras contributors](https://github.com/keras-team/keras/graphs/contributors) -on our team, making it the largest contributor to Keras after Keras creator Francois -Chollet, who's at Google. - -While not every concept in DL4J has an equivalent in Keras and vice versa, many of the -key concepts can be matched. Importing keras models into DL4J is done in -our [deeplearning4j-modelimport](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras) -module. Below is a comprehensive list of currently supported features. - -* [Layers](#layers) -* [Losses](#losses) -* [Activations](#activations) -* [Initializers](#initializers) -* [Regularizers](#regularizers) -* [Constraints](#constraints) -* [Metrics](#metrics) -* [Optimizers](#optimizers) - - -## Layers -Mapping keras to DL4J layers is done in the [layers](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers) sub-module of model import. The structure of this project loosely reflects the structure of Keras. 
- -### [Core Layers](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core) -* [Dense](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDense.java) -* [Activation](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasActivation.java) -* [Dropout](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDropout.java) -* [Flatten](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasFlatten.java) -* [Reshape](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasReshape.java) -* [Merge](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasMerge.java) -* [Permute](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasPermute.java) -* [RepeatVector](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasRepeatVector.java) -* [Lambda](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasLambda.java) -* ActivityRegularization -* 
[Masking](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasMasking.java) -* [SpatialDropout1D](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasSpatialDropout.java) -* [SpatialDropout2D](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasSpatialDropout.java) -* [SpatialDropout3D](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasSpatialDropout.java) - -### [Convolutional Layers](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional) -* [Conv1D](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution1D.java) -* [Conv2D](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution2D.java) -* [Conv3D](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution3D.java) -* [AtrousConvolution1D](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasAtrousConvolution1D.java) -* 
[AtrousConvolution2D](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasAtrousConvolution2D.java) -* SeparableConv1D -* [SeparableConv2D](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasSeparableConvolution2D.java) -* [Conv2DTranspose](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasDeconvolution2D.java) -* Conv3DTranspose -* [Cropping1D](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasCropping1D.java) -* [Cropping2D](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasCropping2D.java) -* [Cropping3D](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasCropping3D.java) -* [UpSampling1D](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasUpsampling1D.java) -* [UpSampling2D](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasUpsampling2D.java) -* [UpSampling3D](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasUpsampling3D.java) -* 
[ZeroPadding1D](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding1D.java) -* [ZeroPadding2D](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding2D.java) -* [ZeroPadding3D](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding3D.java) - -### [Pooling Layers](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling) -* [MaxPooling1D](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling1D.java) -* [MaxPooling2D](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling2D.java) -* [MaxPooling3D](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling3D.java) -* [AveragePooling1D](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling1D.java) -* [AveragePooling2D](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling2D.java) -* 
[AveragePooling3D](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling3D.java) -* [GlobalMaxPooling1D](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasGlobalPooling.java) -* [GlobalMaxPooling2D](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasGlobalPooling.java) -* [GlobalMaxPooling3D](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasGlobalPooling.java) -* [GlobalAveragePooling1D](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasGlobalPooling.java) -* [GlobalAveragePooling2D](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasGlobalPooling.java) -* [GlobalAveragePooling3D](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasGlobalPooling.java) - -### [Locally-connected Layers](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/local) -* [LocallyConnected1D](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected1D.java) -* 
[LocallyConnected2D](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected2D.java) - -### [Recurrent Layers](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent) -* [SimpleRNN](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasSimpleRnn.java) -* GRU -* [LSTM](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLstm.java) -* ConvLSTM2D - - -### [Embedding Layers](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/embeddings) -* [Embedding](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/embeddings/KerasEmbedding.java) - -### [Merge Layers](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasMerge.java) -* Add / add -* Multiply / multiply -* Subtract / subtract -* Average / average -* Maximum / maximum -* Concatenate / concatenate -* Dot / dot - - -### [Advanced Activation Layers](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations) -* 
[LeakyReLU](https://github.com/eclipse/deeplearning4j/tree/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasLeakyReLU.java) -* [PReLU](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasPReLU.java) -* ELU -* [ThresholdedReLU](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasThresholdedReLU.java) - -### [Normalization Layers](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/normalization) -* [BatchNormalization](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/normalization/KerasBatchNormalization.java) - -### Noise Layers -* [GaussianNoise](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/noise/KerasGaussianNoise.java) -* [GaussianDropout](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/noise/KerasGaussianDropout.java) -* [AlphaDropout](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/noise/KerasAlphaDropout.java) - -### Layer Wrappers -* TimeDistributed -* [Bidirectional](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/wrappers/KerasBidirectional.java) - -## 
[Losses](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasLossUtils.java) -* mean_squared_error -* mean_absolute_error -* mean_absolute_percentage_error -* mean_squared_logarithmic_error -* squared_hinge -* hinge -* categorical_hinge -* logcosh -* categorical_crossentropy -* sparse_categorical_crossentropy -* binary_crossentropy -* kullback_leibler_divergence -* poisson -* cosine_proximity - -## [Activations](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasActivationUtils.java) -* softmax -* elu -* selu -* softplus -* softsign -* relu -* tanh -* sigmoid -* hard_sigmoid -* linear - -## [Initializers](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasInitilizationUtils.java) -* Zeros -* Ones -* Constant -* RandomNormal -* RandomUniform -* TruncatedNormal -* VarianceScaling -* Orthogonal -* Identity -* lecun_uniform -* lecun_normal -* glorot_normal -* glorot_uniform -* he_normal -* he_uniform - -## [Regularizers](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasRegularizerUtils.java) -* l1 -* l2 -* l1_l2 - -## [Constraints](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasConstraintUtils.java) -* max_norm -* non_neg -* unit_norm -* min_max_norm - -## [Optimizers](https://github.com/eclipse/deeplearning4j/blob/master/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasOptimizerUtils.java) -* SGD -* RMSprop -* Adagrad -* Adadelta -* Adam -* Adamax -* Nadam -* 
TFOptimizer diff --git a/docs/nd4j-nn/README.md b/docs/nd4j-nn/README.md deleted file mode 100644 index f4766c08c..000000000 --- a/docs/nd4j-nn/README.md +++ /dev/null @@ -1,10 +0,0 @@ -# nd4j-nn documentation - -To generate docs into the `nd4j-nn/doc_sources` folder, run - -``` -python generate_docs.py \ - --project nd4j-nn \ - --code ../nd4j - -``` \ No newline at end of file diff --git a/docs/nd4j-nn/pages.json b/docs/nd4j-nn/pages.json deleted file mode 100644 index 3042e29b2..000000000 --- a/docs/nd4j-nn/pages.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "excludes": [ - "abstract" - ], - "indices": [ - ], - "pages": [ - { - "page": "activations.md", - "module": ["/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/activations/impl/"] - }, - { - "page": "updaters.md", - "module": ["/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/learning/"] - } - ] -} \ No newline at end of file diff --git a/docs/nd4j-nn/templates/activations.md b/docs/nd4j-nn/templates/activations.md deleted file mode 100644 index c94ce12ae..000000000 --- a/docs/nd4j-nn/templates/activations.md +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: Activations -short_title: Activations -description: Special algorithms for gradient descent. -category: Models -weight: 10 ---- - -## What are activations? - -At a simple level, activation functions help decide whether a neuron should be activated. This helps determine whether the information that the neuron is receiving is relevant for the input. The activation function is a non-linear transformation that happens over an input signal, and the transformed output is sent to the next neuron. 
- -## Usage - -The recommended method to use activations is to add an activation layer in your neural network, and configure your desired activation: - -```java -GraphBuilder graphBuilder = new NeuralNetConfiguration.Builder() - // add hyperparameters and other layers - .addLayer("softmax", new ActivationLayer(Activation.SOFTMAX), "previous_input") - // add more layers and output - .build(); -``` - -## Available activations - -{{autogenerated}} \ No newline at end of file diff --git a/docs/nd4j-nn/templates/updaters.md b/docs/nd4j-nn/templates/updaters.md deleted file mode 100644 index 5b8d7b5e1..000000000 --- a/docs/nd4j-nn/templates/updaters.md +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: Updaters -short_title: Updaters -description: Special algorithms for gradient descent. -category: Models -weight: 10 ---- - -## What are updaters? - -The main difference among the updaters is how they treat the learning rate. Stochastic Gradient Descent, the most common learning algorithm in deep learning, relies on `Theta` (the weights in hidden layers) and `alpha` (the learning rate). Different updaters help optimize the learning rate until the neural network converges on its most performant state. - -## Usage - -To use the updaters, pass a new class to the `updater()` method in either a `ComputationGraph` or `MultiLayerNetwork`. 
- -```java -ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() - .updater(new Adam(0.01)) - // add your layers and hyperparameters below - .build(); -``` - -## Available updaters - -{{autogenerated}} \ No newline at end of file diff --git a/docs/nd4j/README.md b/docs/nd4j/README.md deleted file mode 100644 index bf4e4f0ac..000000000 --- a/docs/nd4j/README.md +++ /dev/null @@ -1,10 +0,0 @@ -# nd4j documentation - -To generate docs into the `nd4j/doc_sources` folder, first `cd docs` then run: - -```shell -python generate_docs.py \ - --project nd4j \ - --code ../nd4j - --out_language en -``` \ No newline at end of file diff --git a/docs/nd4j/pages.json b/docs/nd4j/pages.json deleted file mode 100644 index 98127dda2..000000000 --- a/docs/nd4j/pages.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "excludes": [ - "abstract" - ], - "indices": [ - ], - "pages": [ - { - "page": "overview.md", - "class": [] - }, - { - "page": "basics.md", - "class": [] - }, - { - "page": "elementwise.md", - "class": [] - }, - { - "page": "matrix-manipulation.md", - "class": [] - }, - { - "page": "syntax.md", - "class": [] - }, - { - "page": "tensor.md", - "class": [] - } - ] -} - diff --git a/docs/nd4j/templates/overview.md b/docs/nd4j/templates/overview.md deleted file mode 100644 index 12e61c476..000000000 --- a/docs/nd4j/templates/overview.md +++ /dev/null @@ -1,825 +0,0 @@ ---- -title: ND4J Overview -short_title: Overview -description: Comprehensive programming guide for ND4J. -category: ND4J -weight: 0 ---- - -This user guide is designed to explain (and provide examples for) the main functionality in ND4J. - -* Introduction - * NDArrays: How Are They Stored in Memory? 
- * Views: When two or more INDArrays refer to the same data -* Creating NDArrays - * Zero, One and Scalar-Value Initialized Arrays - * Random arrays - * Creating NDArrays from Java arrays - * Creating NDArrays from other NDArrays - * Miscellaneous NDArray Creation Methods -* Getting and Setting Individual Values -* Getting and Setting Parts of NDArrays - * getRow() and putRow() - * Sub-arrays: get(), put() and NDArrayIndex - * Tensor Along Dimension - * Slice -* Performing Operations on NDArrays - * Scalar Ops - * Transform Ops - * Accumulation (Reduction) Ops - * Index Accumulation Ops - * Broadcast and Vector Operations -* Boolean Indexing: Selectively Apply Operations Based on a Condition -* Workspaces - * Workspaces: Scope Panic -* Advanced and Miscellaneous Topics - * Setting the data type - * Reshaping - * Flattening - * Permute - * sortRows/sortColumns - * Directly accessing BLAS operations -* Serialization -* Quick Reference: A Summary Overview of ND4J Methods -* FAQ: Frequently Asked Questions - - -## Introduction - -An NDArray is in essence an n-dimensional array: i.e., a rectangular array of numbers, with some number of dimensions. - -Some concepts you should be familiar with: - -* The *rank* of an NDArray is the number of dimensions. 2d NDArrays have a rank of 2, 3d arrays have a rank of 3, and so on. You can create NDArrays with any arbitrary rank. -* The *shape* of an NDArray defines the size of each of the dimensions. Suppose we have a 2d array with 3 rows and 5 columns. This NDArray would have shape `[3,5]` -* The *length* of an NDArray defines the total number of elements in the array. The length is always equal to the product of the values that make up the shape. -* The *stride* of an NDArray is defined as the separation (in the underlying data buffer) of contiguous elements in each dimension. Stride is defined per dimension, so a rank N NDArray has N stride values, one for each dimension. 
Note that most of the time, you don't need to know (or concern yourself with) the stride - just be aware that this is how ND4J operates internally. The next section has an example of strides. -* The *data type* of an NDArray refers to the type of data of an NDArray (for example, *float* or *double* precision). Note that this is set globally in ND4J, so all NDArrays should have the same data type. Setting the data type is discussed later in this document. - -In terms of indexing there are a few things to know. First, rows are dimension 0, and columns are dimension 1: thus `INDArray.size(0)` is the number of rows, and `INDArray.size(1)` is the number of columns. Like normal arrays in most programming languages, indexing is zero-based: thus rows have indexes `0` to `INDArray.size(0)-1`, and so on for the other dimensions. - -Throughout this document, we'll use the term `NDArray` to refer to the general concept of an n-dimensional array; the term `INDArray` refers specifically to the [Java interface](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ndarray/INDArray.java) that ND4J defines. In practice, these two terms can be used interchangeably. - -### NDArrays: How Are They Stored in Memory? - -The next few paragraphs describe some of architecture behind ND4J. Understanding this is not strictly necessary in order to use ND4J, but it may help you to understand what is going on behind the scenes. -NDArrays are stored in memory as a single flat array of numbers (or more generally, as a single contiguous block of memory), and hence differs a lot from typical Java multidimensional arrays such as a `float[][]` or `double[][][]`. - -Physically, the data that backs an INDArray is stored off-heap: that is, it is stored outside of the Java Virtual Machine (JVM). 
This has numerous benefits, including performance, interoperability with high-performance BLAS libraries, and the ability to avoid some shortcomings of the JVM in high-performance computing (such as issues with Java arrays being limited to 2^31 -1 (2.14 billion) elements due to integer indexing). - -In terms of encoding, an NDArray can be encoded in either C (row-major) or Fortran (column-major) order. For more details on row vs. column major order, see [Wikipedia](https://en.wikipedia.org/wiki/Row-major_order). Nd4J may use a combination of C and F order arrays together, at the same time. Most users can just use the default array ordering, but note that it is possible to use a specific ordering for a given array, should the need arise. - - -The following image shows how a simple 3x3 (2d) NDArray is stored in memory, - -![C vs. F order](/images/guide/c_vs_f_order.png) - -In the above array, we have: - -* `Shape = [3,3]` (3 rows, 3 columns) -* `Rank = 2` (2 dimensions) -* `Length = 9` (3x3=9) -* Stride - * C order stride: `[3,1]`: the values in consecutive rows are separated in the buffer by 3, and the values in consecutive columns are separated in the buffer by 1 - * F order stride: `[1,3]`: the values in consecutive rows are separated in the buffer by 1, and the values in consecutive columns are separated in the buffer by 3 - - - - -### Views: When Two or More NDArrays Refer to the Same Data - -A key concept in ND4J is the fact that two NDArrays can actually point to the same underlying data in memory. Usually, we have one NDArray referring to some subset of another array, and this only occurs for certain operations (such as `INDArray.get()`, `INDArray.transpose()`, `INDArray.getRow()` etc.). This is a powerful concept, and one that is worth understanding. - -There are two primary motivations for this: - -1. There are considerable performance benefits, most notably in avoiding copying arrays -2. 
We gain a lot of power in terms of how we can perform operations on our NDArrays - -Consider a simple operation like a matrix transpose on a large (10,000 x 10,000) matrix. Using views, we can perform this matrix transpose in constant time without performing any copies (i.e., O(1) in [big O notation](https://en.wikipedia.org/wiki/Big_O_notation)), avoiding the considerable cost of copying all of the array elements. Of course, sometimes we *do* want to make a copy - at which point we can use `INDArray.dup()` to get a copy. For example, to get a *copy* of a transposed matrix, use `INDArray out = myMatrix.transpose().dup()`. After this `dup()` call, there will be no link between the original array `myMatrix` and the array `out` (thus, changes to one will not impact the other). - - -To see how views can be powerful, consider a simple task: adding 1.0 to the first row of a larger array, `myArray`. We can do this easily, in one line: - -`myArray.getRow(0).addi(1.0)` - -Let's break down what is happening here. First, the `getRow(0)` operation returns an INDArray that is a view of the original. Note that both `myArray` and `myArray.getRow(0)` point to the same area in memory: - -![getRow(0)](/images/guide/row_addi.png) - -then, after the addi(1.0) is performed, we have the following situation: - -![getRow(0).addi(1.0)](/images/guide/row_addi_2.png) - -As we can see, changes to the NDArray returned by `myArray.getRow(0)` will be reflected in the original array `myArray`; similarly, changes to `myArray` will be reflected in the row vector. - -## Creating NDArrays - -### Zero, One and Scalar-Value Initialized Arrays - -Two of the most commonly used methods of creating arrays are: - -* `Nd4j.zeros(int...)` -* `Nd4j.ones(int...)` - -The shapes of the arrays are specified as integers. For example, to create a zero-filled array with 3 rows and 5 columns, use `Nd4j.zeros(3,5)`. - -These can often be combined with other operations to create arrays with other values. 
For example, to create an array filled with 10s: - -`INDArray tens = Nd4j.zeros(3,5).addi(10)` - -The above initialization works in two steps: first by allocating a 3x5 array filled with zeros, and then by adding 10 to each value. - -### Random Arrays - -Nd4j provides a few methods to generate INDArrays, where the contents are pseudo-random numbers. - -To generate uniform random numbers in the range 0 to 1, use `Nd4j.rand(int nRows, int nCols)` (for 2d arrays), or `Nd4j.rand(int[])` (for 3 or more dimensions). - -Similarly, to generate Gaussian random numbers with mean zero and standard deviation 1, use `Nd4j.randn(int nRows, int nCols)` or `Nd4j.randn(int[])`. - -For repeatability (i.e., to set Nd4j's random number generator seed) you can use `Nd4j.getRandom().setSeed(long)` - -### Creating NDArrays from Java arrays - -Nd4j provides convenience methods for the creation of arrays from Java float and double arrays. - -To create a 1d NDArray from a 1d Java array, use: - -* Row vector: `Nd4j.create(float[])` or `Nd4j.create(double[])` -* Column vector: `Nd4j.create(float[],new int[]{length,1})` or `Nd4j.create(double[],new int[]{length,1})` - -For 2d arrays, use `Nd4j.create(float[][])` or `Nd4j.create(double[][])`. - -For creating NDArrays from Java primitive arrays with 3 or more dimensions (`double[][][]` etc), one approach is to use the following: - -```java -double[] flat = ArrayUtil.flattenDoubleArray(myDoubleArray); -int[] shape = ...; //Array shape here -INDArray myArr = Nd4j.create(flat,shape,'c'); -``` - -### Creating NDArrays from Other NDArrays - -There are three primary ways of creating arrays from other arrays: - -* Creating an exact copy of an existing NDArray using `INDArray.dup()` -* Create the array as a subset of an existing NDArray -* Combine a number of existing NDArrays to create a new NDArray - -For the second case, you can use getRow(), get(), etc. See Getting and Setting Parts of NDArrays for details on this. 
- -Two methods for combining NDArrays are `Nd4j.hstack(INDArray...)` and `Nd4j.vstack(INDArray...)`. - -`hstack` (horizontal stack) takes as argument a number of matrices that have the same number of rows, and stacks them horizontally to produce a new array. The input NDArrays can have a different number of columns, however. - -Example: - -``` -int nRows = 2; -int nColumns = 2; -// Create INDArray of zeros -INDArray zeros = Nd4j.zeros(nRows, nColumns); -// Create one of all ones -INDArray ones = Nd4j.ones(nRows, nColumns); -//hstack -INDArray hstack = Nd4j.hstack(ones,zeros); -System.out.println("### HSTACK ####"); -System.out.println(hstack); - -``` - -Output: - -``` -### HSTACK #### -[[1.00, 1.00, 0.00, 0.00], -[1.00, 1.00, 0.00, 0.00]] -``` - -`vstack` (vertical stack) is the vertical equivalent of hstack. The input arrays must have the same number of columns. - -Example: - -``` -int nRows = 2; -int nColumns = 2; -// Create INDArray of zeros -INDArray zeros = Nd4j.zeros(nRows, nColumns); -// Create one of all ones -INDArray ones = Nd4j.ones(nRows, nColumns); -//vstack -INDArray vstack = Nd4j.vstack(ones,zeros); -System.out.println("### VSTACK ####"); -System.out.println(vstack); -``` - -Output: - -``` -### VSTACK #### -[[1.00, 1.00], - [1.00, 1.00], - [0.00, 0.00], - [0.00, 0.00]] -``` - -`ND4J.concat` combines arrays along a dimension. 
- -Example: - -``` -int nRows = 2; -int nColumns = 2; -//INDArray of zeros -INDArray zeros = Nd4j.zeros(nRows, nColumns); -// Create one of all ones -INDArray ones = Nd4j.ones(nRows, nColumns); -// Concat on dimension 0 -INDArray combined = Nd4j.concat(0,zeros,ones); -System.out.println("### COMBINED dimension 0####"); -System.out.println(combined); -//Concat on dimension 1 -INDArray combined2 = Nd4j.concat(1,zeros,ones); -System.out.println("### COMBINED dimension 1 ####"); -System.out.println(combined2); -``` - -Output: -``` -### COMBINED dimension 0#### -[[0.00, 0.00], - [0.00, 0.00], - [1.00, 1.00], - [1.00, 1.00]] -### COMBINED dimension 1 #### -[[0.00, 0.00, 1.00, 1.00], - [0.00, 0.00, 1.00, 1.00]] -``` - -`ND4J.pad` is used to pad an array. - -Example: -``` -int nRows = 2; -int nColumns = 2; -// Create INDArray of all ones -INDArray ones = Nd4j.ones(nRows, nColumns); -// pad the INDArray -INDArray padded = Nd4j.pad(ones, new int[]{1,1}, Nd4j.PadMode.CONSTANT ); -System.out.println("### Padded ####"); -System.out.println(padded); -``` - -Output: - - -``` -### Padded #### -[[0.00, 0.00, 0.00, 0.00], - [0.00, 1.00, 1.00, 0.00], - [0.00, 1.00, 1.00, 0.00], - [0.00, 0.00, 0.00, 0.00]] -``` - - -One other method that can occasionally be useful is `Nd4j.diag(INDArray in)`. This method has two uses, depending on the argument `in`: - -* If `in` in a vector, diag outputs a NxN matrix with the diagonal equal to the array `in` (where N is the length of `in`) -* If `in` is a NxN matrix, diag outputs a vector taken from the diagonal of `in` - - - -### Miscellaneous NDArray Creation Methods - -To create an [identity matrix](https://en.wikipedia.org/wiki/Identity_matrix) of size N, you can use `Nd4j.eye(N)`. - -To create a row vector with elements `[a, a+1, a+2, ..., b]` you can use the linspace command: - -`Nd4j.linspace(a, b, b-a+1)` - -Linspace can be combined with a reshape operation to get other shapes. 
For example, if you want a 2d NDArray with 5 rows and 5 columns, with values 1 to 25 inclusive, you can use the following: - -`Nd4j.linspace(1,25,25).reshape(5,5) ` - - -## Getting and Setting Individual Values - -For an INDArray, you can get or set values using the indexes of the element you want to get or set. For a rank N array (i.e., an array with N dimensions) you need N indices. - -Note: getting or setting values individually (for example, one at a time in a for loop) is generally a bad idea in terms of performance. When possible, try to use other INDArray methods that operate on a large number of elements at a time. - -To get values from a 2d array, you can use: `INDArray.getDouble(int row, int column)` - -For arrays of any dimensionality, you can use `INDArray.getDouble(int...)`. For example, to get the value at index `i,j,k` use `INDArray.getDouble(i,j,k)` - - -To set values, use one of the putScalar methods: - -* `INDArray.putScalar(int[],double)` -* `INDArray.putScalar(int[],float)` -* `INDArray.putScalar(int[],int)` - -Here, the `int[]` is the index, and the `double/float/int` is the value to be placed at that index. - - -Some additional functionality that might be useful in certain circumstances is the `NDIndexIterator` class. The NDIndexIterator allows you to get the indexes in a defined order (specifially, the C-order traversal order: [0,0,0], [0,0,1], [0,0,2], ..., [0,1,0], ... etc for a rank 3 array). - -To iterate over the values in a 2d array, you can use: - -```java -NdIndexIterator iter = new NdIndexIterator(nRows, nCols); -while (iter.hasNext()) { - int[] nextIndex = iter.next(); - double nextVal = myArray.getDouble(nextIndex); - //do something with the value -} -``` - - -## Getting and Setting Parts of NDArrays - - - -### getRow() and putRow() - -In order to get a single row from an INDArray, you can use `INDArray.getRow(int)`. This will obviously return a row vector. 
-
Of note here is that this row is a view: changes to the returned row will impact the original array. This can be quite useful at times (for example: `myArr.getRow(3).addi(1.0)` to add 1.0 to the third row of a larger array); if you want a copy of a row, use `getRow(int).dup()`. - -Similarly, to get multiple rows, use `INDArray.getRows(int...)`. This returns an array with the rows stacked; note however that this will be a copy (not a view) of the original rows, a view is not possible here due to the way NDArrays are stored in memory. - -For setting a single row, you can use `myArray.putRow(int rowIdx,INDArray row)`. This will set the `rowIdx`th row of `myArray` to the values contained in the INDArray `row`. - - -### Sub-Arrays: get(), put() and NDArrayIndex - -**Get:** - -A more powerful and general method is to use `INDArray.get(NDArrayIndex...)`. This functionality allows you to get arbitrary sub-arrays based on certain indexes. -This is perhaps best explained by some examples: - -To get a single row (and all columns), you can use: - -`myArray.get(NDArrayIndex.point(rowIdx), NDArrayIndex.all()) ` - - -To get a range of rows (row `a` (inclusive) to row `b` (exclusive)) and all columns, you can use: - -`myArray.get(NDArrayIndex.interval(a,b), NDArrayIndex.all())` - -To get all rows and every second column, you can use: - -`myArray.get(NDArrayIndex.all(),NDArrayIndex.interval(0,2,nCols)) ` - -Though the above examples are for 2d arrays only, the NDArrayIndex approach extends to 3 or more dimensions. For 3 dimensions, you would provide 3 INDArrayIndex objects instead of just two, as above. - - -Note that the `NDArrayIndex.interval(...)`, `.all()` and `.point(int)` methods always return views of the underlying arrays. Thus, changes to the arrays returned by `.get()` will be reflected in the original array. 
- - -**Put:** - -The same NDArrayIndex approach is also used to put elements to another array: in this case you use the `INDArray.put(INDArrayIndex[], INDArray toPut)` method. Clearly, the size of the NDArray `toPut` must match the size implied by the provided indexes. - - -Also note that `myArray.put(NDArrayIndex[],INDArray other)` is functionally equivalent to doing `myArray.get(INDArrayIndex...).assign(INDArray other)`. Again, this is because `.get(INDArrayIndex...)` returns a view of the underlying array, not a copy. - - -### Tensor Along Dimension - -(Note: ND4J versions 0.4-rc3.8 and earlier returned slightly different results for tensor along dimension, as compared to current versions). - -Tensor along dimension is a powerful technique, but can be a little hard to understand at first. The idea behind tensor along dimension (hereafter refered to as TAD) is to get a lower rank sub-array that is a view of the original array. - -The tensor along dimension method takes two arguments: - -- The *index* of the tensor to return (in the range of 0 to numTensors-1) -- The *dimensions* (1 or more values) along which to execute the TAD operation - -The simplest case is a tensor along a single row or column of a 2d array. Consider the following diagram (where dimension 0 (rows) are indexed going down the page, and dimension 1 (columns) are indexed going across the page): - -![Tensor Along Dimension](/images/guide/tad_2d.png) - -Note that the output of the tensorAlongDimension call with one dimension is a row vector in all cases. - -To understand why we get this output: consider the first case in the above diagram. There, we are taking the 0th (first) tensor *along* dimension 0 (dimension 0 being rows); the values (1,5,2) are in a line as we move along dimension 0, hence the output. Similarly, the `tensorAlongDimension(1,1)` is the second (*index=1*) tensor along dimension 1; values (5,3,5) are in a line as we move along dimension 1. 
- - -The TAD operation can also be executed along multiple dimensions. For example, by specifying two dimensions to execute the TAD operation along, we can use it to get a 2d sub-array from a 3d (or 4d, or 5d...) array. Similarly, by specifying 3 dimensions, we can use it to get a 3d from 4d or higher. - -There are two things we need to know about the output, for the TAD operation to be useful. - -First, we need to know the number of tensors that we can get, for a given set of dimensions. To determine this, we can use the "number of tensors along dimensions" method, `INDArray.tensorssAlongDimension(int... dimensions)`. This method simply returns the number of tensors along the specified dimensions. In the examples above, we have: - -* `myArray.tensorssAlongDimension(0) = 3` -* `myArray.tensorssAlongDimension(1) = 3` -* `myArray.tensorssAlongDimension(0,1) = 1` -* `myArray.tensorssAlongDimension(1,0) = 1` - -(In the latter 2 cases, note that tensor along dimension would give us the same array out as the original array - i.e., we get a 2d output from a 2d array). - -More generally, the *number* of tensors is given by the product of the remaining dimensions, and the *shape* of the tensors is given by the size of the specified dimensions in the original shape. - - -Here are some examples: - -- For input shape [a,b,c], tensorssAlongDimension(0) gives b*c tensors, and tensorAlongDimension(i,0) returns tensors of shape [1,a]. -- For input shape [a,b,c], tensorssAlongDimension(1) gives a*c tensors, and tensorAlongDimension(i,1) returns tensors of shape [1,b]. -- For input shape [a,b,c], tensorssAlongDimension(0,1) gives c tensors, and tensorAlongDimension(i,0,1) returns tensors of shape [a,b]. -- For input shape [a,b,c], tensorssAlongDimension(1,2) gives a tensors, and tensorAlongDimension(i,1,2) returns tensors of shape [b,c]. -- For input shape [a,b,c,d], tensorssAlongDimension(1,2) gives a*d tensors, and tensorAlongDimension(i,1,2) returns tensors of shape [b,c]. 
-- For input shape [a,b,c,d], tensorssAlongDimension(0,2,3) gives b tensors, and tensorAlongDimension(i,0,2,3) returns tensors of shape [a,c,d]. - - -### Slice - -[This section: Forthcoming.] - -## Performing Operations on NDArrays - -Nd4J has the concept of ops (operations) for many things you might want to do with (or to) an INDArray. -For example, ops are used to apply things like tanh operations, or add a scalar, or do element-wise operations. - -ND4J defines five types of operations: - -* Scalar -* Transform -* Accumulation -* Index Accumulation -* Broadcast - -And two methods of executing each: - -* Directly on the entire INDArray, or -* Along a dimension - -Before getting into the specifics of these operations, let's take a moment to consider the difference between *in-place* and *copy* operations. - -Many ops have both in-place and copy operations. Suppose we want to add two arrays. Nd4j defines two methods for this: `INDArray.add(INDArray)` and `INDArray.addi(INDArray)`. The former (add) is a copy operation; the latter is an in-place operation - the *i* in *addi* means in-place. This convention (*...i* means in-place, no *i* means copy) holds for other ops that are accessible via the INDArray interface. - -Suppose we have two INDArrays `x` and `y` and we do `INDArray z = x.add(y)` or `INDArray z = x.addi(y)`. The results of these operations are shown below. - -![Add](/images/guide/add_v_addi_1.png) - -![Addi](/images/guide/add_v_addi_2.png) - - -Note that with the `x.add(y)` operation, the original array `x` is not modified. Comparatively, with the in-place version `x.addi(y)`, the array `x` is modified. In both versions of the add operation, an INDArray is returned that contains the result. Note however that in the case of the `addi` operation, the result array us actually just the original array `x`. 
- - - - -### Scalar Ops - -[Scalar ops](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/scalar) are element-wise operations that also take a scalar (i.e., a number). Examples of scalar ops are add, max, multiply, set and divide operations (see the previous link for a full list). - -A number of the methods such as `INDArray.addi(Number)` and `INDArray.divi(Number)` actually execute scalar ops behind the scenes, so when available, it is more convenient to use these methods. - -To execute a scalar op more directly, you can use for example: - -`Nd4j.getExecutioner().execAndReturn(new ScalarAdd(myArray,1.0))` - -Note that `myArray` is modified by this operation. If this is not what you want, use `myArray.dup()`. - -Unlike the remaining ops, scalar ops don't have a sensible interpretation of executing them along a dimension. - -### Transform Ops - -Transform ops are operations such as element-wise logarithm, cosine, tanh, rectified linear, etc. Other examples include add, subtract and copy operations. Transform ops are commonly used in an element-wise manner (such as tanh on each element), but this is not always the case - for example, softmax is typically executed along a dimension. - -To execute an element-wise tanh operation directly (on the full NDArray) you can use: - -`INDArray tanh = Nd4j.getExecutioner().execAndReturn(new Tanh(myArr))` -As with scalar ops mentioned above, transform operations using the above method are *in-place* operations: that is, the NDArray myArr is modified, and the returned array `tanh` is actually the same object as the input `myArr`. Again, you can use `myArr.dup()` if you want a copy. 
- -The [Transforms class](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/ops/transforms/Transforms.java) also defines some convenience methods, such as: `INDArray tanh = Transforms.tanh(INDArray in,boolean copy);` This is equivalent to the method using `Nd4j.getExecutioner()` above. - -### Accumulation (Reduction) Ops - -When it comes to executing accumulations, there is a key difference between executing the accumulation on the entire NDArray, versus executing along a particular dimension (or dimensions). In the first case (executing on the entire array), only a single value is returned. In the second case (accumulating along a dimension) a new INDArray is returned. - -To get the sum of all values in the array: - -`double sum = Nd4j.getExecutioner().execAndReturn(new Sum(myArray)).getFinalResult().doubleValue();` - -or equivalently (and more conveniently) - -`double sum = myArray.sumNumber().doubleValue();` - - -Accumulation ops can also be executed along a dimension. For example, to get the sum of all values in each column (in each column = along dimension 0, or "for values in each row"), you can use: - -`INDArray sumOfColumns = Nd4j.getExecutioner().exec(new Sum(myArray),0);` - -or equivalently, - -`INDArray sumOfColumns = myArray.sum(0)` - -Suppose this was executed on a 3x3 input array. Visually, this sum operation along dimension 0 operation looks like: - -![Sum along dimension 0](/images/guide/sum_dim0.png) - -Note that here, the input has shape `[3,3]` (3 rows, 3 columns) and the output has shape `[1,3]` (i.e., our output is a row vector). Had we instead done the operation along dimension 1, we would get a column vector with shape `[3,1]`, with values `(12,13,11)`. - -Accumulations along dimensions also generalize to NDArrays with 3 or more dimensions. 
- -### Index Accumulation Ops - -[Index accumulation ops](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/indexaccum) are very similar to accumulation ops. The difference is that they return an integer index, instead of a double value. - -Examples of index accumulation ops are IMax (argmax), IMin (argmin) and IAMax (argmax of absolute values). - -To get the index of the maximum value in the array: - -`int idx = Nd4j.getExecutioner().execAndReturn(new IAMax(myArray)).getFinalResult();` - -Index accumulation ops are often most useful when executed along a dimension. For example, to get the index of the maximum value in each column (in each column = along dimension 0), you can use: - -`INDArray idxOfMaxInEachColumn = Nd4j.getExecutioner().exec(new IAMax(myArray),0);` - -Suppose this was executed on a 3x3 input array. Visually, this argmax/IAMax operation along dimension 0 looks like: - -![Argmax / IAMax](/images/guide/argmax_dim0.png) - -As with the accumulation op described above, the output has shape `[1,3]`. Again, had we instead done the operation along dimension 1, we would get a column vector with shape `[3,1]`, with values `(1,0,2)`. - - -### Broadcast and Vector Ops - -ND4J also defines broadcast and vector operations. - -Some of the more useful operations are vector operations, such as addRowVector and muliColumnVector. - -Consider for example the operation `x.addRowVector(y)` where `x` is a matrix and `y` is a row vector. In this case, the `addRowVector` operation adds the row vector `y` to each row of the matrix `x`, as shown below. - -![addRowVector](/images/guide/addrowvector.png) - -As with other ops, there are inplace and copy versions. There are also column versions of these operations, such as `addColumnVector`, which adds a column vector to each column of the original INDArray. 
- - - -## Boolean Indexing: Selectively Apply Operations Based on a Condition - -[This section: Forthcoming.] - -[Link: Boolean Indexing Unit Tests](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/indexing/BooleanIndexingTest.java) - - -## Workspaces - -Workspaces are a feature of ND4J used to improve performance, by means of more efficient memory allocation and management. Specifically, workspaces are designed for cyclical workloads - such as training neural networks - as they allow for off-heap memory reuse (instead of continually allocating and deallocating memory on each iteration of the loop). The net effect is improved performance and reduced memory use. - -For more details on workspaces, see the following links: - -* Deeplearning4j Guide to Workspaces -* Workspaces Examples - -### Workspaces: Scope Panic - -Sometimes with workspaces, you may encounter an exception such as: -``` -org.nd4j.linalg.exception.ND4JIllegalStateException: Op [set] Y argument uses leaked workspace pointer from workspace [LOOP_EXTERNAL] -For more details, see the ND4J User Guide: nd4j.org/userguide#workspaces-panic -``` -or -``` -org.nd4j.linalg.exception.ND4JIllegalStateException: Op [set] Y argument uses outdated workspace pointer from workspace [LOOP_EXTERNAL] -For more details, see the ND4J User Guide: nd4j.org/userguide#workspaces-panic -``` - - -**Understanding Scope Panic Exceptions** - -In short: these exceptions mean that an INDArray that has been allocated in a workspace is being used incorrectly (for example, a bug or incorrect implementation of some method). This can occur for two reasons: - -1. The INDArray has 'leaked out' of the workspace in which it was defined -2. The INDArray is used within the correct workspace, but from a previous iteration - -In both cases, the underlying off-heap memory that the INDArray points to has been invalidated, and can no longer be used. 
- -An example sequence of events leading to a workspace leak: -1. Workspace W is opened -2. INDArray X is allocated in workspace W -3. Workspace W is closed, and hence the memory for X is no longer valid. -4. INDArray X is used in some operation, resulting in an exception - -An example sequence of events, leading to an outdated workspace pointer: -1. Workspace W is opened (iteration 1) -2. INDArray X is allocated in workspace W (iteration 1) -3. Workspace W is closed (iteration 1) -4. Workspace W is opened (iteration 2) -5. INDArray X (from iteration 1) is used in some operation, resulting in an exception - -**Workarounds and Fixes for Scope Panic Exceptions** - -There are two basic solutions, depending on the cause. - -First, if you have implemented some custom code (or are using workspaces manually), this usually indicates a bug in your code. -Generally, you have three options: -1. Detach the INDArray from all workspaces, using the ```INDArray.detach()``` method. The consequence is that the returned array is no longer associated with a workspace, and can be used freely within or outside of any workspace. -2. Don't allocate the array in the workspace in the first place. You can temporarily 'turn off' a workspace using: ```try(MemoryWorkspace scopedOut = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()){ }```. The consequence is that any new arrays (created via Nd4j.create, for example) within the try block will not be associated with a workspace, and can be used outside of a workspace -3. Move/copy the array to a parent workspace, using one of the ```INDArray.leverage()``` or ```leverageTo(String)``` or ```migrate()``` methods. See the Javadoc of these methods for more details. 
- - -Second, if you are using workspaces as part of Deeplearning4j and have not implemented any custom functionality (i.e., you have not written your own layer, data pipeline, etc), then (on the off-chance you run into this), this most likely indicates a bug in the underlying library, which usually should be reported via a Github issue. One possible workaround in the mean time is to disable workspaces using the following code: -``` -.trainingWorkspaceMode(WorkspaceMode.NONE) -.inferenceWorkspaceMode(WorkspaceMode.NONE) -``` - -If the exception is due to an issue in the data pipeline, you can try wrapping your ```DataSetIterator``` or ```MultiDataSetIterator``` in an ```AsyncShieldDataSetIterator``` or ```AsyncShieldMultiDataSetIterator```. - - -For either cause, a final solution - if you are sure your code is correct - is to try disabling scope panic. *Note that this is NOT recommended and can crash the JVM if a legitimate issue is present*. To do this, use ```Nd4j.getExecutioner().setProfilingMode(OpExecutioner.ProfilingMode.DISABLED);``` before executing your code. - - -## Advanced and Miscellaneous Topics - - -### Setting the data type - -ND4J currently allows INDArrays to be backed by either float or double-precision values. The default is single-precision (float). To set the order that ND4J uses for arrays globally to double precision, you can use: - -```java -Nd4j.setDataType(DataBuffer.Type.DOUBLE); -``` - -Note that this should be done before using ND4J operations or creating arrays. - -Alternatively, you can set the property when launching the JVM: -``` --Ddtype=double -``` - - -### Reshaping - -[This section: Forthcoming.] - - -### Flattening - -Flattening is the process of taking a or more INDArrays and converting them into a single flat array (a row vector), given some traversal order of the arrays. - -Nd4j provides the following methods for this: - -```java -Nd4j.toFlattened(char order, INDArray... 
arrays) -Nd4j.toFlattened(char order, Collection) -``` -Nd4j also provides overloaded toFlattened methods with the default ordering. The order argument must be 'c' or 'f', and defines the order in which values are taken from the arrays: c order results in the arrays being flattened using array indexes in an order like [0,0,0], [0,0,1], etc (for 3d arrays) whereas f order results in values being taken in order [0,0,0], [1,0,0], etc. - - - -### Permute - -[This section: Forthcoming.] - - -### sortRows/sortColumns - -[This section: Forthcoming.] - - -### Directly accessing BLAS operations - -[This section: Forthcoming.] - - -### Serialization - -Nd4j provides serialization of INDArrays many formats. Here are some examples for binary and text serialization: -```java -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.serde.binary.BinarySerde; - -import java.io.*; -import java.nio.ByteBuffer; - -INDArray arrWrite = Nd4j.linspace(1,10,10); -INDArray arrRead; - -//1. Binary format -// Close the streams manually or use try with resources. -try (DataOutputStream sWrite = new DataOutputStream(new FileOutputStream(new File("tmp.bin")))) { - Nd4j.write(arrWrite, sWrite); - } - -try (DataInputStream sRead = new DataInputStream(new FileInputStream(new File("tmp.bin")))) { - arrRead = Nd4j.read(sRead); - } - -//2. Binary format using java.nio.ByteBuffer; -ByteBuffer buffer = BinarySerde.toByteBuffer(arrWrite); -arrRead = BinarySerde.toArray(buffer); - -//3. Text format -Nd4j.writeTxt(arrWrite, "tmp.txt"); -arrRead = Nd4j.readTxt("tmp.txt"); - -// To read csv format: -// The writeNumpy method has been deprecated. -arrRead =Nd4j.readNumpy("tmp.csv", ", "); -``` - -The [nd4j-serde](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-serde) directory provides packages for Aeron, base64, camel-routes, gsom, jackson and kryo. 
- - -## Quick Reference: A Summary Overview of ND4J Methods - -This section lists the most commonly used operations in ND4J, in a summary form. More details on most of these can be found earlier in this page. - -In this section, assume that `arr`, `arr1` etc are INDArrays. - -**Creating NDArrays**: - -* Create a zero-initialized array: `Nd4j.zeros(nRows, nCols)` or `Nd4j.zeros(int...)` -* Create a one-initialized array: `Nd4j.ones(nRows, nCols)` -* Create a copy (duplicate) of an NDArray: `arr.dup()` -* Create a row/column vector from a `double[]`: `myRow = Nd4j.create(myDoubleArr)`, `myCol = Nd4j.create(myDoubleArr,new int[]{10,1})` -* Create a 2d NDArray from a `double[][]`: `Nd4j.create(double[][])` -* Stacking a set of arrays to make a larger array: `Nd4j.hstack(INDArray...)`, `Nd4j.vstack(INDArray...)` for horizontal and vertical respectively -* Uniform random NDArrays: `Nd4j.rand(int,int)`, `Nd4j.rand(int[])` etc -* Normal(0,1) random NDArrays: `Nd4j.randn(int,int)`, `Nd4j.randn(int[])` - -**Determining the Size/Dimensions of an INDArray**: - -The following methods are defined by the INDArray interface: - -* Get the number of dimensions: `rank()` -* For 2d NDArrays only: `rows()`, `columns()` -* Size of the ith dimension: `size(i)` -* Get the size of all dimensions, as an int[]: `shape()` -* Determine the total number of elements in array: `arr.length()` -* See also: `isMatrix()`, `isVector()`, `isRowVector()`, `isColumnVector()` - -**Getting and Setting Single Values**: - -* Get the value at row i, column j: `arr.getDouble(i,j)` -* Getting a value from a 3+ dimensional array: `arr.getDouble(int[])` -* Set a single value in an array: `arr.putScalar(int[],double)` - -**Scalar operations**: -Scalar operations take a double/float/int value and do an operation for each element. As with element-wise operations, there are in-place and copy operations. 
- -* Add a scalar: arr1.add(myDouble) -* Subtract a scalar: arr1.sub(myDouble) -* Multiply by a scalar: arr1.mul(myDouble) -* Divide by a scalar: arr1.div(myDouble) -* Reverse subtract (scalar - arr1): arr1.rsub(myDouble) -* Reverse divide (scalar / arr1): arr1.rdiv(myDouble) - - -**Element-Wise Operations**: -Note: there are copy (add, mul, etc) and in-place (addi, muli) operations. The former: arr1 is not modified. In the latter: arr1 is modified - -* Adding: `arr1.add(arr2)` -* Subtract: `arr1.sub(arr2)` -* Multiply: `arr1.mul(arr2)` -* Divide: `arr1.div(arr2)` -* Assignment (set each value in arr1 to those in arr2): `arr1.assign(arr2)` - -**Reduction Operations (sum, etc)**: -Note that these operations operate on the entire array. Call `.doubleValue()` to get a double out of the returned Number. - -* Sum of all elements: `arr.sumNumber()` -* Product of all elements: `arr.prod()` -* L1 and L2 norms: `arr.norm1()` and `arr.norm2()` -* Standard deviation of all elements: `arr.stdNumber()` - -**Linear Algebra Operations**: - -* Matrix multiplication: `arr1.mmul(arr2)` -* Transpose a matrix: `transpose()` -* Get the diagonal of a matrix: `Nd4j.diag(INDArray)` -* Matrix inverse: `InvertMatrix.invert(INDArray,boolean)` - -**Getting Parts of a Larger NDArray**: -Note: `getRow(int)` returns a view of the original array, whereas `getRows(int...)` returns a copy (see the getRow() and putRow() section earlier in this page). - -* Getting a row (2d NDArrays only): `getRow(int)` -* Getting multiple rows as a matrix (2d only): `getRows(int...)` -* Setting a row (2d NDArrays only): `putRow(int,INDArray)` -* Getting the first 3 rows, all columns: `arr.get(NDArrayIndex.interval(0,3),NDArrayIndex.all());` - -**Element-Wise Transforms (Tanh, Sigmoid, Sin, Log etc)**: - -* Using [Transforms](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/ops/transforms/Transforms.java): `Transforms.sin(INDArray)`, `Transforms.log(INDArray)`, `Transforms.sigmoid(INDArray)` etc -* Directly (method 1): 
`Nd4j.getExecutioner().execAndReturn(new Tanh(INDArray))` -* Directly (method 2) `Nd4j.getExecutioner().execAndReturn(Nd4j.getOpFactory().createTransform("tanh",INDArray))` - - - -## FAQ: Frequently Asked Questions - -**Q: Does ND4J support sparse arrays?** - -At present: no. Support for sparse arrays is planned for the future. - -**Q: Is it possible to dynamically grow or shrink the size of an INDArray?** -In the current version of ND4J, this is not possible. We may add this functionality in the future, however. - -There are two possible work-arounds: - -1. Allocate a new array and do a copy (for example, a .put() operation) -2. Initially, pre-allocate a larger than required NDArray, and then operate on a view of that array. Then, as you need a larger array, get a larger view on the original pre-allocated array. \ No newline at end of file diff --git a/docs/nd4j/templates/quickstart.md b/docs/nd4j/templates/quickstart.md deleted file mode 100644 index 44b5116a7..000000000 --- a/docs/nd4j/templates/quickstart.md +++ /dev/null @@ -1,558 +0,0 @@ ---- -title: Quickstart -short_title: Quick start tutorial -description: ND4J Key features and brief samples. -category: ND4J -weight: 1 ---- - - - - -## Introduction - -ND4J is a scientific computing library for the JVM. It is meant to be used in production environments rather than as a research tool, which means routines are designed to run fast with minimum RAM requirements. The main features are: -* A versatile n-dimensional array object. -* Linear algebra and signal processing functions. -* Multiplatform functionality including GPUs. - * all major operating systems: win/linux/osx/android. - * architectures: x86, arm, ppc. - -This quickstart follows the same layout and approach of the [Numpy quickstart](https://docs.scipy.org/doc/numpy/user/quickstart.html). This should help people familiar with Python and Numpy get started quickly with Nd4J. 
- -## Prerequisites -You can use Nd4J from any [JVM Language](https://en.wikipedia.org/wiki/List_of_JVM_languages). (For example: Scala, Kotlin). You can use Nd4J with any build tool. The sample code in this quick start uses the following: - -* [Java (developer version)](./deeplearning4j-quickstart#Java) 1.7 or later (Only 64-Bit versions supported) -* [Apache Maven](./deeplearning4j-quickstart#Maven) (automated build and dependency manager) -* [Git](./deeplearning4j-quickstart#Git) (distributed version control system) - -To improve readability we show you the output of `System.out.println(...)`. But we have not show the print statement in the sample code. If you are confident you know how to use maven and git, please feel free to skip to the [Basics](#Basics). In the remainder of this section we will build a small 'hello ND4J' application to verify the prequisites are set up correctly. - -Execute the following commands to get the project from github. - - -```shell -git clone https://github.com/RobAltena/HelloNd4J.git - -cd HelloNd4J - -mvn install - -mvn exec:java -Dexec.mainClass="HelloNd4j" -``` - -When everything is set up correctly you should see the following output: - -```shell -[ 0, 0] -``` - -## Basics -The main feature of Nd4j is the versatile n-dimensional array interface called INDArray. To improve performance Nd4j uses [off-heap memory](./deeplearning4j-config-memory) to store data. The INDArray is different from standard Java arrays. - -Some of the key properties and methods for an INDArray x are as follows: - -```java -import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.linalg.api.buffer.DataType; - -INDArray x = Nd4j.zeros(3,4); - -// The number of axes (dimensions) of the array. -int dimensions = x.rank(); - -// The dimensions of the array. The size in each dimension. -long[] shape = x.shape(); - -// The total number of elements. -long length = x.length(); - -// The type of the array elements. 
-DataType dt = x.dataType(); -``` - - -### Array Creation -To create INDArrays you use the static factory methods of the [Nd4j](https://deeplearning4j.org/api/latest/org/nd4j/linalg/factory/Nd4j.html ) class. - - - -The `Nd4j.createFromArray` function is overloaded to make it easy to create INDArrays from regular Java arrays. The example below uses Java `double` arrays. Similar create methods are overloaded for `float`, `int` and `long`. The `Nd4j.createFromArray` function has overloads up to 4d for all types. - -```java -double arr_2d[][]={{1.0,2.0,3.0},{4.0,5.0,6.0},{7.0,8.0,9.0}}; -INDArray x_2d = Nd4j.createFromArray(arr_2d); - -double arr_1d[]={1.0,2.0,3.0}; -INDArray x_1d = Nd4j.createFromArray(arr_1d); -``` - -Nd4j can create arrays initialized with zeros and ones using the functions `zeros` and `ones`. The `rand` function allows you to create an array initialized with random values. -The default datatype of the INDArray created is `float`. Some overloads allow you to set the datatype. - -```java -INDArray x = Nd4j.zeros(5); -//[ 0, 0, 0, 0, 0], FLOAT - -int [] shape = {5}; -x = Nd4j.zeros(DataType.DOUBLE, 5); -//[ 0, 0, 0, 0, 0], DOUBLE - -// For higher dimensions you can provide a shape array. 2D random matrix example: -int rows = 4; -int cols = 5; -int[] shape = {rows, cols}; -INDArray x = Nd4j.rand(shape); -``` - -Use the `arange` functions to create an array of evenly spaces values: - -```java -INDArray x = Nd4j.arange(5); -// [ 0, 1.0000, 2.0000, 3.0000, 4.0000] - -INDArray x = Nd4j.arange(2, 7); -// [ 2.0000, 3.0000, 4.0000, 5.0000, 6.0000] -``` - -The `linspace` function allows you to specify the number of points generated: -```java -INDArray x = Nd4j.linspace(1, 10, 5); //start, stop, count. -// [ 1.0000, 3.2500, 5.5000, 7.7500, 10.0000] - -// Evaluate a function over many points. 
-import static org.nd4j.linalg.ops.transforms.Transforms.sin; -INDArray x = Nd4j.linspace(0.0, Math.PI, 100, DataType.DOUBLE); -INDArray y = sin(x); -``` - -### Printing Arrays -The INDArray supports Java's `toString()` method. The current implementation has limited precision and a limited number of elements. The output is similar to printing NumPy arrays: -```java -INDArray x = Nd4j.arange(6); //1d array -System.out.println(x); //We just give the output of the print command from here on. -// [ 0, 1.0000, 2.0000, 3.0000, 4.0000, 5.0000] - -int [] shape = {4,3}; -x = Nd4j.arange(12).reshape(shape); //2d array -/* -[[ 0, 1.0000, 2.0000], - [ 3.0000, 4.0000, 5.0000], - [ 6.0000, 7.0000, 8.0000], - [ 9.0000, 10.0000, 11.0000]] -*/ - -int [] shape2 = {2,3,4}; -x = Nd4j.arange(24).reshape(shape2); //3d array -/* -[[[ 0, 1.0000, 2.0000, 3.0000], - [ 4.0000, 5.0000, 6.0000, 7.0000], - [ 8.0000, 9.0000, 10.0000, 11.0000]], - - [[ 12.0000, 13.0000, 14.0000, 15.0000], - [ 16.0000, 17.0000, 18.0000, 19.0000], - [ 20.0000, 21.0000, 22.0000, 23.0000]]] -*/ -``` - -### Basic Operations -You will have to use INDArray methods to perform operations on your arrays. There are in-place and copy overloads and scalar and element wise overloaded versions. The in-place operators return a reference to the array so you can conveniently chain operations together. Use in-place operators where possible to improve performance. Copy operators have new array creation overhead. - -```java -//Copy -arr_new = arr.add(scalar); // return a new array with scalar added to each element of arr. -arr_new = arr.add(other_arr); // return a new array with element wise addition of arr and other_arr. - -//in place. -arr_new = arr.addi(scalar); //Heads up: arr_new points to the same array as arr. -arr_new = arr.addi(other_arr); -``` - -addition: arr.add(...), arr.addi(...) -substraction: arr.sub(...), arr.subi(...) -multiplication: arr.mul(...), arr.muli(...) -division: arr.div(...), arr.divi(...) 
- -When you perform the basic operations you must make sure the underlying data types are the same. -```java -int [] shape = {5}; -INDArray x = Nd4j.zeros(shape, DataType.DOUBLE); -INDArray x2 = Nd4j.zeros(shape, DataType.INT); -INDArray x3 = x.add(x2); -// java.lang.IllegalArgumentException: Op.X and Op.Y must have the same data type, but got INT vs DOUBLE - -// casting x2 to DOUBLE solves the problem: -INDArray x3 = x.add(x2.castTo(DataType.DOUBLE)); -``` - - -The INDArray has methods implementing reduction/accumulation operations such as `sum`, `min`, `max`. -```java -int [] shape = {2,3}; -INDArray x = Nd4j.rand(shape); -x; -x.sum(); -x.min(); -x.max(); -/* -[[ 0.8621, 0.9224, 0.8407], - [ 0.1504, 0.5489, 0.9584]] -4.2830 -0.1504 -0.9584 -*/ -``` - -Provide a dimension argument to apply the operation across the specified dimension: - -```java -INDArray x = Nd4j.arange(12).reshape(3, 4); -/* -[[ 0, 1.0000, 2.0000, 3.0000], - [ 4.0000, 5.0000, 6.0000, 7.0000], - [ 8.0000, 9.0000, 10.0000, 11.0000]] -*/ - -x.sum(0); // Sum of each column. -//[ 12.0000, 15.0000, 18.0000, 21.0000] - -x.min(1); // Min of each row -//[ 0, 4.0000, 8.0000] - -x.cumsum(1); // cumulative sum across each row, -/* -[[ 0, 1.0000, 3.0000, 6.0000], - [ 4.0000, 9.0000, 15.0000, 22.0000], - [ 8.0000, 17.0000, 27.0000, 38.0000]] -*/ - -``` - - -### Transform operation -Nd4j provides familiar mathematical functions such as sin, cos, and exp. These are called transform operations. The result is returned as an INDArray. 
- -```java -import static org.nd4j.linalg.ops.transforms.Transforms.exp; -import static org.nd4j.linalg.ops.transforms.Transforms.sqrt; - -INDArray x = Nd4j.arange(3); -// [ 0, 1.0000, 2.0000] -exp(x); -// [ 1.0000, 2.7183, 7.3891] -sqrt(x); -// [ 0, 1.0000, 1.4142] -``` - -You can check out a complete list of transform operations in the [Javadoc](https://deeplearning4j.org/api/latest/org/nd4j/linalg/api/ops/impl/transforms/package-summary.html ) - - -### Matrix multiplication -We have already seen the element wise multiplcation in the basic operations. The other Matrix operations have their own methods: - -```java -INDArray x = Nd4j.arange(12).reshape(3, 4); -/* -[[ 0, 1.0000, 2.0000, 3.0000], - [ 4.0000, 5.0000, 6.0000, 7.0000], - [ 8.0000, 9.0000, 10.0000, 11.0000]] -*/ - -INDArray y = Nd4j.arange(12).reshape(4, 3); -/* -[[ 0, 1.0000, 2.0000], - [ 3.0000, 4.0000, 5.0000], - [ 6.0000, 7.0000, 8.0000], - [ 9.0000, 10.0000, 11.0000]] -*/ - -x.mmul(y); // matrix product. -/* -[[ 42.0000, 48.0000, 54.0000], - [ 114.0000, 136.0000, 158.0000], - [ 186.0000, 224.0000, 262.0000]] -*/ - -// dot product. -INDArray x = Nd4j.arange(12); -INDArray y = Nd4j.arange(12); -dot(x, y); -//506.0000 -``` - -### Indexing, Slicing and Iterating -Indexing, Slicing and Iterating is harder in Java than in Python. -To retreive individual values from an INDArray you can use the `getDouble`, `getFloat` or `getInt` methods. INDArrays cannot be indexed like Java arrays. You can get a Java array from an INDArray using `toDoubleVector()`, `toDoubleMatrix()`, `toFloatVector()` and `toFloatMatrix()` - -```java - -INDArray x = Nd4j.arange(12); -// [ 0, 1.0000, 2.0000, 3.0000, 4.0000, 5.0000, 6.0000, 7.0000, 8.0000, 9.0000, 10.0000, 11.0000] - -float f = x.getFloat(3); // Single element access. Other methods: getDouble, getInt, ... -// 3.0 - -float [] fArr = x.toFloatVector(); //Convert to Java array. 
-// [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0] - -INDArray x2 = x.get(NDArrayIndex.interval(2, 6)); -// [ 2.0000, 3.0000, 4.0000, 5.0000] - -// On a copy of x: From start to position 6, exclusive, set every 2nd element to -1.0 -INDArray y = x.dup(); -y.get(NDArrayIndex.interval(0, 2, 6)).assign(-1.0); -//[ -1.0000, 1.0000, -1.0000, 3.0000, -1.0000, 5.0000, 6.0000, 7.0000, 8.0000, 9.0000, 10.0000, 11.0000] - -// reversed copy of y. -INDArray y2 = Nd4j.reverse(y.dup()); -//[ 11.0000, 10.0000, 9.0000, 8.0000, 7.0000, 6.0000, 5.0000, -1.0000, 3.0000, -1.0000, 1.0000, -1.0000] - -``` - -For multidimensional arrays you should use `INDArray.get(NDArrayIndex...)`. The example below shows how to iterate over the rows and columns of a 2D array. Note that for 2D arrays we could have used the `getColumn` and `getRow` convenience methods. - -```java -// Iterate over the rows and columns of a 2d arrray. -int rows = 4; -int cols = 5; -int[] shape = {rows, cols}; - -INDArray x = Nd4j.rand(shape); -/* -[[ 0.2228, 0.2871, 0.3880, 0.7167, 0.9951], - [ 0.7181, 0.8106, 0.9062, 0.9291, 0.5115], - [ 0.5483, 0.7515, 0.3623, 0.7797, 0.5887], - [ 0.6822, 0.7785, 0.4456, 0.4231, 0.9157]] -*/ - -for (int row=0; row - - - -## Copies and View -When working with INDArrays the data is not always copied. Here are three cases you should be aware of. - -### No Copy at All -Simple assignments make no copy of the data. Java passes objects by reference. No copies are made on a method call. - -```java -INDArray x = Nd4j.rand(2,2); -INDArray y = x; // y and x point to the same INData object. - -public static void f(INDArray x){ - // No copy is made. Any changes to x are visible after the function call. - } - -``` - -### View or Shallow Copy -Some functions will return a view of an array. 
- -```java -INDArray x = Nd4j.rand(3,4); -INDArray x2 = x.ravel(); -INDArray x3 = x.reshape(6,2); - -x2.putScalar(5, -1.0); // Changes x, x2 and x3 - -x -/* -[[ 0.8546, 0.1509, 0.0331, 0.1308], - [ 0.1753, -1.0000, 0.2277, 0.1998], - [ 0.2741, 0.8257, 0.6946, 0.6851]] -*/ - -x2 -// [ 0.8546, 0.1509, 0.0331, 0.1308, 0.1753, -1.0000, 0.2277, 0.1998, 0.2741, 0.8257, 0.6946, 0.6851] - -x3 -/* -[[ 0.8546, 0.1509], - [ 0.0331, 0.1308], - [ 0.1753, -1.0000], - [ 0.2277, 0.1998], - [ 0.2741, 0.8257], - [ 0.6946, 0.6851]] -*/ - -``` - -### Deep Copy -To make a copy of the array use the `dup` method. This will give you a new array with new data. - -```java -INDArray x = Nd4j.rand(3,4); -INDArray x2 = x.ravel().dup(); - -x2.putScalar(5, -1.0); // Now only changes x2. - -x -/* -[[ 0.1604, 0.0322, 0.8910, 0.4604], - [ 0.7724, 0.1267, 0.1617, 0.7586], - [ 0.6117, 0.5385, 0.1251, 0.6886]] -*/ - -x2 -// [ 0.1604, 0.0322, 0.8910, 0.4604, 0.7724, -1.0000, 0.1617, 0.7586, 0.6117, 0.5385, 0.1251, 0.6886] -``` - -## Functions and Methods Overview - -### Array Creation - [arange](https://deeplearning4j.org/api/latest/org/nd4j/linalg/factory/Nd4j.html#arange-double-double- ), - [create](https://deeplearning4j.org/api/latest/org/nd4j/linalg/factory/Nd4j.html#create-org.nd4j.linalg.api.buffer.DataBuffer- ), - [copy](https://deeplearning4j.org/api/latest/org/nd4j/linalg/factory/Nd4j.html#copy-org.nd4j.linalg.api.ndarray.INDArray-org.nd4j.linalg.api.ndarray.INDArray- ), - [empty](https://deeplearning4j.org/api/latest/org/nd4j/linalg/factory/Nd4j.html#empty-org.nd4j.linalg.api.buffer.DataBuffer.Type- ), - [empty_like](https://deeplearning4j.org/api/latest/org/nd4j/linalg/factory/Nd4j.html#emptyLike-org.nd4j.linalg.api.ndarray.INDArray- ), - [eye]( https://deeplearning4j.org/api/latest/org/nd4j/linalg/factory/Nd4j.html#eye-long- ), - [linspace](https://deeplearning4j.org/api/latest/org/nd4j/linalg/factory/Nd4j.html#linspace-double-double-long- ), - 
[meshgrid](https://deeplearning4j.org/api/latest/org/nd4j/linalg/factory/Nd4j.html#meshgrid-org.nd4j.linalg.api.ndarray.INDArray-org.nd4j.linalg.api.ndarray.INDArray- ), - [ones](https://deeplearning4j.org/api/latest/org/nd4j/linalg/factory/Nd4j.html#ones-int...- ), - [ones_like](https://deeplearning4j.org/api/latest/org/nd4j/linalg/factory/Nd4j.html#onesLike-org.nd4j.linalg.api.ndarray.INDArray- ), - [rand](https://deeplearning4j.org/api/latest/org/nd4j/linalg/factory/Nd4j.html#rand-int-int- ), - [readTxt](https://deeplearning4j.org/api/latest/org/nd4j/linalg/factory/Nd4j.html#readTxt-java.lang.String- ), - [zeros](https://deeplearning4j.org/api/latest/org/nd4j/linalg/factory/Nd4j.html#zeros-int...- ), - [zeros_like](https://deeplearning4j.org/api/latest/org/nd4j/linalg/factory/Nd4j.html#zerosLike-org.nd4j.linalg.api.ndarray.INDArray- ) - -### Conversions -[convertToDoubles](https://deeplearning4j.org/api/latest/org/nd4j/linalg/api/ndarray/INDArray.html#convertToDoubles-- ), -[convertToFloats](https://deeplearning4j.org/api/latest/org/nd4j/linalg/api/ndarray/INDArray.html#convertToFloats-- ), -[convertToHalfs](https://deeplearning4j.org/api/latest/org/nd4j/linalg/api/ndarray/INDArray.html#convertToHalfs-- ) - -### Manipulations -[concatenate](https://deeplearning4j.org/api/latest/org/nd4j/linalg/factory/Nd4j.html#concat-int-org.nd4j.linalg.api.ndarray.INDArray...- ), -[hstack](https://deeplearning4j.org/api/latest/org/nd4j/linalg/factory/Nd4j.html#hstack-org.nd4j.linalg.api.ndarray.INDArray...- ), - [ravel](https://deeplearning4j.org/api/latest/org/nd4j/linalg/api/ndarray/INDArray.html#ravel-- ), - [repeat](https://deeplearning4j.org/api/latest/org/nd4j/linalg/api/ndarray/INDArray.html#repeat-int-long...- ), - [reshape](https://deeplearning4j.org/api/latest/org/nd4j/linalg/api/ndarray/INDArray.html#reshape-long...- ), -[squeeze](https://deeplearning4j.org/api/latest/org/nd4j/linalg/factory/Nd4j.html#squeeze-org.nd4j.linalg.api.ndarray.INDArray-int- ), 
-[swapaxes](https://deeplearning4j.org/api/latest/org/nd4j/linalg/api/ndarray/INDArray.html#swapAxes-int-int- ), -[tear](https://deeplearning4j.org/api/latest/org/nd4j/linalg/factory/Nd4j.html#tear-org.nd4j.linalg.api.ndarray.INDArray-int...- ), -[transpose](https://deeplearning4j.org/api/latest/org/nd4j/linalg/api/ndarray/INDArray.html#transpose-- ), -[vstack](https://deeplearning4j.org/api/latest/org/nd4j/linalg/factory/Nd4j.html#vstack-org.nd4j.linalg.api.ndarray.INDArray...- ) - -### Ordering -[argmax](https://deeplearning4j.org/api/latest/org/nd4j/linalg/api/ndarray/INDArray.html#argMax-int...- ), -[max](https://deeplearning4j.org/api/latest/org/nd4j/linalg/api/ndarray/INDArray.html#max-int...- ), -[min](https://deeplearning4j.org/api/latest/org/nd4j/linalg/api/ndarray/INDArray.html#min-int...- ), -[sort](https://deeplearning4j.org/api/latest/org/nd4j/linalg/factory/Nd4j.html#sort-org.nd4j.linalg.api.ndarray.INDArray-int-boolean- ) - -### Operations -[choice](https://deeplearning4j.org/api/latest/org/nd4j/linalg/factory/Nd4j.html#choice-org.nd4j.linalg.api.ndarray.INDArray-org.nd4j.linalg.api.ndarray.INDArray-org.nd4j.linalg.api.ndarray.INDArray- ), -[cumsum](https://deeplearning4j.org/api/latest/org/nd4j/linalg/api/ndarray/INDArray.html#cumsum-int- ), -[mmul](https://deeplearning4j.org/api/latest/org/nd4j/linalg/api/ndarray/INDArray.html#mmul-org.nd4j.linalg.api.ndarray.INDArray- ), -[prod](https://deeplearning4j.org/api/latest/org/nd4j/linalg/api/ndarray/INDArray.html#prod-int...- ), -[put](https://deeplearning4j.org/api/latest/org/nd4j/linalg/api/ndarray/INDArray.html#put-org.nd4j.linalg.api.ndarray.INDArray-org.nd4j.linalg.api.ndarray.INDArray- ), -[putWhere](https://deeplearning4j.org/api/latest/org/nd4j/linalg/api/ndarray/INDArray.html#putWhere-org.nd4j.linalg.api.ndarray.INDArray-org.nd4j.linalg.api.ndarray.INDArray-org.nd4j.linalg.indexing.conditions.Condition- ), - 
[sum](https://deeplearning4j.org/api/latest/org/nd4j/linalg/api/ndarray/INDArray.html#sum-int...- ) - -### Basic Statistics -[covarianceMatrix](https://deeplearning4j.org/api/latest/org/nd4j/linalg/dimensionalityreduction/PCA.html#getCovarianceMatrix--), -[mean](https://deeplearning4j.org/api/latest/org/nd4j/linalg/api/ndarray/INDArray.html#mean-int...- ), -[std](https://deeplearning4j.org/api/latest/org/nd4j/linalg/api/ndarray/INDArray.html#std-int...- ), -[var](https://deeplearning4j.org/api/latest/org/nd4j/linalg/api/ndarray/INDArray.html#var-int...- ) - -### Basic Linear Algebra - -[cross](https://deeplearning4j.org/api/latest/org/nd4j/linalg/api/ops/impl/shape/Cross.html ), - [dot](https://deeplearning4j.org/api/latest/org/nd4j/linalg/api/ops/impl/accum/Dot.html ), [gesvd](https://deeplearning4j.org/api/latest/org/nd4j/linalg/api/blas/Lapack.html#gesvd-org.nd4j.linalg.api.ndarray.INDArray-org.nd4j.linalg.api.ndarray.INDArray-org.nd4j.linalg.api.ndarray.INDArray-org.nd4j.linalg.api.ndarray.INDArray- ), -[mmul](https://deeplearning4j.org/api/latest/org/nd4j/linalg/api/ndarray/INDArray.html#mmul-org.nd4j.linalg.api.ndarray.INDArray-) diff --git a/docs/python_doc.py b/docs/python_doc.py deleted file mode 100644 index 471f72f79..000000000 --- a/docs/python_doc.py +++ /dev/null @@ -1,74 +0,0 @@ -# -*- coding: utf-8 -*- - -################################################################################ -# Copyright (c) 2015-2018 Skymind, Inc. -# -# This program and the accompanying materials are made available under the -# terms of the Apache License, Version 2.0 which is available at -# https://www.apache.org/licenses/LICENSE-2.0. -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-# -# SPDX-License-Identifier: Apache-2.0 -################################################################################ - -import abc -import sys -from doc_generator import BaseDocumentationGenerator - - -class PythonDocumentationGenerator(BaseDocumentationGenerator): - - def __init__(self, args): - reload(sys) - sys.setdefaultencoding('utf8') - - super(PythonDocumentationGenerator, self).__init__(args) - - raise NotImplementedError - - """Process top class docstring - """ - @abc.abstractmethod - def process_main_docstring(self, doc_string): - raise NotImplementedError - - """Process method and other docstrings - """ - @abc.abstractmethod - def process_docstring(self, doc_string): - raise NotImplementedError - - """Takes unformatted signatures and doc strings and returns a properly - rendered piece that fits into our markdown layout. - """ - @abc.abstractmethod - def render(self, signature, doc_string, class_name, is_method): - raise NotImplementedError - - - """Returns main doc string of class/object in question. - """ - @abc.abstractmethod - def get_main_doc_string(self, class_string, class_name): - raise NotImplementedError - - - """Returns doc string and signature data for constructors. 
- """ - @abc.abstractmethod - def get_constructor_data(self, class_string, class_name, use_contructor): - raise NotImplementedError - - - """Returns doc string and signature data for methods - in the public API of an object - """ - @abc.abstractmethod - def get_public_method_data(self, class_string, includes, excludes): - raise NotImplementedError - diff --git a/docs/samediff/README.md b/docs/samediff/README.md deleted file mode 100644 index 73e21f3d7..000000000 --- a/docs/samediff/README.md +++ /dev/null @@ -1,10 +0,0 @@ -# samediff documentation - -To generate SameDiff docs into the `samediff/doc_sources` folder, run - -``` -python generate_docs.py \ - --project samediff \ - --language java \ - --code ../nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/ -``` \ No newline at end of file diff --git a/docs/samediff/pages.json b/docs/samediff/pages.json deleted file mode 100644 index 629ccec56..000000000 --- a/docs/samediff/pages.json +++ /dev/null @@ -1,60 +0,0 @@ -{ - "excludes": [ - "abstract" - ], - "indices": [ - { - "page": "samediff/index.md", - "module_index": "samediff" - }, - { - "page": "functions/index.md", - "module_index": "functions" - } - ], - "pages": [ - { - "page": "samediff/variables.md", - "class": [ - "samediff/SDVariable.java" - ], - "autogen_tag": "sdvars" - }, - { - "page": "samediff/samediff.md", - "class": [ - "samediff/SameDiff.java" - ], - "include": [ - "gradient", - "getGradient", - "updateVariableName" - ], - "constructors": true - }, - { - "page": "samediff/ops.md", - "class": [ - "samediff/SameDiff.java" - ], - "exclude": [ - "gradient", - "updateVariableName" - ], - "constructors": false - }, - { - "page": "samediff/execution.md", - "class": [ - "samediff/SameDiffOpExecutioner.java" - ] - }, - { - "page": "functions/function-factory.md", - "class": [ - "functions/DifferentialFunctionFactory.java" - ] - } - ] -} - diff --git a/docs/samediff/templates/adding-ops.md b/docs/samediff/templates/adding-ops.md 
deleted file mode 100644 index 89806aed4..000000000 --- a/docs/samediff/templates/adding-ops.md +++ /dev/null @@ -1,207 +0,0 @@ ---- -title: How to add new operations to SameDiff -short_title: Adding Ops -description: How to add differential functions and other ops to SameDiff graph. -category: SameDiff -weight: 2 ---- - -## How to add new operations to SameDiff - -### A quick SameDiff overview - -To get started with SameDiff, familiarize yourself with the `autodiff` module of the ND4J API located [here on GitHub.](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff) - -For better or worse, SameDiff code is organized in just a few key places. For basic usage and testing of SameDiff the following modules are key. We'll discuss some of them in more detail in just a bit. - -- `functions`: This module has the basic building blocks to build SameDiff variables and graphs. -- `execution`: has everything related to SameDiff graph execution. -- `gradcheck`: Utility functionality for checking SameDiff gradients, similar in structure to the respective tool in DL4J. -- `loss`: Loss functions for SameDiff -- `samediff`: Main SameDiff module to define, set up and run SameDiff operations and graphs. - -### Differential functions in the `functions` module - -See the `functions` module on [GitHub.](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/functions) - -The central abstraction of the `functions` module is `DifferentialFunction`, which underlies pretty much everything in SameDiff. Mathematically, what we're doing in SameDiff is build a directed acyclic graph whose nodes are differential functions, for which we can compute gradients. In that regard, `DifferentialFunction` makes up a SameDiff graph on a fundamental level. - -Note that each `DifferentialFunction` comes with a `SameDiff` instance. 
We'll discuss `SameDiff` and this relationship later on. Also, while there's only few key abstractions, they're essentially used everywhere, so it's almost impossible to discuss SameDiff concepts separately. Eventually we'll get around to each part. - -#### Properties and mappings - -Each differential function comes with _properties_. In the simplest case, a differential function just has a name. Depending on the operation in question, you'll usually have many more properties (think strides or kernel sizes in convolutions). When we import computation graphs from other projects (TensorFlow, ONNX, etc.) these properties need to be mapped to the conventions we're using internally. The methods `attributeAdaptersForFunction`, `mappingsForFunction`, `propertiesForFunction` and `resolvePropertiesFromSameDiffBeforeExecution` are what you want to look at to get started. - -Once properties are defined and properly mapped, you call `initFromTensorFlow` and `initFromOnnx` for TensorFlow and ONNX import, respectively. More on this later, when we discuss building SameDiff operations. - -#### Inputs and outputs - -A differential function is executed on a list of inputs, using function properties, and produces one or more output variables. You have access to many helper functions to set or access these variables: - -- `args()`: returns all input variables. -- `arg()`: returns the first input variable (the only one for unary operations). -- `larg()` and `rarg()`: return the first and second (read "left" and "right") argument for binary operations -- `outputVariables()`: returns a list of all output variables. Depending on the operation, this may be computed dynamically. As we'll see later on, to get the result for ops with a single output, we'll call `.outputVariables()[0]`. - -Handling output variables is tricky and one of the pitfalls in using and extending SameDiff. 
For instance, implementing `calculateOutputShape` for a differential function might be necessary, but if implemented incorrectly can lead to hard-to-debug failures. (Note that SameDiff will eventually call op execution in `libnd4j` and dynamic custom ops either infer output shapes or need to be provided with the correct output shape.) - -#### Automatic differentiation - -Automatic differentiation for a differential functions is implemented in a single method: `doDiff`. Each operation has to provide an implementation of `doDiff`. If you're implementing a SameDiff operation for a `libnd4j` op `x` and you're lucky to find `x_bp` (as in "back-propagation") you can use that and your `doDiff` implementation comes essentially for free. - -You'll also see a `diff` implementation that's used internally and calls `doDiff`. - -#### Differential function factory - -Importantly, each differential function has access to a _factory_, an instance of `DifferentialFunctionFactory`, by calling `f()`. More precisely, this will return the factory of the SameDiff instance the differential function has: - -```java -public DifferentialFunctionFactory f() { - return sameDiff.f(); -} -``` - -This is used in many places and gives you access to all differential functions currently registered in SameDiff. Think of this factory as a provider of operations. Here's an example of exposing `sum` to the `DifferentialFunctionFactory`: - -```java -public SDVariable sum(...) { - return new Sum(...).outputVariables()[0]; -} -``` - -We leave out the function arguments on purpose here. Note that all we do is redirect to the `Sum` operation defined elsewhere in ND4J and then return the first output variable (of type `SDVariable`, discussed in a second). Disregarding the implementation details for now, what this allows you to do is call `f().sum(...)` from anywhere you have access to a differential function factory. 
For instance, when implementing a SameDiff op `x` and you already have `x_bp` in your function factory, you can override `doDiff` for `x` - -```java -@Override -public List doDiff(List grad) { - ... - return Arrays.asList(f().x_bp(...)); -} -``` - - -### Building and executing graphs in `samediff` - -See the `samediff` module on [GitHub.](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff) - -Not surprisingly, this is where the magic happens. This module has the core structures that SameDiff operates with. First, let's have a look at the variables that make up SameDiff operations. - -#### SameDiff variables - -`SDVariable` (read SameDiff variable) extends `DifferentialFunction` and is to SameDiff what `INDArray` is to good old ND4J. In particular, SameDiff graphs operate on these variables and each individual operation takes in and spits out a list of `SDVariable`. An `SDVariable` comes with a name, is equipped with a `SameDiff` instance, has shape information and knows how to initialize itself with an ND4J `WeightInitScheme`. You'll also find a few helpers to set and get these properties. - -One of the few things an `SDVariable` can do that a `DifferentialFunction` can't it evaluate its result and return an underlying `INDArray` by calling `eval()`. This will run SameDiff internally and retrieve the result. A similar getter is `getArr()` which you can call at any point to get the current value of this variable. This functionality is used extensively in testing, to assert proper results. An `SDVariable` also has access to its current gradient through `gradient()`. Upon initialization there won't be any gradient, it will usually be computed at a later point. - -Apart from these methods, `SDVariable` also carries methods for concrete ops (and is in that regard a little similar to `DifferentialFunctionFactory`). 
For instance, defining `add` as follows: - -```java -public SDVariable add(double sameDiffVariable) { - return add(sameDiff.generateNewVarName(new AddOp().opName(),0),sameDiffVariable); -} -``` - -allows you to call `c = a.add(b)` on two SameDiff variables, the result of which can be accessed by `c.eval()`. - - -#### SameDiff - -The `SameDiff` class is the main workhorse of the module and brings together most of the concepts discussed so far. A little unfortunately, the inverse is also true and `SameDiff` instances are part of all other SameDiff module abstractions in some way or the other (which is why you've seen it many times already). Generally speaking, `SameDiff` is the main entry point for automatic differentiation and you use it to define a symbolic graph that carries operations on `SDVariable`s. Once built, a SameDiff graph can be run in a few ways, for instance `exec()` and `execAndEndResult()`. - -Convince yourself that invoking `SameDiff()` sets up a [million things!]( https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/SameDiff.java#L817-L846) Essentially, `SameDiff` will collect and give you access (in terms of both getters and setters) to - -- All differential functions for the graph, with all their properties, which can be accessed in various ways (e.g. name or id). -- All inputs and output information for said functions. -- All function properties and how to map them, `propertiesToResolve` and `propertiesForFunction` are of particular note. - -`SameDiff` is also the place where you expose new operations to the SameDiff module. Essentially, you write a little wrapper for the respective operation in the `DifferentialFunctionFactory` instance `f()`. 
Here's an example for cross products: - -```java -public SDVariable cross(SDVariable a, SDVariable b) { - return cross(null, a, b); -} - -public SDVariable cross(String name, SDVariable a, SDVariable b) { - SDVariable ret = f().cross(a, b); - return updateVariableNameAndReference(ret, name); -} -``` - -#### SameDiff execution examples and tests - -At this point it might be a good idea to check out and run a few examples. SameDiff tests are a good source for that. Here's an example of how to multiply two SameDiff variables - -```java -SameDiff sd = SameDiff.create(); - -INDArray inArr = Nd4j.linspace(1, n, n).reshape(inOrder, d0, d1, d2); -INDArray inMul2Exp = inArr.mul(2); - -SDVariable in = sd.var("in", inArr); -SDVariable inMul2 = in.mul(2.0); - -sd.exec(); -``` - -This example is taken from [SameDiffTests](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/samediff/SameDiffTests.java), one of the main test sources, in which you also find a few complete end-to-end examples. - -The second place you find tests is in [gradcheck](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/gradcheck). Whenever you add a new operation to SameDiff, add tests for the forward pass and gradient checks as well. - -The third set of relevant tests is stored in [imports](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/imports) and contains test for importing TensorFlow and ONNX graphs. On a side note, the resources for these import tests are generated in our [TFOpsTests](https://github.com/deeplearning4j/TFOpTests) project. - -### Creating and exposing new SameDiff ops - -We've seen how ND4J operations get picked up by `DifferentialFunctionFactory` and `SameDiff` to expose them to SameDiff at various levels. As for actually implementing these ops, you need to know a few things. 
In libnd4j you find two classes of operations, which are described [here](https://github.com/eclipse/deeplearning4j/blob/master/libnd4j/AddingNewOps.md) in detail. We'll show how to implement both op types. - -All operations go [here](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl), and most of the time it's obvious where exactly to put the ops. Special attention goes to `layers`, which is reserved for deep learning layer implementations (like [`Conv2D`](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/Conv2D.java)). These higher-level ops are based on the concept of [Modules](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/BaseModule.java), similar to modules in pytorch or layers in TensorFlow. These layer op implementation also provide a source of more involved op implementations. - -#### Implementing legacy operations - -Legacy (or XYZ) operations are the old breed of ND4J operations with a characteristic "xyz" signature. Here's how to implement cosine in ND4J by wrapping the `cos` legacy op from libn4j: [Cosine implementation](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/Cos.java#L38-L72). When it comes to SameDiff, the good thing about legacy ops is that they're already available in ND4J, but need to be augmented by SameDiff specific functionality to pass the muster. Since the cosine function does not have any properties, this implementation is straightforward. 
The parts that make this op SameDiff compliant are: - -- You specify SameDiff constructors [here](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/Cos.java#L38-L51) -- You implement `doDiff` [here] (https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/Cos.java#L38-L51) -- You specify a SameDiff `opName`, a TensorFlow `tensorflowName` and an ONNX `onnxName` [here](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/Cos.java#L74-L93). - -If you look closely, this is only part of the truth, since `Cos` extends `BaseTransformOp`, which implements other SameDiff functionality. (Note that `BaseTransformOp` is a [`BaseOp`](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/BaseOp.java), which extends `DifferentialFunction` from earlier.) For instance, `calculateOutputShape` is [implemented there](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/BaseTransformOp.java#L195-L207). If you want to implement a new transform, you can simply inherit from `BaseTransformOp`, too. For other op types like reductions etc. there are op base classes available as well, meaning you only need to address the three bullet points above. - -In the rare case you need to write a legacy op from scratch, you'll have to find the respective op number from libn4j, which can be found in `legacy_ops.h`. 
- -#### Implementing Dynamic Custom Operations - -[`DynamicCustomOp`](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/DynamicCustomOp.java) is the new kind of operation from libnd4j and all recent additions are implemented as such. This operation type in ND4J directly extends `DifferentialFunction`. - -[Here's](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/BatchToSpace.java) an example of the `BatchToSpace` operation, which inherits from `DynamicCustomOp`: - -- BatchToSpace is [initialized](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/BatchToSpace.java#L49-L67) with two properties, `blocks` and `crops`. Note how `blocks` and `crops`, which are both of integer type, get added to _integer arguments_ for the operation by calling `addIArgument`. For float arguments and other _types_, use `addTArgument` instead. -- The operation gets its own name and [names for import](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/BatchToSpace.java#L69-L82), -- and `doDiff` is [implemented](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/BatchToSpace.java#L84-L89). 
- -The BatchToSpace operation is then integrated into `DifferentialFunctionFactory` [here](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/functions/DifferentialFunctionFactory.java#L840-L844), exposed to `SameDiff` [here](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/SameDiff.java#L2105-L2107) and tested [here](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/gradcheck/GradCheckTransforms.java#L151-L191). - -The only thing BatchToSpace is currently missing is _property mapping_. We call the properties for this operation `blocks` and `crops`, but in ONNX or TensorFlow they might be called and stored quite differently. To look up the differences for mappings this correctly, see [`ops.proto`](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/resources/ops.proto) for TensorFlow and [`onnxops.json`](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/resources/onnxops.json) for ONNX. - - -Let's look at another operation that does property mapping right, namely [`DynamicPartition`](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/DynamicPartition.java). This op has precisely one property, called `numPartitions` in SameDiff. To map and use this property, you do the following: - -- Implement a little helper method called [`addArgs`](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/DynamicPartition.java#L59-L61) that is used in the constructor of the op and in an import helper one-liner that we're discussing next. 
It's not necessary, but encouraged to do this and call it `addArgs` consistently, for clarity. -- Override [`initFromTensorFlow` method](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/DynamicPartition.java#L63-L67) that maps properties for us using a `TFGraphMapper` instance and adding arguments with `addArgs`. Note that since ONNX does not support dynamic partitioning at the time of this writing (hence no `onnxName`) there's also no `initFromOnnx` method, which works pretty much the same way as `initFromTensorFlow`. -- For the TensorFlow import to work, we also need to [override `mappingsForFunction`](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/DynamicPartition.java#L70-L83). This example of a mapping is very simple, all it does is map TensorFlow's property name `num_partititions` to our name `numPartitions`. - -Note that while `DynamicPartition` has proper property mapping, it currently does not have a working `doDiff` implementation. - -As a last example, we show one that has a little more interesting property mapping setup, namely [`Dilation2D`](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/Dilation2D.java). Not only has this op far more properties to map, as you can see in [`mappingsForFunction`](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/Dilation2D.java#L59-L104), the properties also come with _property values_, as defined in [`attributeAdaptersForFunction`](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/Dilation2D.java#L106-L132). 
We've chosen to show this op because it is one that has property mapping, but is neither exposed to `DifferentialFunctionFactory` nor `SameDiff`. - -Hence, the three `DynamicCustomOp` examples shown each come with their own defects and represent examples of the work that has to be done for SameDiff. To summarize, to add a new SameDiff op you need to: - -- Create a new operation in ND4J that extends `DifferentialFunction`. How exactly this implementation is set up depends on the - - op generation (legacy vs. dynamic custom) - - op type (transform, reduction, etc.) -- Define your own op name, as well as TensorFlow and ONNX names. -- Define necessary SameDiff constructors -- Use `addArgs` to add op arguments in a reusable way. -- Expose the operation in `DifferentialFunctionFactory` first and then wrap it in `SameDiff` (or `SDVariable` for variable methods). -- Implement `doDiff` for automatic differentiation. -- Override `mappingsForFunction` to map properties for TensorFlow and ONNX -- If necessary, also provide an attribute adapter by overriding `attributeAdaptersForFunction`. -- Add import one-liners for TensorFlow and ONNX by adding `initFromTensorFlow` and `initFromOnnx` (using `addArgs`). -- Test, test, test. 
diff --git a/docs/samediff/templates/building-graphs.md b/docs/samediff/templates/building-graphs.md deleted file mode 100644 index f61100923..000000000 --- a/docs/samediff/templates/building-graphs.md +++ /dev/null @@ -1 +0,0 @@ -# Getting started: building and running SameDiff graphs \ No newline at end of file diff --git a/docs/samediff/templates/dl4j-integration.md b/docs/samediff/templates/dl4j-integration.md deleted file mode 100644 index 9058d5e52..000000000 --- a/docs/samediff/templates/dl4j-integration.md +++ /dev/null @@ -1 +0,0 @@ -# Getting started: How SameDiff fits into DL4J \ No newline at end of file diff --git a/docs/samediff/templates/execution.md b/docs/samediff/templates/execution.md deleted file mode 100644 index 1d01c14cb..000000000 --- a/docs/samediff/templates/execution.md +++ /dev/null @@ -1,3 +0,0 @@ -# SameDiff graph execution - -{{autogenerated}} \ No newline at end of file diff --git a/docs/samediff/templates/function-factory.md b/docs/samediff/templates/function-factory.md deleted file mode 100644 index 1f378b474..000000000 --- a/docs/samediff/templates/function-factory.md +++ /dev/null @@ -1,3 +0,0 @@ -# Samediff's differential function factory - -{{autogenerated}} \ No newline at end of file diff --git a/docs/samediff/templates/graphs.md b/docs/samediff/templates/graphs.md deleted file mode 100644 index 151174aa1..000000000 --- a/docs/samediff/templates/graphs.md +++ /dev/null @@ -1,3 +0,0 @@ -# SameDiff graphs - -{{autogenerated}} \ No newline at end of file diff --git a/docs/samediff/templates/model-import.md b/docs/samediff/templates/model-import.md deleted file mode 100644 index 400ea5fae..000000000 --- a/docs/samediff/templates/model-import.md +++ /dev/null @@ -1,71 +0,0 @@ ---- -title: Getting started: importing TensorFlow models into SameDiff -short_title: Model import -description: importing TensorFlow models into SameDiff -category: SameDiff -weight: 3 ---- - -# Getting started: importing TensorFlow models into SameDiff - 
-## What models can be imported into SameDiff - -Currently SameDiff supports the import of TensorFlow frozen graphs through the various SameDiff.importFrozenTF methods. -TensorFlow documentation on frozen models can be found [here](https://www.TensorFlow.org/guide/saved_model#the_savedmodel_format_on_disk). - - import org.nd4j.autodiff.SameDiff.SameDiff; - - SameDiff sd = SameDiff.importFrozenTF(modelFile); - - ## Finding the model input/outputs and running inference - - After you import the TensorFlow model there are 2 ways to find the inputs and outputs. The first method is to look at the output of - - sd.summary(); - - Where the input variables are the output of no ops, and the output variables are the input of no ops. Another way to find the inputs is - - List inputs = sd.inputs(); - - To run inference use: - - INDArray out = sd.batchOutput() - .input(inputs, inputArray) - .output(outputs) - .execSingle(); - -For multiple outputs, use `exec()` instead of `execSingle()`, to return a `Map` of outputs instead. -Alternatively, you can use methods such as `SameDiff.output(Map placeholders, String... outputs)` to get the same output. - -## Import Validation. -We have a TensorFlow graph analyzing utility which will report any missing operations (operations that still need to be implemented) [here](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/TensorFlow/TensorFlowImportValidator.java) - -## Advanced: Node Skipping and Import Overrides -It is possible to remove nodes from the network. For example TensorFlow 1.x models can have hard coded dropout layers. -See the [BERT Graph test](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/imports/TFGraphs/BERTGraphTest.java#L114-L150) for an example. - -## List of models known to work with SameDiff. 
- -- [PorV-RNN](https://deeplearning4jblob.blob.core.windows.net/testresources/PorV-RNN_frozenmodel.pb) -- [alexnet](https://deeplearning4jblob.blob.core.windows.net/testresources/alexnet_frozenmodel.pb) -- [cifar10_gan_85](https://deeplearning4jblob.blob.core.windows.net/testresources/cifar10_gan_85_frozenmodel.pb) -- [deeplab_mobilenetv2_coco_voc_trainval](http://download.tensorflow.org/models/deeplabv3_mnv2_pascal_trainval_2018_01_29.tar.gz) -- [densenet_2018_04_27](https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/densenet_2018_04_27.tgz) -- [inception_resnet_v2_2018_04_27](https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/inception_resnet_v2_2018_04_27.tgz) -- [inception_v4_2018_04_27](https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/inception_v4_2018_04_27.tgz) -- [labels](https://github.com/KonduitAI/dl4j-test-resources/tree/master/src/main/resources/tf_graphs/zoo_models/labels) -- [mobilenet_v1_0.5_128](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_0.5_128.tgz) -- [mobilenet_v2_1.0_224](http://download.tensorflow.org/models/tflite_11_05_08/mobilenet_v2_1.0_224.tgz) -- [nasnet_mobile_2018_04_27](https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/nasnet_mobile_2018_04_27.tgz) -- [resnetv2_imagenet_frozen_graph](http://download.tensorflow.org/models/official/resnetv2_imagenet_frozen_graph.pb) -- [squeezenet_2018_04_27](https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/squeezenet_2018_04_27.tgz) -- [temperature_bidirectional_63](https://deeplearning4jblob.blob.core.windows.net/testresources/temperature_bidirectional_63_frozenmodel.pb) -- [temperature_stacked_63](https://deeplearning4jblob.blob.core.windows.net/testresources/temperature_stacked_63_frozenmodel.pb) -- 
[text_gen_81](https://deeplearning4jblob.blob.core.windows.net/testresources/text_gen_81_frozenmodel.pb) - -## Operations Coverage - -SameDiff's TensorFlow import is still being developed, and does not yet have support for every single operation and datatype in TensorFlow. -Almost all of the common/standard operations are importable and tested, however - including almost everything in the tf, tf.math, tf.layers, tf.losses, tf.bitwise and tf.nn namespaces. The majority of existing pretrained models out there should be importable into SameDiff. - -If you run into an operation that can't be imported, feel free to [open an issue](https://github.com/eclipse/deeplearning4j/issues). \ No newline at end of file diff --git a/docs/samediff/templates/ops.md b/docs/samediff/templates/ops.md deleted file mode 100644 index 81f2b3900..000000000 --- a/docs/samediff/templates/ops.md +++ /dev/null @@ -1,275 +0,0 @@ ---- -title: Operations in SameDiff -short_title: Ops -description: What kind of operations is there in `SameDiff` and how to use them -category: SameDiff -weight: 4 ---- - -# SameDiff operations - -Operations in `SameDiff` work mostly the way you'd expect them to. You take variables - in our framework, those are -objects of type `SDVariable` - apply operations to them, and thus produce new variables. Before we proceed to the -overview of the available operations, let us list some of their common properties. - -## Common properties of operations - -- Variables of any *variable type* may be used in any operation, as long as their *data types* match those that are -required by the operation (again, see our [variables](./samediff/variables) section for what variable types are). Most -often an operation will require its `SDVariable` to have a floating point data type. -- Variables created by operations have `ARRAY` variable type. -- For all operations, you may define a `String` name of your resulting variable, although for most operations this -is not obligatory. 
The name goes as the first argument in each operation, like so: -```java -SDVariable linear = weights.mmul("matrix_product", input).add(bias); -SDVariable output = sameDiff.nn.sigmoid("output", linear); -``` -Named variables may be accessed from outside using a `SameDiff` method `getVariable(String name)`. For the code above, -this method will allow you to infer the value of both `output` as well as the result of `mmul` operation. Note that we -haven't even explicitly defined this result as a separate `SDVariable`, and yet a corresponding `SDVariable` will be -created internally and added to our instance of `SameDiff` under the `String` name `"matrix_product"`. In fact, a unique -`String` name is given to every `SDVariable` you produce by operations: if you don't give a name explicitly, it is -assigned to the resulting `SDVariable` automatically based on the operation's name. - - -## Overview of operations -The number of currently available operations, including overloads totals several hundreds, they range in complexity from s -imple additions and multiplications via producing outputs of convolutional layers to creation of dedicated recurrent -neural network modules, and much more. The sheer number of operations would've made it cumbersome to list them all on a -single page. So, if you are already looking for something specific, you'll be better off checking our -[javadoc](https://deeplearning4j.org/api/latest/), which already contains a detailed information on each operation, or -by simply browsing through autocompletion suggestions (if your IDE supports that). Here we rather try to give you an -idea of what operations you may expect to find and where to seek for them. - -All operations may be split into two major branches: those which are methods of `SDVariable` and those of `SameDiff` -classes. 
Let us have a closer look at each: - -### `SDVariable` operations -We have already seen `SDVariable` operations in previous examples, in expressions like -```java -SDVariable z = x.add(y); -``` -where `x` and `y` are `SDVariable`'s. - -Among `SDVariable` methods, you will find: -- `BLAS`-type operations to perform linear algebra: things like `add`, `neg`, `mul` (used for both scaling and elementwise -multiplication) and `mmul` (matrix multiplication), `dot`, `rdiv`, etc.; -- comparison operations like `gt` or `lte`, used both to compare each element to a fixed `double` value as well as for -elementwise comparison with another `SDVariable` of the same shape, and alike; -- basic reduction operations: things like `min`, `sum`, `prod` (product of elements in array), `mean`, `norm2`, -`argmax` (index of the maximal element), `squaredDifference` and so on, which may be taken along specified dimensions; -- basic statistics operations for computing mean and standard deviation along given dimensions: `mean` and `std`. 
-- operations for restructuring of the underlying array: `reshape` and `permute`, along with `shape` - an operation that -delivers the shape of a variable as an array of integers - the dimension sizes; - -`SDVariable` operations may be easily chained, producing lines like: -```java -SDVariable regressionCost = weights.mmul(input).add("regression_prediction", bias).squaredDifference(labels); -``` - -### `SameDiff` operations -The operations that are methods of `SameDiff` are called via one of 6 auxiliary objects present in each `SameDiff`, -which split all operations into 6 uneven branches: -- `math` - for general mathematical operations; -- `random` - creating different random number generators; -- `nn` - general neural network tools; -- `cnn` - convolutional neural network tools; -- `rnn` - recurrent neural network tools; -- `loss` - loss functions; -In order to use a particular operation, you need to call one of these 6 objects form your `SameDiff` instance, and then -an operation itself, like that: -```java -SDVariable y = sameDiff.math.sin(x); -``` -or -```java -SDVariable y = samediff.math().sin(x); -``` -The distribution of operations among the auxiliary objects has no structural bearing beyond organizing things in a more -intuitive way. So, for instance, if you're not sure whether to seek for, say, `tanh` operation in `math` or in `nn`, -don't worry: we have it in both. - -Let us briefly describe what kinds of operations you may expect to find in each of the branches: - -### `math` - basic mathematical operations -Math module mostly consists of general mathematical functions and statistics methods. Those include: - -- power functions, e.g. `square`, `cube`, `sqrt`, `pow`, `reciprocal` etc.; -- trigonometric functions, e.g. 
`sin`, `atan` etc.; -- exponential/hyperbolic functions, like `exp`, `sinh`, `log`, `atanh` etc.; -- miscellaneous elementwise operations, like taking absolute value, rounding and clipping, such as `abs`, `sign`, -`ceil`, `round`, `clipByValue`, `clipByNorm` etc.; -- reductions along specified dimensions: `min`, `amax`, `mean`, `asum`, `logEntropy`, and similar; -- distance (reduction) operations, such as `euclideanDistance`, `manhattanDistance`, `jaccardDistance`, `cosineDistance`, -`hammingDistance`, `cosineSimilarity`, along specified dimensions, for two identically shaped `SDVariables`; -- specific matrix operations: `matrixInverse`, `matrixDeterminant`, `diag` (creating a diagonal matrix), `trace`, `eye` -(creating identity matrix with variable dimensions), and several others; -- more statistics operations: `standardize`, `moment`, `normalizeMoments`, `erf` and `erfc` (Gaussian error function and -its complementary); -- counting and indexing reductions: methods like `conuntZero` (number of zero elements), `iamin` (index of the element -with the smallest absolute value), `firstIndex` (an index of the first element satisfying a specified `Condition` function); -- reductions indicating properties of the underlying arrays. These include e.g. `isNaN` (elementwise checking), `isMax` -(shape-preserving along specified dimensions), `isNonDecreasing` (reduction along specified dimensions); -- elementwise logical operations: `and`, `or`, `xor`, `not`. - -Most operations in `math` have very simple structure, and are inferred like that: -```java -SDVariable activation = sameDiff.math.cube(input); -``` -Operations may be chained, although in a more cumbersome way in comparison to the `SDVariable` operations, e.g.: -```java -SDVariable matrixNorm1 = sameDiff.math.max(sameDiff.math.sum(sameDiff.math.abs(matrix), 1)); -``` -Observe that the (integer) argument `1` in the `sum` operation tells us that we have to take maximum absolute value -along the `1`'s dimension, i.e. 
the column of the matrix. - -### `random` - creating random values Random -These operations create variables whose underlying arrays will be filled with random numbers following some distribution -- say, Bernoulli, normal, binomial etc.. These values will be reset at each iteration. If you wish, for instance, -to create a variable that will add a Gaussian noise to entries of the MNIST database, you may do something like: -```java -double mean = 0.; -double deviation = 0.05; -long[] shape = new long[28, 28]; -SDVariable noise_mnist = sameDiff.random.normal("noise_mnist", mean, deviation, shape); -``` -The shape of you random variable may vary. Suppose, for instance, that you have audio signals of varying length, and you -want to add noise to them. Then, you need to specify an `SDVariable`, say, `windowShape` with an integer -[data type](./samediff/variabeles/datatype!!!), and proceed like that -```java -SDVariabel noise_audio = sameDiff.random.normal("noise_audio", mean, deviation, windowShape); -``` - -### `nn` - general neural network tools -Here we store methods for neural networks that are not necessarily associated with convolutional ones. Among them are -- creation of dense linear and ReLU layers (with or without bias), and separate bias addition: `linear`, `reluLayer`, -`biasAdd`; -- popular activation functions, e.g. `relu`, `sigmoid`, `tanh`, `softmax` as well as their less used versions like -`leakyRelu`, `elu`, `hardTanh`, and many more; -- padding for 2d arrays with method `pad`, supporting several padding types, with both constant and variable padding width; -- explosion/overfitting prevention, such as `dropout`, `layerNorm` and `batchNorm` for layer resp. batch normalization; - -Some methods were created for internal use, but are openly available. 
Those include: -- derivatives for several popular activation functions - these are mostly designed for speeding up -backpropagation; -- attention modules - basically, building blocks for recurrent neural networks we shall discuss below. - -While activations in `nn` are fairly simple, other operations become more involved. Say, to create a linear -or a ReLU layer, up to three predefined `SDVariable` objects may be required, as in the following code: -```java -SDVariable denseReluLayer = sameDiff.nn.reluLayer(input, weights, bias); -``` -where `input`, `weights` and `bias` need to have dimensions suiting each other. - -To create, say, a dense layer with softmax activation, you may proceed as follows: -```java -SDVariable linear = sameDiff.nn.linear(input, weight, bias); -SDVariable output = sameDiff.nn.softmax(linear); -``` - -### `cnn` - convolutional neural networks tools -The `cnn` module contains layers and operations typically used in convolutional neural networks - -different activations may be picked up from the `nn` module. Among `cnn` operations we currently have creation of: -- linear convolution layers, currently for tensors of dimension up to 3 (minibatch not included): `conv1d`, `conv2d`, -`conv3d`, `depthWiseConv2d`, `separableConv2D`/`sconv2d`; -- linear deconvolution layers, currently `deconv1d`, `deconv2d`, `deconv3d`; -- pooling, e.g. `maxPoooling2D`, `avgPooling1D`; -- specialized reshaping methods: `batchToSpace`, `spaceToDepth`, `col2Im` and alike; -- upsampling, currently presented by `upsampling2d` operation; -- local response normalization: `localResponseNormalization`, currently for 2d convolutional layers only; - -Convolution and deconvolution operations are specified by a number of static parameters like kernel size, -dilation, having or not having bias etc.. To facilitate the creation process, we pack the required parameters into -easily constructable and alterable configuration objects. 
Desired activations may be borrowed from the `nn` module. So, -for example, if we want to create a 3x3 convolutional layer with `relu` activation, we may proceed as follows: -```java -Conv2DConfig config2d = new Conv2DConfig().builder().kW(3).kH(3).pW(2).pH(2).build(); -SDVariable convolution2dLinear = sameDiff.cnn.conv2d(input, weights, config2d); -SDVariable convolution2dOutput = sameDiff.nn.relu(convolution2dLinear); -``` -In the first line, we construct a convolution configuration using its default constructor. Then we specify the -kernel size (this is mandatory) and optional padding size, keeping other settings default (unit stride, no -dilation, no bias, `NCHW` data format). We then employ this configuration to create a linear convolution with predefined -`SDVariables` for input and weights; the shape of `weights` is to be tuned to that of `input` and to `config` -beforehand. Thus, if in the above example `input` has shape, say, `[-1, nIn, height, width]`, then `weights` are to have -a form `[nIn, nOut, 3, 3]` (because we have 3x3 convolution kernel). The shape of the resulting variable `convoluton2d` -will be predetermined by these parameters (in our case, it will be `[-1, nOut, height, width]`). Finally, in the last -line we apply a `relu` activation. - -### `rnn` - Recurrent neural networks - -This module contains arguably the most sophisticated methods in the framework. Currently it allows you to create -- simple recurrent units, using `sru` and `sruCell` methods; -- LSTM units, using `lstmCell`, `lstmBlockCell` and `lstmLayer`; -- Graves LSTM units, using `gru` methods. - -As of now, recurrent operations require special configuration objects as input, in which you need to pack all the -variables that will be used in a unit. This is subject to change in the later versions. 
For instance, to -create a simple recurrent unit, you need to proceed like that: -```java -SRUConfiguration sruConfig = new SRUConfiguration(input, weights, bias, init); -SDVariable sruOutput = samediff.rnn().sru(sruConfig); -``` -Here, the arguments in the `SRUConfiguration` constructor are variables that are to be defined beforehand. Obviously -their shapes should be matching, and these shapes predetermine the shape of `output`. - -### `loss` - Loss functions -In this branch we keep common loss functions. Most loss functions may be created quite simply, like that: -```java -SDVariable logLoss = sameDiff.loss.logLoss("logLoss", label, predictions); -``` -where `labels` and `predictions` are `SDVariable`'s. A `String` name is a mandatory parameter in most `loss` methods, -yet it may be set to `null` - in this case, the name will be generated automatically. You may also create weighted loss -functions by adding another `SDVariable` parameters containing weights, as well as specify a reduction method (see below) -for the loss over the minibatch. Thus, a full-fledged `logLoss` operation may -look like: -```java -SDVariable wLogLossMean = sameDiff.loss.logLoss("wLogLossMean", label, predictions, weights, LossReduce.MEAN_BY_WEIGHT); -``` -Some loss operations may allow/require further arguments, depending on their type: e.g. a dimension along which the -loss is to be computed (as in `cosineLoss`), or some real-valued parameters. - -As for reduction methods, over the minibatch, there are currently 4 of them available. Thus, initially loss values for -each sample of the minibatch are computed, then they are multiplied by weights (if specified), and finally one of the -following routines takes place: -- `NONE` - leaving the resulting (weighted)loss values as-is; the result is an `INDArray` with the length of the -minibatch: `sum_loss = sum(weights * loss_per_sample)`. -- `SUM` - summing the values, producing a scalar result. 
-- `MEAN_BY_WEIGHT` - first computes the sum as above, and then divides it by the sum of all weights, producing a scalar -value: `mean_loss = sum(weights * loss_per_sample) / sum(weights)`. If weights are not -specified, they all are set to `1.0` and this reduction is equivalent to getting mean loss value over the minibatch. -- `MEAN_BY_NONZERO_WEIGHT_COUNT` - divides the weighted sum by the number of nonzero weight, producing a scalar: -`mean_count_loss = sum(weights * loss_per_sample) / count(weights != 0)`. Useful e.g. when you want to compute the mean -only over a subset of *valid* samples, setting weights by either `0.` or `1.`. When weights are not given, it just -produces mean, and thus equivalent to `MEAN_BY_WEIGHT`. - - -## The *don'ts* of operations - -In order for `SameDiff` operations to work properly, several main rules are to be upheld. Failing to do so may result in -an exception or, worse even, to a working code producing undesired results. All the things we mention in the current -section describe what **you better not** do. - -- All variables in an operation have to belong to the same instance of `SamdeDiff` (see the [variables](./samediff/variables) -section on how variables are added to a `SameDiff` instance). In other words, **you better not** -```java -SDVariable x = sameDiff0.var(DataType.FLOAT, 1); -SDVariable y = sameDiff1.placeHolder(DataType.FLOAT, 1); -SDVariable z = x.add(y); -``` -- At best, a new variable is to be created for a result of an operation or a chain of operations. In other words, **you -better not** redefine existing variables **and better not** leave operations returning no result. In other words, try to -**avoid** the code like this: -```java -SDVariable z = x.add(y); -//DON'T!!! 
-z.mul(2); -x = z.mul(y); -``` -A properly working version of the above code (if we've desired to obtain 2xy+2y2 in an unusual way) will be -```java -SDVariable z = x.add(y); -SDVariable _2z = z.mul(2); -w = _2z.mul(y); -``` - To learn more why it functions like that, see our [graph section](./samediff/graph). diff --git a/docs/samediff/templates/overview.md b/docs/samediff/templates/overview.md deleted file mode 100644 index dd046c014..000000000 --- a/docs/samediff/templates/overview.md +++ /dev/null @@ -1 +0,0 @@ -# DL4J SameDiff computation graph engine \ No newline at end of file diff --git a/docs/samediff/templates/variables.md b/docs/samediff/templates/variables.md deleted file mode 100644 index 5b52900ca..000000000 --- a/docs/samediff/templates/variables.md +++ /dev/null @@ -1,224 +0,0 @@ ---- -title: Types of variables in SameDiff -short_title: Variables -description: What types of variables are used in SameDiff, their properties and how to switch these types. -category: SameDiff -weight: 3 ---- - -# Variables in `SameDiff` - -## What are variables - -All values defining or passing through each `SameDiff` instance - be it weights, bias, inputs, activations or -general parameters - all are handled by objects of class `SDVariable`. - -Observe that by variables we normally mean not just single values - as it is done in various online examples describing -autodifferentiation - but rather whole multidimensional arrays of them. - -## Variable types - -All variables in `SameDiff` belong to one of four *variable types*, constituting an enumeration `VariableType`. -Here they are: - -- `VARIABLE`: are trainable parameters of your network, e.g. weights and bias of a layer. Naturally, we want them -to be both stored for further usage - we say, that they are *persistent* - as well as being updated during training. 
-- `CONSTANT`: are those parameters which, like variables, are persistent for the network, but are not being -trained; they, however, may be changed externally by the user. -- `PLACEHOLDER`: store temporary values that are to be supplied from the outside, like inputs and labels. -Accordingly, since new placeholders' values are provided at each iteration, they are not stored: in other words, -unlike `VARIABLE` and `CONSTANT`, `PLACEHOLDER` is *not* persistent. -- `ARRAY`: are temporary values as well, representing outputs of [operations](./samediff/ops) within a `SameDiff`, for -instance sums of vectors, activations of a layer, and many more. They are being recalculated at each iteration, and -therefor, like `PLACEHOLDER`, are not persistent. - -To infer the type of a particular variable, you may use the method `getVariableType`, like so: -```java -VariableType varType = yourVariable.getVariableType(); -``` -The current value of a variable in a form of `INDArray` may be obtained using `getArr` or `getArr(true)` - the latter -one if you wish the program to throw an exception if the variable's value is not initialized. - -## Data types - -The data within each variable also has its *data type*, contained in `DataType` enum. Currently in `DataType` there -are three *floating point* types: `FLOAT`, `DOUBLE` and `HALF`; four *integer* types: `LONG`, `INT`, `SHORT` and -`UBYTE`; one *boolean* type `BOOL` - all of them will be referred as *numeric* types. In addition, there is a -*string* type dubbed `UTF8`; and two helper data types `COMPRESSED` and `UNKNOWN`. The 16-bit floating point format `BFLOAT16` and unsigned integer types (`UINT16`, `UINT32` and `UINT64`) will be available in `1.0.0-beta5`. - -To infer the data type of your variable, use -```java -DataType dataType = yourVariable.dataType(); -``` -You may need to trace your variable's data type since at times it does matter, which types you use in an operation. 
For -example, a convolution product, like this one -```java -SDVariable prod = samediff.cnn.conv1d(input, weights, config); -``` -will require its `SDVariable` arguments `input` and `weights` to be of one of the floating point data types, and will -throw an exception otherwise. Also, as we shall discuss just below, all the `SDVariables` of type `VARIABLE` are -supposed to be of floating point type. - -## Common features of variables - -Before we go to the differences between variables, let us first look at the properties they all share -- All variables are ultimately derived from an instance of `SameDiff`, serving as parts of its -[graph](./samediff/graphs). In fact, each variable has a `SameDiff` as one of its fields. -- Results (outputs) of all operations are of `ARRAY` type. -- All `SDVariable`'s involved in an operation are to belong to the *same* `SameDiff`. -- All variables may or may not be given names - in the latter case, a name is actually created automatically. Either -way, the names need to be/are created unique. We shall come back to naming below. - -## Differences between variable types - -Let us now have a closer look at each type of variables, and what distinguish them from each other. - -### Variables - -Variables are the trainable parameters of your network. This predetermines their nature in `SameDiff`. As we briefly -mentioned above, variables' values need to be -both preserved for application, and updated during training. Training means, that we iteratively -update the values by small fractions of their gradients, and this only makes sense if variables are of *floating -point* types (see data types above). - -Variables may be added to your `SameDiff` using different versions of `var` function from your `SameDiff` instance. 
-For example, the code -```java -SDVariable weights = samediff.var("weights", DataType.FLOAT, 784, 10); -``` -adds a variable constituting of a 784x10 array of `float` numbers - weights for a single layer MNIST perceptron -in this case - to a pre-existing `SameDiff` instance `samediff`. - -However, this way the values within a variable will be set as zeros. You may also create a variable with values from -a preset `INDArray`. Say -```java -SDVariable weights = samediff.var("weigths", Nd4j.nrand(784, 10).div(28)); -``` -will create a variable filled with normally distributed randomly generated numbers with variance `1/28`. You may put -any other array creation methods instead of `nrand`, or any preset array, of course. Also, you may use some popular -initialization scheme, like so: - -```java -SDVariable weights = samediff.var("weights", new XavierInitScheme('c', 784, 10), DataType.FLOAT, 784, 10); -``` -Now, the weights will be randomly initialized using the Xavier scheme. There are other ways to create and - -fill variables: you may look them up in the 'known subclasses' section [of our javadoc](https://deeplearning4j.org/api/latest/org/nd4j/weightinit/WeightInitScheme.html"). - -### Constants - -Constants hold values that are stored, but - unlike variables - remain unchanged during training. These, for -instance, may be some hyperparamters you wish to have in your network and be able to access from the outside. Or -they may be pretrained weights of a neural network that you wish to keep unchanged (see more on that in -[Changing Variable Type](https://deeplearning4j.org/api/latest/) below). Constants may be of any data type -- so e.g. `int` and `boolean` are allowed alongside with `float` and `double`. - -In general, constants are added to `SameDiff` by means of `constant` methods. 
A constant may be created form an -`INDArray`, like that: -```java -SDVariable constant = samediff.constant("constants", Nd4j.create(new float[] {3.1415f, 42f})); -``` -A constant consisting of a single scalar value may be created using one of the `scalar` methods: -```java -INDArray someScalar = samediff.scalar("scalar", 42); -``` -Again, we refer to the [javadoc](https://deeplearning4j.org/api/latest/) for the whole reference. - -### Placeholders - -The most common placeholders you'll normally have in a `SameDiff` are inputs and, when applicable, labels. You may -create placeholders of any data type, depending on the operations you use them in. To add a placeholder to a `SameDiff`, -you may call one of `placeHolder` methods, e.g. like that: -```java -SDVariable in = samediff.placeHolder("input", DataType.FLOAT, -1, 784); -``` -as in MNIST example. Here we specify name, data type and then shape of your placeholder - here, we have -28x28 grayscale pictures rendered as 1d vectors (therefore 784) coming in batches of length we don't know beforehand -(therefore -1). - -### Arrays - -Variables of `ARRAY` type appear as outputs of [operations](./samediff/ops) within `SameDiff`. -Accordingly, the data type of an array-type variable depends on the kind of operation it is produced by and variable -type(s) ot its argument(s). Arrays are not persistent - they are one-time values that will be recalculated from scratch -at the next step. However, unlike placeholders, gradients are computed for them, as those are needed to update the values -of `VARIABLE`'s. - -There are as many ways array-type variables are created as there are operations, so you're better up focusing on -our [operations section](./samediff/ops), our [javadoc](https://deeplearning4j.org/api/latest/) and [examples](./samediff/exampes). 
- -## Recap table - -Let us summarize the main properties of variable types in one table: - -| | Trainable | Gradients | Persistent | Workspaces | Datatypes | Instantiated from | -| ---------- | ----------- | --------- | ---------- | -----------| ---------- | ---------- | -| `VARIABLE` | Yes | Yes | Yes | Yes | Float only | Instance | -| `CONSTANT` | No | No | Yes | No | Any | Instance | -| `PLACEHOLDER` | No | No | No | No | Any | Instance | -| `ARRAY` | No | Yes | No | Yes | Any | Operations | - -We haven't discussed what 'Workspaces' mean - if you do not know, do not worry, this is an internal technical term that -basically describes how memory is managed internally. - -## Changing variable types - -You may change variable types as well. For now, there are three of such options: - -### Variable to constant -At times - for instance if you perform transfer learning - you may wish to turn a variable into a constant. This is -done like so: -```java -samediff.convertToConstant(someVariable); -``` -where `someVariable` is an instance of `SDVariable` of `VARIABLE` type. The variable `someVariable` will not be trained -any more. - -### Constant to variable -Conversely, constants - if they are of *floating point* data type - may be converted to variables. So, for instance, if -you wish your frozen weights to become trainable again -```java -samediff.convertToVariable(frozenWeights); //not frozen any more -``` -### Placeholder to constant -Placeholders may be converted to constants as well - for instance, if you need to freeze one of the inputs. There are no -restrictions on the data type, yet, since placeholder values are not persistent, their value should be set before you -turn them into constants. This can be done as follows -```java -placeHolder.setArray(someArray); -samediff.convertToConstant(placeHolder); -``` -For now it is not possible to turn a constant back into a placeholder, we may consider adding this functionality if -there is a need for that. 
For now, if you wish to effectively freeze your placeholder but be able to use it again, -consider supplying it with constant values rather than turning it into a constant. - -## Variables' names and values -### Getting variables from `SameDiff` -Recall that every variable in an instance of `SameDiff` has its unique `String` name. Your `SameDiff` actually tracks your -variables by their names, and allows you to retrieve them by using `getVariable(String name)` method. - -Consider the following line: -```java -SDVariable regressionCost = weights.mmul(input).sub("regression_prediction", bias).squaredDifference(labels); -``` -Here, in the function `sub` we actually have implicitly introduced a variable (of type `ARRAY`) that holds the -result of the subtraction. By adding a name into the operations's argument, we've secured ourselves the possibility -to retrieve the variable from elsewhere: say, if later you need to infer the difference between the labels and the -prediction as a vector, you may just write: -```java -SDVariable errorVector = samediff.getVariable("regressionPrediction").sub(labels); -``` -This becomes especially handy if your whole `SameDiff` instance is initialized elsewhere, and you still need to get -hold of some of its variables - say, multiple outputs. - -You can get and set the name of an `SDVariable` the methods `getVarName` and `setVarName` -respectively. When renaming, note that variable's name is to remain unique within its `SameDiff`. - -### Getting variable's value -You may retrieve any variable's current value as an `INDArray` using the method `eval()`. Note that for non-persistent -variables, the value should first be set. For variables with gradients, the gradient's value may also be inferred using -the method `getGradient`. 
- - - - diff --git a/docs/scala_doc.py b/docs/scala_doc.py deleted file mode 100644 index b868e8df7..000000000 --- a/docs/scala_doc.py +++ /dev/null @@ -1,131 +0,0 @@ -# -*- coding: utf-8 -*- - -################################################################################ -# Copyright (c) 2015-2018 Skymind, Inc. -# -# This program and the accompanying materials are made available under the -# terms of the Apache License, Version 2.0 which is available at -# https://www.apache.org/licenses/LICENSE-2.0. -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# SPDX-License-Identifier: Apache-2.0 -################################################################################ - -import re -import sys -from doc_generator import BaseDocumentationGenerator - - -# TODO this is just a java clone. Properly read out classes and methods ("def") -class ScalaDocumentationGenerator(BaseDocumentationGenerator): - - def __init__(self, args): - reload(sys) - sys.setdefaultencoding('utf8') - - super(ScalaDocumentationGenerator, self).__init__(args) - - '''Doc strings (in Java/Scala) need to be stripped of all '*' values. - Convert '@param' to '- param'. Strip line with author as well. - - TODO can be vastly improved. - ''' - def process_main_docstring(self, doc_string): - lines = doc_string.split('\n') - doc = [line.replace('*', '').lstrip(' ').rstrip('/') for line in lines[1:-1] if not '@' in line] - return '\n'.join(doc) - - - '''Doc strings (in Java/Scala) need to be stripped of all '*' values. - Convert '@param' to '- param'. TODO can be vastly improved. 
- ''' - def process_docstring(self, doc_string): - lines = doc_string.split('\n') - doc = [line.replace('*', '').lstrip(' ').replace('@', '- ') for line in lines] - return '\n'.join(doc) - - - '''Takes unformatted signatures and doc strings and returns a properly - rendered piece that fits into our markdown layout. - ''' - def render(self, signature, doc_string, class_name, is_method): - if is_method: # Method name from signature - method_regex = r'public (?:static )?[a-zA-Z0-9]* ([a-zA-Z0-9]*)\(' - name = re.findall(method_regex, signature)[0] - else: # Constructor takes class name - name = class_name - sub_blocks = ['##### {} \n{}'.format(name, self.to_code_snippet(signature))] - if doc_string: - sub_blocks.append(doc_string + '\n') - return '\n\n'.join(sub_blocks) - - - '''Returns main doc string of class/object in question. - ''' - def get_main_doc_string(self, class_string, class_name): - print(class_name) - doc_regex = r'\/\*\*\n([\S\s]*?.*)\*\/\n' # match "/** ... */" at the top - doc_string = re.search(doc_regex, class_string) - try: - doc_match = doc_string.group(); - except: - doc_match = '' - doc = self.process_main_docstring(doc_match) - if not doc_string: - print('Warning, no doc string found for class {}'.format(class_name)) - doc_index = 0 if not doc_match else doc_string.end() - return doc, class_string[doc_index:] - - - '''Returns doc string and signature data for constructors. 
- ''' - def get_constructor_data(self, class_string, class_name, use_contructor): - constructors = [] - if 'public ' + class_name in class_string and use_contructor: - doc_regex = r'\/\*\*\n([\S\s]*?.*)\*\/\n[\S\s]*?(public ' \ - + class_name + '.[\S\s]*?){' - result = re.search(doc_regex, class_string) - if result: - doc_string, signature = result.groups() - doc = self.process_docstring(doc_string) - class_string = class_string[result.end():] - constructors.append((signature, doc)) - else: - print("Warning, no doc string found for constructor {}".format(class_name)) - return constructors, class_string - - - '''Returns doc string and signature data for methods - in the public API of an object - ''' - def get_public_method_data(self, class_string, includes, excludes): - method_regex = r'public (?:static )?[a-zA-Z0-9]* ([a-zA-Z0-9]*)\(' - - # Either use all methods or use include methods that can be found - method_strings = re.findall(method_regex, class_string) - if includes: - method_strings = [i for i in includes if i in method_strings] - - # Exclude all 'exclude' methods - method_strings = [m for m in method_strings if m not in excludes] - - methods = [] - for method in method_strings: - # print("Processing doc string for method {}".format(method)) - doc_regex = r'\/\*\*\n([\S\s]*?.*)\*\/\n[\S\s]*?' + \ - '(public (?:static )?[a-zA-Z0-9]* ' + method + '[\S\s]*?){' - # TODO: this will sometimes run forever. 
fix regex - result = re.search(doc_regex, class_string) - if result: - doc_string, signature = result.groups() - doc = self.process_docstring(doc_string) - class_string = class_string[result.end():] - methods.append((signature, doc)) - else: - print("Warning, no doc string found for method {}".format(method)) - return methods diff --git a/docs/scalnet/README.md b/docs/scalnet/README.md deleted file mode 100644 index 56759786c..000000000 --- a/docs/scalnet/README.md +++ /dev/null @@ -1,10 +0,0 @@ -# scalnet documentation - -To generate docs into the`scalnet/doc_sources` folder, run - -``` -python generate_docs.py \ - --project scalnet \ - --language scala \ - --code ../scalnet/src/main/scala/org/deeplearning4j/scalnet -``` \ No newline at end of file diff --git a/docs/scalnet/mkdocs.yml b/docs/scalnet/mkdocs.yml deleted file mode 100644 index b686eb260..000000000 --- a/docs/scalnet/mkdocs.yml +++ /dev/null @@ -1,34 +0,0 @@ -site_name: Scalnet documentation -theme: readthedocs -docs_dir: doc_sources -repo_url: https://github.com/deeplearning4j/deeplearning4j/tree/master/scalnet -site_url: http://deeplearning4j.org -site_description: 'Scalnet documentation' - -dev_addr: '0.0.0.0:8000' - -pages: -- Home: index.md -- Why ScalNet: why-scalnet.md -- Getting started: - - Guide to Sequential: getting-started/scalnet-sequential-guide.md - - Guide to NeuralNet: getting-started/scalnet-model-guide.md -- Models: - - About Scalnet models: models/about-scalnet.md - - Sequential: models/sequential.md - - NeuralNet: models/neural-net.md -- Layers: - - About ScalNet layers: layers/about-scalnet-layers.md - - Core Layers: layers/core.md - - Convolutional Layers: layers/convolutional.md - - Pooling Layers: layers/pooling.md - - Recurrent Layers: layers/recurrent.md - - Embedding Layers: layers/embeddings.md - - Advanced Activations Layers: layers/advanced-activations.md - - Noise layers: layers/noise.md -- Losses: losses.md -- Optimizers: optimizers.md -- Activations: activations.md 
-- Initializers: initializers.md -- Regularizers: regularizers.md -- Constraints: constraints.md diff --git a/docs/scalnet/pages.json b/docs/scalnet/pages.json deleted file mode 100644 index 1360c11eb..000000000 --- a/docs/scalnet/pages.json +++ /dev/null @@ -1,54 +0,0 @@ -{ - "excludes": [ - "abstract" - ], - "pages":[ - { - "page": "models/model-import.md", - "class": [ - "models/Model.scala" - ] - }, - { - "page": "models/sequential.md", - "class": [ - "models/Sequential.scala" - ] - }, - { - "page": "models/neural-net.md", - "class": [ - "models/NeuralNet.scala" - ] - }, - { - "page": "layers/core.md", - "module": "layers/core" - }, - { - "page": "layers/convolutional.md", - "module": "layers/convolutional" - }, - { - "page": "layers/pooling.md", - "module": "layers/pooling" - }, - { - "page": "layers/recurrent.md", - "module": "layers/recurrent" - }, - { - "page": "layers/embeddings.md", - "module": "layers/embeddings" - }, - { - "page": "layers/advanced-activations.md", - "module": "layers/advanced/activations" - }, - { - "page": "layers/noise.md", - "module": "layers/noise" - } - ] -} - diff --git a/docs/scalnet/templates/activations.md b/docs/scalnet/templates/activations.md deleted file mode 100644 index 44fc3d800..000000000 --- a/docs/scalnet/templates/activations.md +++ /dev/null @@ -1,2 +0,0 @@ -## Available activations - diff --git a/docs/scalnet/templates/constraints.md b/docs/scalnet/templates/constraints.md deleted file mode 100644 index aef7fa27f..000000000 --- a/docs/scalnet/templates/constraints.md +++ /dev/null @@ -1 +0,0 @@ -## Supported constraints \ No newline at end of file diff --git a/docs/scalnet/templates/getting-started/scalnet-model-guide.md b/docs/scalnet/templates/getting-started/scalnet-model-guide.md deleted file mode 100644 index 995467977..000000000 --- a/docs/scalnet/templates/getting-started/scalnet-model-guide.md +++ /dev/null @@ -1 +0,0 @@ -# Getting started with ScalNet NeuralNet models diff --git 
a/docs/scalnet/templates/getting-started/scalnet-sequential-guide.md b/docs/scalnet/templates/getting-started/scalnet-sequential-guide.md deleted file mode 100644 index fe736ebf3..000000000 --- a/docs/scalnet/templates/getting-started/scalnet-sequential-guide.md +++ /dev/null @@ -1 +0,0 @@ -# Getting started with ScalNet Sequential models diff --git a/docs/scalnet/templates/index.md b/docs/scalnet/templates/index.md deleted file mode 100644 index 365be3115..000000000 --- a/docs/scalnet/templates/index.md +++ /dev/null @@ -1,3 +0,0 @@ -# Deeplearing4j: ScalNet - -{{autogenerated}} \ No newline at end of file diff --git a/docs/scalnet/templates/initializers.md b/docs/scalnet/templates/initializers.md deleted file mode 100644 index f0a99ff17..000000000 --- a/docs/scalnet/templates/initializers.md +++ /dev/null @@ -1,2 +0,0 @@ -## Supported initializers - diff --git a/docs/scalnet/templates/layers/about-scalnet-layers.md b/docs/scalnet/templates/layers/about-scalnet-layers.md deleted file mode 100644 index ee8fa1a1a..000000000 --- a/docs/scalnet/templates/layers/about-scalnet-layers.md +++ /dev/null @@ -1,2 +0,0 @@ -# About ScalNet layers - diff --git a/docs/scalnet/templates/losses.md b/docs/scalnet/templates/losses.md deleted file mode 100644 index 5ea272cd5..000000000 --- a/docs/scalnet/templates/losses.md +++ /dev/null @@ -1,2 +0,0 @@ -## Supported loss functions - diff --git a/docs/scalnet/templates/models/about-scalnet.md b/docs/scalnet/templates/models/about-scalnet.md deleted file mode 100644 index 1da8f9978..000000000 --- a/docs/scalnet/templates/models/about-scalnet.md +++ /dev/null @@ -1 +0,0 @@ -# About ScalNet models \ No newline at end of file diff --git a/docs/scalnet/templates/models/neural-net.md b/docs/scalnet/templates/models/neural-net.md deleted file mode 100644 index 6bfff75d4..000000000 --- a/docs/scalnet/templates/models/neural-net.md +++ /dev/null @@ -1 +0,0 @@ -{{autogenerated}} diff --git a/docs/scalnet/templates/models/sequential.md 
b/docs/scalnet/templates/models/sequential.md deleted file mode 100644 index a4d5e2062..000000000 --- a/docs/scalnet/templates/models/sequential.md +++ /dev/null @@ -1 +0,0 @@ -{{autogenerated}} \ No newline at end of file diff --git a/docs/scalnet/templates/optimizers.md b/docs/scalnet/templates/optimizers.md deleted file mode 100644 index 0499c44bb..000000000 --- a/docs/scalnet/templates/optimizers.md +++ /dev/null @@ -1,2 +0,0 @@ -## Supported optimizers - diff --git a/docs/scalnet/templates/regularizers.md b/docs/scalnet/templates/regularizers.md deleted file mode 100644 index d64ee71b1..000000000 --- a/docs/scalnet/templates/regularizers.md +++ /dev/null @@ -1 +0,0 @@ -## Supported regularizers diff --git a/docs/scalnet/templates/why-scalnet.md b/docs/scalnet/templates/why-scalnet.md deleted file mode 100644 index a416f110d..000000000 --- a/docs/scalnet/templates/why-scalnet.md +++ /dev/null @@ -1 +0,0 @@ -# Why use ScalNet? \ No newline at end of file diff --git a/libnd4j/CMakeLists.txt b/libnd4j/CMakeLists.txt index 63a83c05b..9610d2890 100755 --- a/libnd4j/CMakeLists.txt +++ b/libnd4j/CMakeLists.txt @@ -49,12 +49,12 @@ elseif(WIN32) set(CMAKE_CXX_FLAGS_RELEASE "-D_RELEASE=true") set(CMAKE_CXX_FLAGS_DEBUG " /FS /EHsc") else() - set(CMAKE_CXX_FLAGS_RELEASE "-O3 -fPIC -fmax-errors=2 -D_RELEASE=true") - set(CMAKE_CXX_FLAGS_DEBUG " -g -O2 -fPIC -fmax-errors=2") + set(CMAKE_CXX_FLAGS_RELEASE "-O3 -fPIC -D_RELEASE=true") + set(CMAKE_CXX_FLAGS_DEBUG " -g -O2 -fPIC") endif() else() - set(CMAKE_CXX_FLAGS_RELEASE "-O3 -fPIC -fmax-errors=2 -D_RELEASE=true") - set(CMAKE_CXX_FLAGS_DEBUG " -g -O0 -fPIC -fmax-errors=2") + set(CMAKE_CXX_FLAGS_RELEASE "-O3 -fPIC -D_RELEASE=true") + set(CMAKE_CXX_FLAGS_DEBUG " -g -O0 -fPIC") if (SD_CPU) set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=address") @@ -221,21 +221,16 @@ include_directories(${FLATBUFFERS_PATH}/include) configure_file(include/config.h.in include/config.h) 
include_directories(${CMAKE_CURRENT_BINARY_DIR}/include) -if (NOT DEFINED ENV{CLION_IDE}) - message("NOT CLION") - include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) - add_subdirectory(blas) - if(SD_BUILD_TESTS) - # tests are always compiled with all ops included - set(SD_ALL_OPS true) - set(SD_BUILD_MINIFIER true) - add_subdirectory(tests_cpu) - endif() -endif () -if ($ENV{CLION_IDE}) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) +add_subdirectory(blas) +if(SD_BUILD_TESTS) + # tests are always compiled with all ops included + set(SD_ALL_OPS true) + set(SD_BUILD_MINIFIER true) add_subdirectory(tests_cpu) -endif () +endif() + if (MSVC_DEV) set(SD_BUILD_MINIFIER false) diff --git a/libnd4j/blas/CMakeLists.txt b/libnd4j/blas/CMakeLists.txt index a793063bc..a12b70194 100755 --- a/libnd4j/blas/CMakeLists.txt +++ b/libnd4j/blas/CMakeLists.txt @@ -120,8 +120,13 @@ elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_TUNE}") elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") # using GCC - SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_TUNE}") + SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_TUNE} -fmax-errors=2 ") set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-rpath,$ORIGIN/") + + if (CMAKE_BUILD_TYPE STREQUAL "Debug" AND NOT(APPLE) AND NOT(WIN32)) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -rdynamic -Wl,-export-dynamic") + SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -export-dynamic") + endif() endif() @@ -361,11 +366,6 @@ elseif(SD_CPU) endif() endif() - if (CMAKE_BUILD_TYPE STREQUAL "Debug" AND NOT(APPLE) AND NOT(WIN32)) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -rdynamic -Wl,-export-dynamic") - SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -export-dynamic") - endif() - install(TARGETS ${SD_LIBRARY_NAME} DESTINATION .) 
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/cpu) endif() diff --git a/libnd4j/include/array/ConstantDescriptor.h b/libnd4j/include/array/ConstantDescriptor.h index 589ba2353..89e36c2a9 100644 --- a/libnd4j/include/array/ConstantDescriptor.h +++ b/libnd4j/include/array/ConstantDescriptor.h @@ -35,7 +35,7 @@ namespace sd { std::vector _floatValues; public: ConstantDescriptor(double* values, int length); - ConstantDescriptor(Nd4jLong* values, int length); + ConstantDescriptor(Nd4jLong const* values, int length); ConstantDescriptor(std::initializer_list values); explicit ConstantDescriptor(std::vector &values); diff --git a/libnd4j/include/array/NDArray.h b/libnd4j/include/array/NDArray.h index 6ab301200..7936f6688 100644 --- a/libnd4j/include/array/NDArray.h +++ b/libnd4j/include/array/NDArray.h @@ -125,7 +125,7 @@ namespace sd { void templatedDoubleAssign(void *xBuffer, const Nd4jLong xOffset, const void *yBuffer, const Nd4jLong yOffset) const; template - FORCEINLINE R templatedGet(void *buffer, const Nd4jLong index) const; + FORCEINLINE R templatedGet(void const* buffer, const Nd4jLong index) const; /* template R templatedGetIndex(void *buffer, Nd4jLong *indices) const; @@ -193,7 +193,7 @@ namespace sd { #ifndef __JAVACPP_HACK__ NDArray(std::shared_ptr buffer, const ShapeDescriptor& descriptor, sd::LaunchContext* context = sd::LaunchContext::defaultContext(), const Nd4jLong offset = 0); - NDArray(std::shared_ptr buffer, const char order, const std::vector &shape, sd::LaunchContext* context = sd::LaunchContext::defaultContext()); + NDArray(std::shared_ptr buffer, char order, const std::vector &shape, sd::LaunchContext* context = sd::LaunchContext::defaultContext()); /** * This contructors create scalar array containing string utf8 @@ -250,13 +250,14 @@ namespace sd { /** * do not allocate memory, memory for array is passed from outside */ - NDArray(void *buffer, Nd4jLong* shapeInfo, sd::LaunchContext* context = sd::LaunchContext::defaultContext(), const 
bool isBuffAlloc = false); + NDArray(void *buffer, Nd4jLong* shapeInfo, sd::LaunchContext* context = sd::LaunchContext::defaultContext(), bool isBuffAlloc = false); + NDArray(void *buffer, const Nd4jLong* shapeInfo, sd::LaunchContext* context = sd::LaunchContext::defaultContext(), bool isBuffAlloc = false); /** * do not allocate memory, memory for array is passed from outside * we suppose the content of both (device and host) buffers is identical */ - NDArray(void *buffer, void *bufferD, Nd4jLong* shapeInfo, sd::LaunchContext* context = sd::LaunchContext::defaultContext(), const bool isBuffAlloc = false, const bool isBuffDAlloc = false); + NDArray(void *buffer, void *bufferD, const Nd4jLong* shapeInfo, sd::LaunchContext* context = sd::LaunchContext::defaultContext(), bool isBuffAlloc = false, bool isBuffDAlloc = false); /** * copy constructor @@ -277,28 +278,28 @@ namespace sd { /** * constructor creates new NDArray using shape information from "shapeInfo", set all elements in new array to zeros, if copyStrides is true then use stride values from "shapeInfo", else calculate strides independently */ - NDArray(Nd4jLong* shapeInfo, const bool copyStrides = false, sd::LaunchContext* context = sd::LaunchContext::defaultContext(), const bool nullify = true); + NDArray(const Nd4jLong* shapeInfo, bool copyStrides = false, sd::LaunchContext* context = sd::LaunchContext::defaultContext(), bool nullify = true); /** * constructor creates new NDArray using shape information from "shapeInfo", set all elements in new array to be zeros, if copyStrides is true then use stride values from "shapeInfo", else calculate strides independently * set dtype as array type */ - NDArray(Nd4jLong* shapeInfo, const sd::DataType dtype, const bool copyStrides = false, sd::LaunchContext* context = sd::LaunchContext::defaultContext(), const bool nullify = true); + NDArray(const Nd4jLong* shapeInfo, sd::DataType dtype, bool copyStrides = false, sd::LaunchContext* context = 
sd::LaunchContext::defaultContext(), bool nullify = true); /** * this constructor creates new array using shape information contained in vector argument */ - NDArray(const char order, const std::vector &shape, sd::DataType dtype = DOUBLE, sd::LaunchContext* context = sd::LaunchContext::defaultContext()); + NDArray(char order, const std::vector &shape, sd::DataType dtype = DOUBLE, sd::LaunchContext* context = sd::LaunchContext::defaultContext()); /** * This constructor creates new array with elements copied from data and using shape information stored in shape, elements from data will be casted to dtype */ - NDArray(const char order, const std::vector &shape, const std::vector& data, sd::DataType dtype = DOUBLE, sd::LaunchContext* context = sd::LaunchContext::defaultContext()); + NDArray(char order, const std::vector &shape, const std::vector& data, sd::DataType dtype = DOUBLE, sd::LaunchContext* context = sd::LaunchContext::defaultContext()); /** * this constructor creates new array using given buffer (without memory allocation) and shape information stored in shape */ - NDArray(void *buffer, const char order, const std::vector &shape, sd::DataType dtype, sd::LaunchContext* context = sd::LaunchContext::defaultContext(), const bool isBuffAlloc = false); + NDArray(void *buffer, char order, const std::vector &shape, sd::DataType dtype, sd::LaunchContext* context = sd::LaunchContext::defaultContext(), const bool isBuffAlloc = false); /** * This method returns new array with the same shape & data type @@ -317,12 +318,12 @@ namespace sd { * this constructor creates new NDArray with shape matching "other" array, * doesn't copy "other" elements into new array !!! 
*/ - explicit NDArray(const NDArray* other, const bool copyStrides = false, sd::LaunchContext* context = sd::LaunchContext ::defaultContext()); + explicit NDArray(const NDArray* other, bool copyStrides = false, sd::LaunchContext* context = sd::LaunchContext ::defaultContext()); /** * this constructor creates scalar(and set its value = 0) or empty array depending on bool argument isScalar */ - NDArray(sd::DataType dtype, sd::LaunchContext* context = sd::LaunchContext::defaultContext(), const bool isScalar = true); + NDArray(sd::DataType dtype, sd::LaunchContext* context = sd::LaunchContext::defaultContext(), bool isScalar = true); /** * This method blocks until asynchronous operation finishes @@ -364,9 +365,11 @@ namespace sd { * @param offset * @return */ - void *bufferWithOffset(Nd4jLong offset) const; + void const* bufferWithOffset(Nd4jLong offset) const; + void* bufferWithOffset(Nd4jLong offset); - void* specialBufferWithOffset(Nd4jLong offset) const; + void const* specialBufferWithOffset(Nd4jLong offset) const; + void* specialBufferWithOffset(Nd4jLong offset); /** * copy assignment operator * in particular, when _dataType != other._dataType and both shapes are the same, there will be allocation of new _buffer and _dataType acquires other._dataType @@ -450,38 +453,39 @@ namespace sd { /** * returns host buffer */ - FORCEINLINE void* getBuffer() const; FORCEINLINE void* buffer(); + FORCEINLINE const void* buffer() const; /** * returns buffer offset (offset is the same for host and device buffers) */ - FORCEINLINE Nd4jLong getBufferOffset() const; - FORCEINLINE Nd4jLong bufferOffset(); + FORCEINLINE Nd4jLong bufferOffset() const; /** * if _bufferD==nullptr return _buffer, else return _bufferD */ void* specialBuffer(); - void* getSpecialBuffer() const; + const void* specialBuffer() const; /** * returns device buffer if compilation is for cuda case, otherwise returns host buffer */ - void* getPlatformBuffer() const; void* platformBuffer(); + const void* 
platformBuffer() const; template - T* bufferAsT() const; + T* bufferAsT(); + + template + const T* bufferAsT() const; /** * returns _shapeInfo */ - FORCEINLINE Nd4jLong* shapeInfo(); - FORCEINLINE Nd4jLong* getShapeInfo() const; + FORCEINLINE const Nd4jLong* shapeInfo() const; /** @@ -493,12 +497,9 @@ namespace sd { /** * if _shapeInfoD==nullptr return _shapeInfo, else return _shapeInfoD */ - FORCEINLINE Nd4jLong* specialShapeInfo(); - FORCEINLINE Nd4jLong* getSpecialShapeInfo() const; + FORCEINLINE const Nd4jLong* specialShapeInfo() const; - - Nd4jLong* platformShapeInfo(); - Nd4jLong* getPlatformShapeInfo() const; + const Nd4jLong* platformShapeInfo() const; /** * permutes (in-place) the dimensions in array according to "dimensions" array @@ -1509,8 +1510,8 @@ bool NDArray::isAttached() { } template -FORCEINLINE R NDArray::templatedGet(void *buffer, Nd4jLong index) const { - auto b = reinterpret_cast(buffer); +FORCEINLINE R NDArray::templatedGet(void const* buffer, Nd4jLong index) const { + auto b = reinterpret_cast(buffer); auto v = static_cast(b[index]); return v; } @@ -1625,9 +1626,9 @@ bool NDArray::nonNull() const { return true; if(!Environment::getInstance()->isCPU()) - return getDataBuffer()->special() != nullptr && getSpecialShapeInfo() != nullptr; + return getDataBuffer()->special() != nullptr && specialShapeInfo() != nullptr; - return getDataBuffer()->primary() != nullptr && getShapeInfo() != nullptr; + return getDataBuffer()->primary() != nullptr && shapeInfo() != nullptr; } ////////////////////////////////////////////////////////////////////////// @@ -1744,7 +1745,7 @@ bool NDArray::isEmpty() const { if (this->_shapeInfo == nullptr) return false; - return ArrayOptions::arrayType(this->getShapeInfo()) == ArrayType::EMPTY; + return ArrayOptions::arrayType(this->shapeInfo()) == ArrayType::EMPTY; } ////////////////////////////////////////////////////////////////////////// @@ -1804,7 +1805,7 @@ T& NDArray::t(const Nd4jLong i, const Nd4jLong j) { 
syncToHost(); Nd4jLong coords[2] = {i, j}; - auto offset = shape::getOffset(getShapeInfo(), coords); + auto offset = shape::getOffset(shapeInfo(), coords); tickWriteHost(); return *(reinterpret_cast(bufferWithOffset(offset))); } @@ -1821,7 +1822,7 @@ T& NDArray::t(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k) { syncToHost(); Nd4jLong coords[3] = {i, j, k}; - auto offset = shape::getOffset(getShapeInfo(), coords); + auto offset = shape::getOffset(shapeInfo(), coords); tickWriteHost(); return *(reinterpret_cast(bufferWithOffset(offset))); } @@ -1838,7 +1839,7 @@ T& NDArray::t(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLo syncToHost(); Nd4jLong coords[4] = {i, j, k, w}; - auto offset = shape::getOffset(getShapeInfo(), coords); + auto offset = shape::getOffset(shapeInfo(), coords); tickWriteHost(); return *(reinterpret_cast(bufferWithOffset(offset))); } @@ -1856,7 +1857,7 @@ T NDArray::t(const Nd4jLong i) const { syncToHost(); tickReadHost(); - return *(reinterpret_cast(bufferWithOffset(getOffset(i)))); + return *(reinterpret_cast(bufferWithOffset(getOffset(i)))); } //////////////////////////////////////////////////////////////////////// @@ -1872,9 +1873,9 @@ T NDArray::t(const Nd4jLong i, const Nd4jLong j) const { syncToHost(); Nd4jLong coords[2] = {i, j}; - auto offset = shape::getOffset(getShapeInfo(), coords); + auto offset = shape::getOffset(shapeInfo(), coords); tickReadHost(); - return *(reinterpret_cast(bufferWithOffset(offset))); + return *(reinterpret_cast(bufferWithOffset(offset))); } template @@ -1889,9 +1890,9 @@ T NDArray::t(const Nd4jLong i, const Nd4jLong j) const { syncToHost(); Nd4jLong coords[3] = {i, j, k}; - auto offset = shape::getOffset(getShapeInfo(), coords); + auto offset = shape::getOffset(shapeInfo(), coords); tickReadHost(); - return *(reinterpret_cast(bufferWithOffset(offset))); + return *(reinterpret_cast(bufferWithOffset(offset))); } template @@ -1906,9 +1907,9 @@ T NDArray::t(const Nd4jLong i, const Nd4jLong 
j) const { syncToHost(); Nd4jLong coords[4] = {i, j, k, w}; - auto offset = shape::getOffset(getShapeInfo(), coords); + auto offset = shape::getOffset(shapeInfo(), coords); tickReadHost(); - return *(reinterpret_cast(bufferWithOffset(offset))); + return *(reinterpret_cast(bufferWithOffset(offset))); } #ifndef __JAVACPP_HACK__ @@ -1924,8 +1925,7 @@ std::shared_ptr NDArray::dataBuffer() { #endif //////////////////////////////////////////////////////////////////////// -void* NDArray::getBuffer() const { - +const void* NDArray::buffer() const { return _buffer->primary() != nullptr ? static_cast(_buffer->primary()) + (_offset * sizeOfT()) : nullptr; } @@ -1934,18 +1934,13 @@ void* NDArray::buffer() { return _buffer->primary() != nullptr ? static_cast(_buffer->primary()) + (_offset * sizeOfT()) : nullptr; } -//////////////////////////////////////////////////////////////////////// -Nd4jLong* NDArray::getShapeInfo() const { - return _shapeInfo; -} - ////////////////////////////////////////////////////////////////////////// -Nd4jLong* NDArray::shapeInfo() { +const Nd4jLong* NDArray::shapeInfo() const { return _shapeInfo; } //////////////////////////////////////////////////////////////////////// -Nd4jLong* NDArray::specialShapeInfo() { +const Nd4jLong* NDArray::specialShapeInfo() const { if (_shapeInfoD == nullptr) return _shapeInfo; // FIXME: this should be fixed once CUDA backend added @@ -1953,23 +1948,10 @@ Nd4jLong* NDArray::specialShapeInfo() { } //////////////////////////////////////////////////////////////////////// -Nd4jLong NDArray::getBufferOffset() const { +Nd4jLong NDArray::bufferOffset() const { return _offset; } -//////////////////////////////////////////////////////////////////////// -Nd4jLong NDArray::bufferOffset() { - return _offset; -} - -//////////////////////////////////////////////////////////////////////// -Nd4jLong* NDArray::getSpecialShapeInfo() const{ - if (_shapeInfoD == nullptr) - return _shapeInfo; - // FIXME: this should be fixed once CUDA 
backend added - return _shapeInfoD; -} - #if defined(__CUDACC__) //&& defined(BUILD_TESTS) // for CUDA we need stil stuff inline diff --git a/libnd4j/include/array/NDArray.hXX b/libnd4j/include/array/NDArray.hXX index 7756fb7ae..42f5f47f3 100644 --- a/libnd4j/include/array/NDArray.hXX +++ b/libnd4j/include/array/NDArray.hXX @@ -143,7 +143,7 @@ NDArray::NDArray(void* buffer, const char order, const std::vector &sh //////////////////////////////////////////////////////////////////////// // creates new NDArray using shape information from "shapeInfo" array, set all elements in new array to be zeros -NDArray::NDArray(Nd4jLong* shapeInfo, const sd::DataType dtype, const bool copyStrides, sd::LaunchContext * context, const bool nullify) { +NDArray::NDArray(const Nd4jLong* shapeInfo, const sd::DataType dtype, const bool copyStrides, sd::LaunchContext * context, const bool nullify) { if (shapeInfo == nullptr) throw std::runtime_error("NDArray constructor: can't be initalized without shapeinfo"); @@ -213,72 +213,76 @@ NDArray::NDArray(sd::LaunchContext * context) { _length = 0; } -//////////////////////////////////////////////////////////////////////// -// creates new NDArray using shape information from "shapeInfo" array, set all elements in new array to be zeros, set dtype as array type -NDArray::NDArray(Nd4jLong* shapeInfo, const bool copyStrides, sd::LaunchContext * context, const bool nullify): - NDArray(shapeInfo, ArrayOptions::dataType(shapeInfo), copyStrides, context) { -} - -//////////////////////////////////////////////////////////////////////// -NDArray::NDArray(std::shared_ptr buffer, const ShapeDescriptor& descriptor, sd::LaunchContext* context, const Nd4jLong offset) { - - _context = context; - _offset = offset; - - setShapeInfo(descriptor); - - _buffer = buffer; - - _isView = offset > 0 || _length * DataTypeUtils::sizeOf(_dataType) < buffer->getLenInBytes(); -} - -//////////////////////////////////////////////////////////////////////// -// do not allocate 
memory, memory for array is passed from outside -NDArray::NDArray(void *buffer, Nd4jLong *shapeInfo, sd::LaunchContext * context, const bool isBuffAlloc) { - - if (buffer == nullptr && ArrayOptions::arrayType(shapeInfo) != ArrayType::EMPTY) - throw std::runtime_error("NDArray constructor: can't be initalized with nullptr buffer !"); - - if (shapeInfo == nullptr) - throw std::runtime_error("NDArray constructor: can't be initalized without shapeinfo !"); - - if ((int) shapeInfo[0] > MAX_RANK) - throw std::invalid_argument("NDArray constructor: rank of NDArray can't exceed 32 !"); - - _context = context; - _isAttached = getContext()->getWorkspace() != nullptr; - _offset = 0; - - setShapeInfo(ShapeDescriptor(shapeInfo)); - - if (this->isEmpty()) { - tickReadDevice(); - tickReadHost(); + //////////////////////////////////////////////////////////////////////// + // creates new NDArray using shape information from "shapeInfo" array, set all elements in new array to be zeros, set dtype as array type + NDArray::NDArray(const Nd4jLong* shapeInfo, const bool copyStrides, sd::LaunchContext * context, const bool nullify): + NDArray(shapeInfo, ArrayOptions::dataType(shapeInfo), copyStrides, context) { } - else { - _buffer = std::make_shared(buffer, lengthOf() * sizeOfT(), dataType(), isBuffAlloc, getContext()->getWorkspace()); + + //////////////////////////////////////////////////////////////////////// + NDArray::NDArray(std::shared_ptr buffer, const ShapeDescriptor& descriptor, sd::LaunchContext* context, const Nd4jLong offset) { + + _context = context; + _offset = offset; + + setShapeInfo(descriptor); + + _buffer = buffer; + + _isView = offset > 0 || _length * DataTypeUtils::sizeOf(_dataType) < buffer->getLenInBytes(); } -} -//////////////////////////////////////////////////////////////////////// -// do not allocate memory, memory for array is passed from outside -// we suppose the content of both (device and host) buffers is identical -NDArray::NDArray(void *buffer, void* 
bufferD, Nd4jLong *shapeInfo, sd::LaunchContext * context, const bool isBuffAlloc, const bool isBuffDAlloc) { + NDArray::NDArray(void *buffer, Nd4jLong *shapeInfo, sd::LaunchContext * context, const bool isBuffAlloc) : NDArray::NDArray(buffer, const_cast(shapeInfo), context, isBuffAlloc) { + // + } - if (shapeInfo == nullptr) - throw std::runtime_error("NDArray constructor cuda: can't be initalized without shapeinfo"); + //////////////////////////////////////////////////////////////////////// + // do not allocate memory, memory for array is passed from outside + NDArray::NDArray(void *buffer, const Nd4jLong *shapeInfo, sd::LaunchContext * context, const bool isBuffAlloc) { - if ((int) shapeInfo[0] > MAX_RANK) - throw std::invalid_argument("NDArray constructor cuda: rank of NDArray can't exceed 32"); + if (buffer == nullptr && ArrayOptions::arrayType(shapeInfo) != ArrayType::EMPTY) + throw std::runtime_error("NDArray constructor: can't be initalized with nullptr buffer !"); - _context = context; - _offset = 0; + if (shapeInfo == nullptr) + throw std::runtime_error("NDArray constructor: can't be initalized without shapeinfo !"); - setShapeInfo(ShapeDescriptor(shapeInfo)); + if ((int) shapeInfo[0] > MAX_RANK) + throw std::invalid_argument("NDArray constructor: rank of NDArray can't exceed 32 !"); - if (!isEmpty()) - _buffer = std::make_shared(buffer, bufferD, lengthOf() * sizeOfT(), dataType(), isBuffAlloc, isBuffDAlloc, getContext()->getWorkspace()); -} + _context = context; + _isAttached = getContext()->getWorkspace() != nullptr; + _offset = 0; + + setShapeInfo(ShapeDescriptor(shapeInfo)); + + if (this->isEmpty()) { + tickReadDevice(); + tickReadHost(); + } + else { + _buffer = std::make_shared(buffer, lengthOf() * sizeOfT(), dataType(), isBuffAlloc, getContext()->getWorkspace()); + } + } + + //////////////////////////////////////////////////////////////////////// + // do not allocate memory, memory for array is passed from outside + // we suppose the content of 
both (device and host) buffers is identical + NDArray::NDArray(void *buffer, void* bufferD, const Nd4jLong *shapeInfo, sd::LaunchContext * context, const bool isBuffAlloc, const bool isBuffDAlloc) { + + if (shapeInfo == nullptr) + throw std::runtime_error("NDArray constructor cuda: can't be initalized without shapeinfo"); + + if ((int) shapeInfo[0] > MAX_RANK) + throw std::invalid_argument("NDArray constructor cuda: rank of NDArray can't exceed 32"); + + _context = context; + _offset = 0; + + setShapeInfo(ShapeDescriptor(shapeInfo)); + + if (!isEmpty()) + _buffer = std::make_shared(buffer, bufferD, lengthOf() * sizeOfT(), dataType(), isBuffAlloc, isBuffDAlloc, getContext()->getWorkspace()); + } ////////////////////////////////////////////////////////////////////////// NDArray::NDArray(std::shared_ptr buffer, const char order, const std::vector &shape, sd::LaunchContext* context) { @@ -1046,7 +1050,7 @@ std::vector NDArray::asByteVector() { auto dataLength = offsetsBuffer[numWords]; std::vector result(headerLength + dataLength); - memcpy(result.data(), getBuffer(), headerLength + dataLength); + memcpy(result.data(), buffer(), headerLength + dataLength); return result; } else { @@ -1056,10 +1060,10 @@ std::vector NDArray::asByteVector() { if (this->isView()) { auto tmp = this->dup(this->ordering()); syncToHost(); - memcpy(result.data(), tmp.getBuffer(), (unsigned long long) lengthOf() * sizeOfT()); + memcpy(result.data(), tmp.buffer(), (unsigned long long) lengthOf() * sizeOfT()); } else { syncToHost(); - memcpy(result.data(), getBuffer(), (unsigned long long) lengthOf() * sizeOfT()); + memcpy(result.data(), buffer(), (unsigned long long) lengthOf() * sizeOfT()); } return result; } @@ -1085,7 +1089,7 @@ void NDArray::streamline(char o) { syncToDevice(); std::shared_ptr newBuffer = std::make_shared(this->lengthOf() * sizeOfT(), dataType(), getContext()->getWorkspace()); auto shapeBuffer = ConstantShapeHelper::getInstance()->bufferForShapeInfo(dataType(), order, 
rankOf(), shapeOf()); - NativeOpExecutioner::execTransformSame(getContext(), transform::Copy, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), newBuffer->primary(), static_cast(shapeBuffer.primary()), newBuffer->special(), static_cast(shapeBuffer.special()), nullptr, nullptr, nullptr); + NativeOpExecutioner::execTransformSame(getContext(), transform::Copy, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), newBuffer->primary(), static_cast(shapeBuffer.primary()), newBuffer->special(), static_cast(shapeBuffer.special()), nullptr, nullptr, nullptr); setShapeInfo(static_cast(shapeBuffer.primary())); _buffer = newBuffer; _offset = 0; @@ -1140,7 +1144,7 @@ void NDArray::copyBuffersContinuouslyFrom(const NDArray& other, size_t sizeToCop if(offsetThis == 0) offsetThis = bufferOffset(); if(offsetOther == 0) - offsetOther = other.getBufferOffset(); + offsetOther = other.bufferOffset(); dataBuffer()->copyBufferFrom(*other.getDataBuffer(), sizeToCopyInBytes, offsetThis, offsetOther); } @@ -1154,10 +1158,7 @@ void NDArray::assign(const NDArray& other, bool allowParallelism) { if (other.isEmpty()) { if (!isEmpty()) { - ArrayOptions::setPropertyBit(shapeInfo(), ARRAY_EMPTY); - syncShape(); - _buffer = std::make_shared(); - _offset = 0; + throw std::runtime_error("Cannot assign empty array to non-empty array"); } return; } @@ -1171,7 +1172,7 @@ void NDArray::assign(const NDArray& other, bool allowParallelism) { if(lengthOf() == 1) { NDArray::preparePrimaryUse({this}, {&other}); - BUILD_DOUBLE_SELECTOR(dataType(), other.dataType(), templatedDoubleAssign, (buffer(), 0, other.getBuffer(), 0), LIBND4J_TYPES, LIBND4J_TYPES); + BUILD_DOUBLE_SELECTOR(dataType(), other.dataType(), templatedDoubleAssign, (buffer(), 0, other.buffer(), 0), LIBND4J_TYPES, LIBND4J_TYPES); NDArray::registerPrimaryUse({this}, {&other}); this->syncToDevice(); } @@ -1179,12 +1180,12 @@ void NDArray::assign(const NDArray& other, bool allowParallelism) { if (dataType() != 
other.dataType()) { auto tmp = other.cast(dataType()); NDArray::prepareSpecialUse({this}, {&tmp}); - NativeOpExecutioner::execScalar(getContext(), scalar::CopyPws, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), tmp.getBuffer(), tmp.getShapeInfo(), tmp.getSpecialBuffer(), tmp.getSpecialShapeInfo(), nullptr, allowParallelism); + NativeOpExecutioner::execScalar(getContext(), scalar::CopyPws, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr, allowParallelism); NDArray::registerSpecialUse({this}, {}); } else { NDArray::prepareSpecialUse({this}, {&other}); - NativeOpExecutioner::execScalar(getContext(), scalar::CopyPws, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), nullptr, allowParallelism); + NativeOpExecutioner::execScalar(getContext(), scalar::CopyPws, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), nullptr, allowParallelism); NDArray::registerSpecialUse({this}, {&other}); } } @@ -1198,7 +1199,7 @@ void NDArray::assign(const NDArray& other, bool allowParallelism) { } NDArray::prepareSpecialUse({this}, {&other}); - NativeOpExecutioner::execTransformAny(getContext(), transform::Assign, other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, nullptr, nullptr, allowParallelism); + NativeOpExecutioner::execTransformAny(getContext(), transform::Assign, other.buffer(), other.shapeInfo(), other.specialBuffer(), 
other.specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, nullptr, nullptr, allowParallelism); NDArray::registerSpecialUse({this}, {&other}); } } @@ -1216,7 +1217,7 @@ void NDArray::assign(const T& value, bool allowParallelism) { auto temp = NDArrayFactory::create(dataType(), value, this->getContext()); NDArray::prepareSpecialUse({this}, {&temp}); - NativeOpExecutioner::execScalar(getContext(), sd::scalar::CopyPws, buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), temp.buffer(), temp.shapeInfo(), temp.specialBuffer(), temp.getSpecialShapeInfo(), nullptr, allowParallelism); + NativeOpExecutioner::execScalar(getContext(), sd::scalar::CopyPws, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), temp.buffer(), temp.shapeInfo(), temp.specialBuffer(), temp.specialShapeInfo(), nullptr, allowParallelism); NDArray::registerSpecialUse({this}, {&temp}); } template ND4J_EXPORT void NDArray::assign(const double& value, bool allowParallelism); @@ -1254,7 +1255,7 @@ NDArray NDArray::varianceNumber(sd::variance::Ops op, bool biasCorrected) { NDArray res(DataTypeUtils::pickFloatingType(dataType()), getContext()); NDArray::prepareSpecialUse({&res}, {this}); - NativeOpExecutioner::execSummaryStatsScalar(getContext(), op, buffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), nullptr, res.buffer(), res.shapeInfo(), res.specialBuffer(), res.specialShapeInfo(), biasCorrected); + NativeOpExecutioner::execSummaryStatsScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, res.buffer(), res.shapeInfo(), res.specialBuffer(), res.specialShapeInfo(), biasCorrected); NDArray::registerSpecialUse({&res}, {this}); return res; @@ -1268,7 +1269,7 @@ NDArray NDArray::sumNumber() const { NDArray res(dataType(), getContext()); NDArray::prepareSpecialUse({&res}, 
{this}); - NativeOpExecutioner::execReduceSameScalar(getContext(), sd::reduce::SameOps::Sum, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), nullptr, res.buffer(), res.shapeInfo(), res.specialBuffer(), res.specialShapeInfo()); + NativeOpExecutioner::execReduceSameScalar(getContext(), sd::reduce::SameOps::Sum, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, res.buffer(), res.shapeInfo(), res.specialBuffer(), res.specialShapeInfo()); NDArray::registerSpecialUse({&res}, {this}); return res; @@ -1283,7 +1284,7 @@ NDArray NDArray::meanNumber() const { NDArray res(DataTypeUtils::pickFloatingType(dataType()), getContext()); NDArray::prepareSpecialUse({&res}, {this}); - NativeOpExecutioner::execReduceFloatScalar(getContext(), sd::reduce::FloatOps::Mean, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), nullptr, res.buffer(), res.shapeInfo(), res.specialBuffer(), res.specialShapeInfo()); + NativeOpExecutioner::execReduceFloatScalar(getContext(), sd::reduce::FloatOps::Mean, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, res.buffer(), res.shapeInfo(), res.specialBuffer(), res.specialShapeInfo()); NDArray::registerSpecialUse({&res}, {this}); return res; } @@ -1315,7 +1316,7 @@ void NDArray::templatedSet(void *buffer, const Nd4jLong *indices, const void *va auto t = reinterpret_cast(buffer); const auto y = *(reinterpret_cast(value)); - auto xOffset = shape::getOffset(getShapeInfo(), indices); + auto xOffset = shape::getOffset(shapeInfo(), indices); t[xOffset] = static_cast(y); } BUILD_DOUBLE_TEMPLATE(template ND4J_EXPORT void NDArray::templatedSet, (void *buffer, const Nd4jLong *indices, const void *value), LIBND4J_TYPES, LIBND4J_TYPES); @@ -1339,9 +1340,13 @@ void NDArray::setContext(sd::LaunchContext *context) { } ////////////////////////////////////////////////////////////////////////// -void* NDArray::bufferWithOffset(Nd4jLong offset) const { +void const* 
NDArray::bufferWithOffset(Nd4jLong offset) const { + return const_cast(buffer() != nullptr ? static_cast(buffer()) + (offset * sizeOfT()) : nullptr); +} - return getBuffer() != nullptr ? static_cast(getBuffer()) + (offset * sizeOfT()) : nullptr; +////////////////////////////////////////////////////////////////////////// +void* NDArray::bufferWithOffset(Nd4jLong offset) { + return const_cast(buffer() != nullptr ? static_cast(buffer()) + (offset * sizeOfT()) : nullptr); } ////////////////////////////////////////////////////////////////////////// @@ -1431,7 +1436,7 @@ NDArray NDArray::reduceNumber(sd::reduce::FloatOps op, void *extraParams) const NDArray result(shape, true, this->getContext()); NDArray::prepareSpecialUse({&result}, {this}); - NativeOpExecutioner::execReduceFloatScalar(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), extraParams, result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo()); + NativeOpExecutioner::execReduceFloatScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), extraParams, result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo()); NDArray::registerSpecialUse({&result}, {this}); return result; @@ -1445,7 +1450,7 @@ NDArray NDArray::reduceNumber(sd::reduce::SameOps op, void *extraParams) const { NDArray result(dataType(), getContext()); NDArray::prepareSpecialUse({&result}, {this}); - NativeOpExecutioner::execReduceSameScalar(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), extraParams, result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo()); + NativeOpExecutioner::execReduceSameScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), extraParams, result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo()); NDArray::registerSpecialUse({&result}, {this}); return result; @@ -1460,7 
+1465,7 @@ NDArray NDArray::reduceNumber(sd::reduce::BoolOps op, void *extraParams) const { NDArray result(shape, true, this->getContext()); NDArray::prepareSpecialUse({&result}, {this}); - NativeOpExecutioner::execReduceBoolScalar(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), extraParams, result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo()); + NativeOpExecutioner::execReduceBoolScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), extraParams, result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo()); NDArray::registerSpecialUse({&result}, {this}); return result; @@ -1475,7 +1480,7 @@ NDArray NDArray::reduceNumber(sd::reduce::LongOps op, void *extraParams) const { NDArray result(shape, true, this->getContext()); NDArray::prepareSpecialUse({&result}, {this}); - NativeOpExecutioner::execReduceLongScalar(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), extraParams, result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo()); + NativeOpExecutioner::execReduceLongScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), extraParams, result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo()); NDArray::registerSpecialUse({&result}, {this}); return result; @@ -1489,7 +1494,7 @@ void NDArray::reduceNumber(sd::reduce::FloatOps op, NDArray& target, void *extra throw std::invalid_argument("NDArray::reduceNumber FloatOps: target array should be scalar and have corresponding float type!"); NDArray::prepareSpecialUse({&target}, {this}); - NativeOpExecutioner::execReduceFloatScalar(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), extraParams, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); + 
NativeOpExecutioner::execReduceFloatScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), extraParams, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); NDArray::registerSpecialUse({&target}, {this}); } @@ -1502,7 +1507,7 @@ void NDArray::reduceNumber(sd::reduce::SameOps op, NDArray& target, void *extraP throw std::invalid_argument("NDArray::reduceNumber SameOps: target array should be scalar and have same type as this array!"); NDArray::prepareSpecialUse({&target}, {this}); - NativeOpExecutioner::execReduceSameScalar(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), extraParams, target.getBuffer(), target.getShapeInfo(), target.specialBuffer(), target.getSpecialShapeInfo()); + NativeOpExecutioner::execReduceSameScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), extraParams, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); NDArray::registerSpecialUse({&target}, {this}); } @@ -1515,7 +1520,7 @@ void NDArray::reduceNumber(sd::reduce::BoolOps op, NDArray& target, void *extraP throw std::invalid_argument("NDArray::reduceNumber BoolOps: target array should be scalar and have bool type!"); NDArray::prepareSpecialUse({&target}, {this}); - NativeOpExecutioner::execReduceBoolScalar(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), extraParams, target.getBuffer(), target.getShapeInfo(), target.specialBuffer(), target.getSpecialShapeInfo()); + NativeOpExecutioner::execReduceBoolScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), extraParams, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); NDArray::registerSpecialUse({&target}, {this}); } @@ -1528,7 +1533,7 @@ void NDArray::reduceNumber(sd::reduce::LongOps op, NDArray& target, void *extraP throw 
std::invalid_argument("NDArray::reduceNumber LongOps: target array should be scalar and have long type!"); NDArray::prepareSpecialUse({&target}, {this}); - NativeOpExecutioner::execReduceLongScalar(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), extraParams, target.getBuffer(), target.getShapeInfo(), target.specialBuffer(), target.getSpecialShapeInfo()); + NativeOpExecutioner::execReduceLongScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), extraParams, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); NDArray::registerSpecialUse({&target}, {this}); } @@ -1540,7 +1545,7 @@ NDArray NDArray::indexReduceNumber(sd::indexreduce::Ops op, ExtraArguments *extr auto res = NDArrayFactory::create(0); NDArray::NDArray::prepareSpecialUse({&res}, {this}); - NativeOpExecutioner::execIndexReduceScalar(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), extraParams == nullptr ? nullptr : extraParams->argumentsAsT(this->dataType()), res.buffer(), res.shapeInfo(), res.specialBuffer(), res.specialShapeInfo()); + NativeOpExecutioner::execIndexReduceScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), extraParams == nullptr ? 
nullptr : extraParams->argumentsAsT(this->dataType()), res.buffer(), res.shapeInfo(), res.specialBuffer(), res.specialShapeInfo()); NDArray::NDArray::registerSpecialUse({&res}, {this}); return res; @@ -1734,7 +1739,7 @@ static void printFormatted(NDArray const* arr, int depth, int limit) { //std::unique_ptr arrs(arr->allTensorsAlongDimension({0})); size_t restCount = 2; printf("["); - restCount = ShapeUtils::getNumOfSubArrs(arr->getShapeInfo(), {0}); + restCount = ShapeUtils::getNumOfSubArrs(arr->shapeInfo(), {0}); for (size_t arrIndex = 0; arrIndex < restCount; ++arrIndex) { NDArray subArr = (*arr)(arrIndex, {0}); printFormatted(&subArr, depth + 1, limit); @@ -1792,14 +1797,14 @@ void NDArray::printIndexedBuffer(const char* msg, Nd4jLong limit) const { ////////////////////////////////////////////////////////////////////////// template void* NDArray::templatedPointerShift(const Nd4jLong offset) const { - return reinterpret_cast(getBuffer()) + offset; + return const_cast(reinterpret_cast(buffer()) + offset); } BUILD_SINGLE_TEMPLATE(template ND4J_EXPORT void* NDArray::templatedPointerShift, (const Nd4jLong offset) const, LIBND4J_TYPES); ////////////////////////////////////////////////////////////////////////// // method makes copy of this array and applies to the copy transpose operation, this array remains unaffected NDArray NDArray::transpose() const &{ - NDArray newArr(getDataBuffer(), ShapeDescriptor(getShapeInfo()), getContext(), getBufferOffset()); + NDArray newArr(getDataBuffer(), ShapeDescriptor(shapeInfo()), getContext(), bufferOffset()); newArr.transposei(); return newArr; @@ -1818,7 +1823,7 @@ NDArray NDArray::transpose() && { void NDArray::transpose(NDArray& target) const { auto correctShape = ShapeUtils::evalTranspShapeInfo(*this, getContext()->getWorkspace()); - if(!shape::equalsStrict(correctShape, target.getShapeInfo())) + if(!shape::equalsStrict(correctShape, target.shapeInfo())) throw std::runtime_error("NDArray::transpose method: the shapeInfo of 
target array is wrong !"); target._buffer = _buffer; @@ -1920,7 +1925,7 @@ Nd4jLong NDArray::argMax(std::initializer_list dimensions) { // create new array with corresponding order and shape, new array will point to the same _buffer as this array NDArray NDArray::reshape(const char order, const std::vector& shape, const bool copyToNewBuff) const & { - NDArray newArr(getDataBuffer(), ShapeDescriptor(getShapeInfo()), getContext(), getBufferOffset()); + NDArray newArr(getDataBuffer(), ShapeDescriptor(shapeInfo()), getContext(), bufferOffset()); newArr.reshapei(order, shape, copyToNewBuff); return newArr; @@ -2001,7 +2006,7 @@ NDArray NDArray::permute(const int* dimensions, const int rank) const & { // evaluate shapeInfo for output (permuted) array ret auto shapeInfoPermuted = ShapeUtils::evalPermShapeInfo(dimensions, rank, *this, getContext()->getWorkspace()); - NDArray ret(getDataBuffer(), ShapeDescriptor(shapeInfoPermuted), getContext(), getBufferOffset()); + NDArray ret(getDataBuffer(), ShapeDescriptor(shapeInfoPermuted), getContext(), bufferOffset()); ret._isView = true; return ret; } @@ -2157,19 +2162,26 @@ bool NDArray::isUnitary() { ////////////////////////////////////////////////////////////////////////// template <> -std::string* ND4J_EXPORT NDArray::bufferAsT() const { +const std::string* ND4J_EXPORT NDArray::bufferAsT() const { throw std::runtime_error("This method is NOT supposed to be used"); } ////////////////////////////////////////////////////////////////////////// template -T* NDArray::bufferAsT() const { +const T* NDArray::bufferAsT() const { // FIXME: do we REALLY want sync here? 
syncToHost(); - return reinterpret_cast(getBuffer()); + return reinterpret_cast(buffer()); } -BUILD_SINGLE_UNCHAINED_TEMPLATE(template ND4J_EXPORT , * NDArray::bufferAsT() const, LIBND4J_TYPES); +BUILD_SINGLE_UNCHAINED_TEMPLATE(template ND4J_EXPORT const, * NDArray::bufferAsT() const, LIBND4J_TYPES); + +template +T* NDArray::bufferAsT() { + syncToHost(); + return reinterpret_cast(buffer()); +} +BUILD_SINGLE_UNCHAINED_TEMPLATE(template ND4J_EXPORT, * NDArray::bufferAsT(), LIBND4J_TYPES); //////////////////////////////////////////////////////////////////////// NDArray NDArray::subarray(IndicesList& idx) const { @@ -2282,7 +2294,7 @@ NDArray NDArray::asT() const{ auto result = isScalar() ? NDArray('c', {}, std::vector{0.}, DataTypeUtils::fromT(), this->getContext()) : NDArray(ordering(), getShapeAsVector(), DataTypeUtils::fromT(), this->getContext()); NDArray::prepareSpecialUse({&result}, {this}); - NativeOpExecutioner::execTransformAny(getContext(), transform::AnyOps::Assign, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), result.getBuffer(), result.getShapeInfo(), result.getSpecialBuffer(), result.getSpecialShapeInfo(), nullptr, nullptr, nullptr); + NativeOpExecutioner::execTransformAny(getContext(), transform::AnyOps::Assign, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), nullptr, nullptr, nullptr); NDArray::registerSpecialUse({&result}, {this}); return result; @@ -2449,20 +2461,20 @@ void NDArray::operator+=(const NDArray& other) { if (this->lengthOf() != 1 && other.lengthOf() == 1) { NDArray::prepareSpecialUse({this}, {this, &other}); - NativeOpExecutioner::execScalar(getContext(), sd::scalar::Add, buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), nullptr); + 
NativeOpExecutioner::execScalar(getContext(), sd::scalar::Add, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({this}, {this, &other}); } else if (other.lengthOf() == lengthOf() && this->rankOf() == other.rankOf()) { NDArray::prepareSpecialUse({this}, {this, &other}); - NativeOpExecutioner::execPairwiseTransform(getContext(), sd::pairwise::Add, buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), nullptr); + NativeOpExecutioner::execPairwiseTransform(getContext(), sd::pairwise::Add, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr); NDArray::registerSpecialUse({this}, {this, &other}); } else{ - Nd4jLong *bShape = nullptr; + const Nd4jLong *bShape = nullptr; if(!ShapeUtils::evalBroadcastShapeInfo(*this, other, true, bShape, getContext()->getWorkspace())) throw std::invalid_argument("NDArray::operator+=: the shapes of this and other arrays are not suitable for broadcast operation !"); - if(shape::equalsTypesAndShapesSoft(getShapeInfo(), bShape)) { + if(shape::equalsTypesAndShapesSoft(shapeInfo(), bShape)) { this->applyTrueBroadcast(sd::BroadcastOpsTuple::Add(), other, *this, false); } else { @@ -2483,20 +2495,20 @@ void NDArray::operator-=(const NDArray& other) { if (lengthOf() != 1 && other.lengthOf() == 1) { NDArray::prepareSpecialUse({this}, {this, &other}); - NativeOpExecutioner::execScalar(getContext(), sd::scalar::Subtract, buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), buffer(), getShapeInfo(), specialBuffer(), 
getSpecialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(getContext(), sd::scalar::Subtract, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({this}, {this, &other}); } else if (other.lengthOf() == lengthOf() && this->rankOf() == other.rankOf()) { NDArray::prepareSpecialUse({this}, {this, &other}); - NativeOpExecutioner::execPairwiseTransform(getContext(), sd::pairwise::Subtract, buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), nullptr); + NativeOpExecutioner::execPairwiseTransform(getContext(), sd::pairwise::Subtract, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr); NDArray::registerSpecialUse({this}, {this, &other}); } else{ - Nd4jLong *bShape = nullptr; + const Nd4jLong *bShape = nullptr; if(!ShapeUtils::evalBroadcastShapeInfo(*this, other, true, bShape, getContext()->getWorkspace())) throw std::invalid_argument("NDArray::operator-=: the shapes of this and other arrays are not suitable for broadcast operation !"); - if(shape::equalsTypesAndShapesSoft(getShapeInfo(), bShape)) { + if(shape::equalsTypesAndShapesSoft(shapeInfo(), bShape)) { this->applyTrueBroadcast(sd::BroadcastOpsTuple::Subtract(), other, *this, false); } else { @@ -2516,16 +2528,16 @@ void NDArray::operator*=(const NDArray& other) { if (lengthOf() != 1 && other.lengthOf() == 1) { NDArray::prepareSpecialUse({this}, {this, &other}); - 
NativeOpExecutioner::execScalar(getContext(), sd::scalar::Multiply, buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(getContext(), sd::scalar::Multiply, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({this}, {this, &other}); } else if (other.lengthOf() == lengthOf() && this->rankOf() == other.rankOf()) { NDArray::prepareSpecialUse({this}, {this, &other}); - NativeOpExecutioner::execPairwiseTransform(getContext(), sd::pairwise::Multiply, buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), nullptr); + NativeOpExecutioner::execPairwiseTransform(getContext(), sd::pairwise::Multiply, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr); NDArray::registerSpecialUse({this}, {this, &other}); } else{ - Nd4jLong *bShape = nullptr; + const Nd4jLong *bShape = nullptr; if(!ShapeUtils::evalBroadcastShapeInfo(*this, other, true, bShape, getContext()->getWorkspace())) throw std::invalid_argument("NDArray::operator*=: the shapes of this and other arrays are not suitable for broadcast operation !"); @@ -2553,16 +2565,16 @@ void NDArray::operator/=(const NDArray& other) { if (lengthOf() != 1 && other.lengthOf() == 1) { NDArray::prepareSpecialUse({this}, {this, &other}); - NativeOpExecutioner::execScalar(getContext(), sd::scalar::Divide, 
buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(getContext(), sd::scalar::Divide, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({this}, {this, &other}); } else if (other.lengthOf() == lengthOf() && this->rankOf() == other.rankOf()) { NDArray::prepareSpecialUse({this}, {this, &other}); - NativeOpExecutioner::execPairwiseTransform(getContext(), sd::pairwise::Divide, buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), nullptr); + NativeOpExecutioner::execPairwiseTransform(getContext(), sd::pairwise::Divide, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr); NDArray::registerSpecialUse({this}, {this, &other}); } else{ - Nd4jLong *bShape = nullptr; + const Nd4jLong *bShape = nullptr; if(!ShapeUtils::evalBroadcastShapeInfo(*this, other, true, bShape, getContext()->getWorkspace())) throw std::invalid_argument("NDArray::operator/=: the shapes of this and other arrays are not suitable for broadcast operation !"); @@ -2587,7 +2599,7 @@ void NDArray::operator+=(const T value) { NDArray::prepareSpecialUse({this}, {&other}); - NativeOpExecutioner::execScalar(getContext(), sd::scalar::Add, buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), 
other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(getContext(), sd::scalar::Add, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({this}, {}); } @@ -2609,7 +2621,7 @@ void NDArray::operator-=(const T value) { NDArray::prepareSpecialUse({this}, {&other}); - NativeOpExecutioner::execScalar(getContext(), sd::scalar::Subtract, buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(getContext(), sd::scalar::Subtract, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({this}, {}); } @@ -2629,7 +2641,7 @@ void NDArray::operator*=(const T scalar) { auto other = NDArrayFactory::create(this->dataType(), scalar, getContext()); NDArray::prepareSpecialUse({this}, {&other}); - NativeOpExecutioner::execScalar(getContext(), sd::scalar::Multiply, buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(getContext(), sd::scalar::Multiply, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({this}, {}); } @@ 
-2652,7 +2664,7 @@ void NDArray::operator/=(const T scalar) { auto other = NDArrayFactory::create(this->dataType(), scalar, getContext()); NDArray::prepareSpecialUse({this}, {&other}); - NativeOpExecutioner::execScalar(getContext(), sd::scalar::Divide, buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(getContext(), sd::scalar::Divide, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({this}, {}); } template ND4J_EXPORT void NDArray::operator/=(const double scalar); @@ -2672,10 +2684,10 @@ NDArray NDArray::operator-() const & { if (isS()) throw std::runtime_error("NDArray::negative-: you can't use this method on String array!"); - NDArray result(getShapeInfo(), false, getContext()); + NDArray result(shapeInfo(), false, getContext()); NDArray::prepareSpecialUse({&result}, {this}); - NativeOpExecutioner::execTransformSame(getContext(), sd::transform::Neg, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), result.buffer(), result.getShapeInfo(), result.specialBuffer(), result.getSpecialShapeInfo(), nullptr, nullptr, nullptr); + NativeOpExecutioner::execTransformSame(getContext(), sd::transform::Neg, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), nullptr, nullptr, nullptr); NDArray::registerSpecialUse({&result}, {this}); return result; @@ -2687,7 +2699,7 @@ NDArray NDArray::operator-() && { throw std::runtime_error("NDArray::negative-: you can't use this method on String array!"); NDArray::prepareSpecialUse({this}, {this}); - 
NativeOpExecutioner::execTransformSame(getContext(), sd::transform::Neg, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), buffer(), getShapeInfo(), specialBuffer(), getSpecialShapeInfo(), nullptr, nullptr, nullptr); + NativeOpExecutioner::execTransformSame(getContext(), sd::transform::Neg, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, nullptr, nullptr); NDArray::registerSpecialUse({this}, {this}); return std::move(*this); @@ -2757,7 +2769,7 @@ double NDArray::getTrace() const { for(int j = 0; j < rank; ++j) indices[j] = 1; - auto offset = shape::getOffset(getShapeInfo(), indices); + auto offset = shape::getOffset(shapeInfo(), indices); for(int i = 0; i < rank; ++i) if(minDim > shape[i]) @@ -2779,7 +2791,7 @@ NDArray NDArray::quantize(const NDArray& array) { auto ws = array.getContext()->getWorkspace(); - Nd4jLong* shapeInfo = ShapeBuilders::copyShapeInfo(array.getShapeInfo(), true, ws); + Nd4jLong* shapeInfo = ShapeBuilders::copyShapeInfo(array.shapeInfo(), true, ws); ArrayOptions::setPropertyBit(shapeInfo, ARRAY_QUANTIZED); std::shared_ptr buffer = std::make_shared(TypeCast::estimateQuantizedSize(array.lengthOf()), ArrayOptions::dataType(shapeInfo), ws); @@ -2812,31 +2824,31 @@ void NDArray::applyTrueBroadcast(sd::BroadcastOpsTuple op, const NDArray& other, // } if(checkTargetShape) { - Nd4jLong* newShapeInfo = nullptr; + const Nd4jLong* newShapeInfo = nullptr; if(!ShapeUtils::evalBroadcastShapeInfo(*this, other, true, newShapeInfo, getContext()->getWorkspace())) // the rank of target array must be equal to max->rankOf)() throw std::runtime_error("NDArray::applyTrueBroadcast method: the shapes of this and other arrays are not suitable for broadcast operation !"); - if(!shape::equalsTypesAndShapesSoft(target.getShapeInfo(), newShapeInfo)) + if(!shape::equalsTypesAndShapesSoft(target.shapeInfo(), newShapeInfo)) throw 
std::runtime_error("NDArray::applyTrueBroadcast method: the shape or type of target array is wrong !"); } - Nd4jLong* xShapeInfoH = getShapeInfo(); - Nd4jLong* yShapeInfoH = other.getShapeInfo(); - Nd4jLong* xShapeInfoD = getSpecialShapeInfo(); - Nd4jLong* yShapeInfoD = other.getSpecialShapeInfo(); + Nd4jLong const* xShapeInfoH = shapeInfo(); + Nd4jLong const* yShapeInfoH = other.shapeInfo(); + Nd4jLong const* xShapeInfoD = specialShapeInfo(); + Nd4jLong const* yShapeInfoD = other.specialShapeInfo(); if(!isSameShape(target)) { - auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.getShapeInfo(), getShapeInfo(), getContext()->getWorkspace()); - xShapeInfoH = reinterpret_cast(xPack.primary()); - xShapeInfoD = reinterpret_cast(xPack.special()); + auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace()); + xShapeInfoH = reinterpret_cast(xPack.primary()); + xShapeInfoD = reinterpret_cast(xPack.special()); } if(!other.isSameShape(target)) { - auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.getShapeInfo(), other.getShapeInfo(), other.getContext()->getWorkspace()); - yShapeInfoH = reinterpret_cast(yPack.primary()); - yShapeInfoD = reinterpret_cast(yPack.special()); + auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace()); + yShapeInfoH = reinterpret_cast(yPack.primary()); + yShapeInfoD = reinterpret_cast(yPack.special()); } NDArray::prepareSpecialUse({&target}, {this, &other}); - NativeOpExecutioner::execBroadcast(getContext(), op.b, getBuffer(), xShapeInfoH, getSpecialBuffer(), xShapeInfoD, other.getBuffer(), yShapeInfoH, other.getSpecialBuffer(), yShapeInfoD, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); + 
NativeOpExecutioner::execBroadcast(getContext(), op.b, buffer(), xShapeInfoH, specialBuffer(), xShapeInfoD, other.buffer(), yShapeInfoH, other.specialBuffer(), yShapeInfoD, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); registerSpecialUse({&target}, {this, &other}); } @@ -2861,7 +2873,7 @@ void NDArray::applyTrueBroadcast(sd::BroadcastBoolOpsTuple op, const NDArray& ot // } if(checkTargetShape) { - Nd4jLong* newShapeInfo = nullptr; + const Nd4jLong* newShapeInfo = nullptr; if(!ShapeUtils::evalBroadcastShapeInfo(*this, other, true, newShapeInfo, getContext()->getWorkspace())) // the rank of target array must be equal to max->rankOf)() throw std::runtime_error("NDArray::applyTrueBroadcast method: the shapes of this and other arrays are not suitable for broadcast operation !"); if(!shape::equalsSoft(target._shapeInfo, newShapeInfo) || target.dataType() != DataType::BOOL) @@ -2870,24 +2882,24 @@ void NDArray::applyTrueBroadcast(sd::BroadcastBoolOpsTuple op, const NDArray& ot throw std::invalid_argument("NDArray::applyTrueBroadcast bool method: this and other arrays must have the same type !"); } - Nd4jLong* xShapeInfoH = getShapeInfo(); - Nd4jLong* yShapeInfoH = other.getShapeInfo(); - Nd4jLong* xShapeInfoD = getSpecialShapeInfo(); - Nd4jLong* yShapeInfoD = other.getSpecialShapeInfo(); + Nd4jLong const* xShapeInfoH = shapeInfo(); + Nd4jLong const* yShapeInfoH = other.shapeInfo(); + Nd4jLong const* xShapeInfoD = specialShapeInfo(); + Nd4jLong const* yShapeInfoD = other.specialShapeInfo(); if(!isSameShape(target)) { - auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.getShapeInfo(), getShapeInfo(), getContext()->getWorkspace()); - xShapeInfoH = reinterpret_cast(xPack.primary()); - xShapeInfoD = reinterpret_cast(xPack.special()); + auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace()); + 
xShapeInfoH = reinterpret_cast(xPack.primary()); + xShapeInfoD = reinterpret_cast(xPack.special()); } if(!other.isSameShape(target)) { - auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.getShapeInfo(), other.getShapeInfo(), other.getContext()->getWorkspace()); - yShapeInfoH = reinterpret_cast(yPack.primary()); - yShapeInfoD = reinterpret_cast(yPack.special()); + auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace()); + yShapeInfoH = reinterpret_cast(yPack.primary()); + yShapeInfoD = reinterpret_cast(yPack.special()); } NDArray::prepareSpecialUse({&target}, {this, &other}); - NativeOpExecutioner::execBroadcastBool(getContext(), op.b, getBuffer(), xShapeInfoH, getSpecialBuffer(), xShapeInfoD, other.getBuffer(), yShapeInfoH, other.getSpecialBuffer(), yShapeInfoD, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr); + NativeOpExecutioner::execBroadcastBool(getContext(), op.b, buffer(), xShapeInfoH, specialBuffer(), xShapeInfoD, other.buffer(), yShapeInfoH, other.specialBuffer(), yShapeInfoD, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr); registerSpecialUse({&target}, {this, &other}); } @@ -2912,7 +2924,7 @@ void NDArray::applyTrueBroadcast(sd::BroadcastIntOpsTuple op, const NDArray& oth // } if(checkTargetShape) { - Nd4jLong* newShapeInfo = nullptr; + const Nd4jLong* newShapeInfo = nullptr; if(!ShapeUtils::evalBroadcastShapeInfo(*this, other, false, newShapeInfo, getContext()->getWorkspace())) // the rank of target array must be equal to max->rankOf)() throw std::runtime_error("NDArray::applyTrueBroadcast method: the shapes of this and other arrays are not suitable for broadcast operation !"); if(!shape::equalsSoft(target._shapeInfo, newShapeInfo) || target.dataType() != this->dataType()) @@ -2921,24 +2933,24 @@ void 
NDArray::applyTrueBroadcast(sd::BroadcastIntOpsTuple op, const NDArray& oth throw std::invalid_argument("NDArray::applyTrueBroadcast int method: this and other arrays must have the same type !"); } - Nd4jLong* xShapeInfoH = getShapeInfo(); - Nd4jLong* yShapeInfoH = other.getShapeInfo(); - Nd4jLong* xShapeInfoD = getSpecialShapeInfo(); - Nd4jLong* yShapeInfoD = other.getSpecialShapeInfo(); + Nd4jLong const* xShapeInfoH = shapeInfo(); + Nd4jLong const* yShapeInfoH = other.shapeInfo(); + Nd4jLong const* xShapeInfoD = specialShapeInfo(); + Nd4jLong const* yShapeInfoD = other.specialShapeInfo(); if(!isSameShape(target)) { - auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.getShapeInfo(), getShapeInfo(), getContext()->getWorkspace()); - xShapeInfoH = reinterpret_cast(xPack.primary()); - xShapeInfoD = reinterpret_cast(xPack.special()); + auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace()); + xShapeInfoH = reinterpret_cast(xPack.primary()); + xShapeInfoD = reinterpret_cast(xPack.special()); } if(!other.isSameShape(target)) { - auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.getShapeInfo(), other.getShapeInfo(), other.getContext()->getWorkspace()); - yShapeInfoH = reinterpret_cast(yPack.primary()); - yShapeInfoD = reinterpret_cast(yPack.special()); + auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace()); + yShapeInfoH = reinterpret_cast(yPack.primary()); + yShapeInfoD = reinterpret_cast(yPack.special()); } NDArray::prepareSpecialUse({&target}, {this, &other}); - NativeOpExecutioner::execBroadcastInt(getContext(), op.b, getBuffer(), xShapeInfoH, getSpecialBuffer(), xShapeInfoD, other.getBuffer(), yShapeInfoH, other.getSpecialBuffer(), yShapeInfoD, target.buffer(), 
target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); + NativeOpExecutioner::execBroadcastInt(getContext(), op.b, buffer(), xShapeInfoH, specialBuffer(), xShapeInfoD, other.buffer(), yShapeInfoH, other.specialBuffer(), yShapeInfoD, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); registerSpecialUse({&target}, {this, &other}); } @@ -2951,7 +2963,7 @@ NDArray NDArray::applyTrueBroadcast(sd::BroadcastOpsTuple op, const NDArray& oth return NDArray(other); } - Nd4jLong* newShapeInfo = nullptr; + const Nd4jLong* newShapeInfo = nullptr; if(!ShapeUtils::evalBroadcastShapeInfo(*this, other, true, newShapeInfo, getContext()->getWorkspace())) // the rank of new array = max->rankOf)() throw std::runtime_error("NDArray::applyTrueBroadcast method: the shapes of this and other arrays are not suitable for broadcast operation !"); NDArray result(newShapeInfo, true, getContext()); @@ -2970,11 +2982,11 @@ NDArray NDArray::applyTrueBroadcast(sd::BroadcastOpsTuple op, NDArray&& other, E return NDArray(other); } - Nd4jLong* newShapeInfo = nullptr; + const Nd4jLong* newShapeInfo = nullptr; if(!ShapeUtils::evalBroadcastShapeInfo(*this, other, true, newShapeInfo, getContext()->getWorkspace())) // the rank of new array = max->rankOf)() throw std::runtime_error("NDArray::applyTrueBroadcast method: the shapes of this and other arrays are not suitable for broadcast operation !"); - if(!shape::shapeEquals(newShapeInfo, other.getShapeInfo())) { + if(!shape::shapeEquals(newShapeInfo, other.shapeInfo())) { NDArray result(newShapeInfo, true, getContext()); this->applyTrueBroadcast(op, other, result, false, extraArgs); @@ -2994,11 +3006,11 @@ NDArray NDArray::applyTrueBroadcast(sd::BroadcastOpsTuple op, const NDArray& oth return NDArray(other); } - Nd4jLong* newShapeInfo = nullptr; + const Nd4jLong* newShapeInfo = nullptr; if(!ShapeUtils::evalBroadcastShapeInfo(*this, other, true, newShapeInfo, getContext()->getWorkspace())) // the rank of 
new array = max->rankOf)() throw std::runtime_error("NDArray::applyTrueBroadcast method: the shapes of this and other arrays are not suitable for broadcast operation !"); - if(!shape::shapeEquals(newShapeInfo, getShapeInfo())) { + if(!shape::shapeEquals(newShapeInfo, shapeInfo())) { NDArray result(newShapeInfo, true, getContext()); this->applyTrueBroadcast(op, other, result, false, extraArgs); @@ -3018,12 +3030,12 @@ NDArray NDArray::applyTrueBroadcast(sd::BroadcastOpsTuple op, NDArray&& other, E return NDArray(other); } - Nd4jLong* newShapeInfo = nullptr; + const Nd4jLong* newShapeInfo = nullptr; if(!ShapeUtils::evalBroadcastShapeInfo(*this, other, true, newShapeInfo, getContext()->getWorkspace())) // the rank of new array = max->rankOf)() throw std::runtime_error("NDArray::applyTrueBroadcast method: the shapes of this and other arrays are not suitable for broadcast operation !"); - const bool thisMove = shape::shapeEquals(newShapeInfo, getShapeInfo()); - const bool otherMove = shape::shapeEquals(newShapeInfo, other.getShapeInfo()); + const bool thisMove = shape::shapeEquals(newShapeInfo, shapeInfo()); + const bool otherMove = shape::shapeEquals(newShapeInfo, other.shapeInfo()); if(!thisMove && !otherMove) { @@ -3060,12 +3072,12 @@ void NDArray::applyBroadcast(sd::broadcast::Ops op, const std::vector& dime // if (other.lengthOf() == lengthOf() && this->rankOf() == other.rankOf()) { // NDArray::prepareSpecialUse({&target}, {this, &other}); - // NativeOpExecutioner::execPairwiseTransform(getContext(), fromBroadcastToPairwise(op), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr); + // NativeOpExecutioner::execPairwiseTransform(getContext(), fromBroadcastToPairwise(op), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), 
other.specialBuffer(), other.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr); // NDArray::registerSpecialUse({&target}, {this, &other}); // return; // } - if(target.dataType() != DataTypeUtils::pickPairwiseResultType(shapeInfo(), other.getShapeInfo())) + if(target.dataType() != DataTypeUtils::pickPairwiseResultType(shapeInfo(), other.shapeInfo())) throw std::invalid_argument("NDArray::applyBroadcast method: wrong type of target array !"); if(!target.isSameShape(this) && !target.isSameShape(other)) throw std::invalid_argument("NDArray::applyBroadcast method: one of of two input arrays (this or other) should has the same shape as target array!"); @@ -3075,24 +3087,24 @@ void NDArray::applyBroadcast(sd::broadcast::Ops op, const std::vector& dime if (dimensions.size() > 1) std::sort(copy.begin(), copy.end()); - Nd4jLong* xShapeInfoH = getShapeInfo(); - Nd4jLong* yShapeInfoH = other.getShapeInfo(); - Nd4jLong* xShapeInfoD = getSpecialShapeInfo(); - Nd4jLong* yShapeInfoD = other.getSpecialShapeInfo(); + Nd4jLong const* xShapeInfoH = shapeInfo(); + Nd4jLong const* yShapeInfoH = other.shapeInfo(); + Nd4jLong const* xShapeInfoD = specialShapeInfo(); + Nd4jLong const* yShapeInfoD = other.specialShapeInfo(); if(!isSameShape(target)) { - auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.getShapeInfo(), getShapeInfo(), getContext()->getWorkspace(), copy); - xShapeInfoH = reinterpret_cast(xPack.primary()); - xShapeInfoD = reinterpret_cast(xPack.special()); + auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace(), copy); + xShapeInfoH = reinterpret_cast(xPack.primary()); + xShapeInfoD = reinterpret_cast(xPack.special()); } if(!other.isSameShape(target)) { - auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.getShapeInfo(), 
other.getShapeInfo(), other.getContext()->getWorkspace(), copy); - yShapeInfoH = reinterpret_cast(yPack.primary()); - yShapeInfoD = reinterpret_cast(yPack.special()); + auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace(), copy); + yShapeInfoH = reinterpret_cast(yPack.primary()); + yShapeInfoD = reinterpret_cast(yPack.special()); } NDArray::prepareSpecialUse({&target}, {this, &other}); - NativeOpExecutioner::execBroadcast(getContext(), op, buffer(), xShapeInfoH, specialBuffer(), xShapeInfoD, other.getBuffer(), yShapeInfoH, other.getSpecialBuffer(), yShapeInfoD, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); + NativeOpExecutioner::execBroadcast(getContext(), op, buffer(), xShapeInfoH, specialBuffer(), xShapeInfoD, other.buffer(), yShapeInfoH, other.specialBuffer(), yShapeInfoD, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); registerSpecialUse({&target}, {this, &other}); } @@ -3112,7 +3124,7 @@ void NDArray::applyBroadcast(sd::broadcast::BoolOps op, const std::vector& // if (other.lengthOf() == lengthOf() && this->rankOf() == other.rankOf()) { // NDArray::prepareSpecialUse({&target}, {this, &other}); - // NativeOpExecutioner::execPairwiseBoolTransform(getContext(), fromBroadcastToPairwiseBool(op), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr); + // NativeOpExecutioner::execPairwiseBoolTransform(getContext(), fromBroadcastToPairwiseBool(op), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), 
nullptr); // NDArray::registerSpecialUse({&target}, {this, &other}); // return; // } @@ -3129,24 +3141,24 @@ void NDArray::applyBroadcast(sd::broadcast::BoolOps op, const std::vector& if (dimensions.size() > 1) std::sort(copy.begin(), copy.end()); - Nd4jLong* xShapeInfoH = getShapeInfo(); - Nd4jLong* yShapeInfoH = other.getShapeInfo(); - Nd4jLong* xShapeInfoD = getSpecialShapeInfo(); - Nd4jLong* yShapeInfoD = other.getSpecialShapeInfo(); + Nd4jLong const* xShapeInfoH = shapeInfo(); + Nd4jLong const* yShapeInfoH = other.shapeInfo(); + Nd4jLong const* xShapeInfoD = specialShapeInfo(); + Nd4jLong const* yShapeInfoD = other.specialShapeInfo(); if(!isSameShape(target)) { - auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.getShapeInfo(), getShapeInfo(), getContext()->getWorkspace(), copy); - xShapeInfoH = reinterpret_cast(xPack.primary()); - xShapeInfoD = reinterpret_cast(xPack.special()); + auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace(), copy); + xShapeInfoH = reinterpret_cast(xPack.primary()); + xShapeInfoD = reinterpret_cast(xPack.special()); } if(!other.isSameShape(target)) { - auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.getShapeInfo(), other.getShapeInfo(), other.getContext()->getWorkspace(), copy); - yShapeInfoH = reinterpret_cast(yPack.primary()); - yShapeInfoD = reinterpret_cast(yPack.special()); + auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace(), copy); + yShapeInfoH = reinterpret_cast(yPack.primary()); + yShapeInfoD = reinterpret_cast(yPack.special()); } NDArray::prepareSpecialUse({&target}, {this, &other}); - NativeOpExecutioner::execBroadcastBool(getContext(), op, buffer(), xShapeInfoH, specialBuffer(), xShapeInfoD, other.getBuffer(), yShapeInfoH, 
other.getSpecialBuffer(), yShapeInfoD, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr); + NativeOpExecutioner::execBroadcastBool(getContext(), op, buffer(), xShapeInfoH, specialBuffer(), xShapeInfoD, other.buffer(), yShapeInfoH, other.specialBuffer(), yShapeInfoD, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr); registerSpecialUse({&target}, {this, &other}); } @@ -3167,7 +3179,7 @@ void NDArray::applyBroadcast(sd::broadcast::IntOps op, const std::vector& d // if (other.lengthOf() == lengthOf() && this->rankOf() == other.rankOf()) { // NDArray::prepareSpecialUse({&target}, {this, &other}); - // NativeOpExecutioner::execPairwiseIntTransform(getContext(), fromBroadcastToPairwiseInt(op), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr); + // NativeOpExecutioner::execPairwiseIntTransform(getContext(), fromBroadcastToPairwiseInt(op), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr); // NDArray::registerSpecialUse({&target}, {this, &other}); // return; // } @@ -3184,24 +3196,24 @@ void NDArray::applyBroadcast(sd::broadcast::IntOps op, const std::vector& d if (dimensions.size() > 1) std::sort(copy.begin(), copy.end()); - Nd4jLong* xShapeInfoH = getShapeInfo(); - Nd4jLong* yShapeInfoH = other.getShapeInfo(); - Nd4jLong* xShapeInfoD = getSpecialShapeInfo(); - Nd4jLong* yShapeInfoD = other.getSpecialShapeInfo(); + Nd4jLong const* xShapeInfoH = shapeInfo(); + Nd4jLong const* yShapeInfoH = other.shapeInfo(); + Nd4jLong const* xShapeInfoD = specialShapeInfo(); + Nd4jLong const* yShapeInfoD = 
other.specialShapeInfo(); if(!isSameShape(target)) { - auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.getShapeInfo(), getShapeInfo(), getContext()->getWorkspace(), copy); - xShapeInfoH = reinterpret_cast(xPack.primary()); - xShapeInfoD = reinterpret_cast(xPack.special()); + auto xPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), shapeInfo(), getContext()->getWorkspace(), copy); + xShapeInfoH = reinterpret_cast(xPack.primary()); + xShapeInfoD = reinterpret_cast(xPack.special()); } if(!other.isSameShape(target)) { - auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.getShapeInfo(), other.getShapeInfo(), other.getContext()->getWorkspace(), copy); - yShapeInfoH = reinterpret_cast(yPack.primary()); - yShapeInfoD = reinterpret_cast(yPack.special()); + auto yPack = ConstantShapeHelper::getInstance()->createShapeInfoWithUnitiesForBroadcast(target.shapeInfo(), other.shapeInfo(), other.getContext()->getWorkspace(), copy); + yShapeInfoH = reinterpret_cast(yPack.primary()); + yShapeInfoD = reinterpret_cast(yPack.special()); } NDArray::prepareSpecialUse({&target}, {this, &other}); - NativeOpExecutioner::execBroadcastInt(getContext(), op, buffer(), xShapeInfoH, specialBuffer(), xShapeInfoD, other.getBuffer(), yShapeInfoH, other.getSpecialBuffer(), yShapeInfoD, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); + NativeOpExecutioner::execBroadcastInt(getContext(), op, buffer(), xShapeInfoH, specialBuffer(), xShapeInfoD, other.buffer(), yShapeInfoH, other.specialBuffer(), yShapeInfoD, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); registerSpecialUse({&target}, {this, &other}); } @@ -3364,7 +3376,7 @@ void NDArray::applyPairwiseTransform(sd::pairwise::Ops op, const NDArray& other, throw std::invalid_argument("NDArray::applyPairwiseTransform method - type of 
target array must be the same as type of this or other array !"); NDArray::prepareSpecialUse({&target}, {this, &other}); - NativeOpExecutioner::execPairwiseTransform(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), extraParams != nullptr ? extraParams->argumentsAsT(target.dataType()) : nullptr); + NativeOpExecutioner::execPairwiseTransform(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), extraParams != nullptr ? extraParams->argumentsAsT(target.dataType()) : nullptr); NDArray::registerSpecialUse({&target}, {this, &other}); if (extraParams != nullptr) @@ -3383,7 +3395,7 @@ void NDArray::applyPairwiseTransform(sd::pairwise::BoolOps op, const NDArray& ot throw std::invalid_argument("NDArray::applyPairwiseTransform BoolOps method - this and other arrays must have the same type !"); NDArray::prepareSpecialUse({&target}, {this, &other}); - NativeOpExecutioner::execPairwiseBoolTransform(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), extraParams != nullptr ? extraParams->argumentsAsT(target.dataType()) : nullptr); + NativeOpExecutioner::execPairwiseBoolTransform(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), extraParams != nullptr ? 
extraParams->argumentsAsT(target.dataType()) : nullptr); NDArray::registerSpecialUse({&target}, {this, &other}); } @@ -3399,7 +3411,7 @@ void NDArray::applyPairwiseTransform(sd::pairwise::IntOps op, const NDArray& oth throw std::invalid_argument("NDArray::applyPairwiseTransform IntOps method - this and other arrays must have the same type !"); NDArray::prepareSpecialUse({&target}, {this, &other}); - NativeOpExecutioner::execPairwiseIntTransform(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), extraParams != nullptr ? extraParams->argumentsAsT(target.dataType()) : nullptr); + NativeOpExecutioner::execPairwiseIntTransform(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), extraParams != nullptr ? 
extraParams->argumentsAsT(target.dataType()) : nullptr); NDArray::registerSpecialUse({&target}, {this, &other}); } @@ -3429,12 +3441,12 @@ void NDArray::varianceAlongDimension(sd::variance::Ops op, NDArray& target, cons NDArray::prepareSpecialUse({&target}, {this}); if(rankOf() == dimensions.size() || dimensions.empty()) - NativeOpExecutioner::execSummaryStatsScalar(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), nullptr, target.getBuffer(), target.getShapeInfo(), target.getSpecialBuffer(), target.getSpecialShapeInfo(), biasCorrected); + NativeOpExecutioner::execSummaryStatsScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), biasCorrected); else { std::vector copy(dimensions); auto pDims = sd::Environment::getInstance()->isCPU() ? copy.data() : nullptr; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->getShapeInfo(), dimensions); - NativeOpExecutioner::execSummaryStats(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.getSpecialBuffer(), target.specialShapeInfo(), pDims, dimensions.size(), packX.platformShapeInfo(), packX.platformOffsets(), biasCorrected); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimensions); + NativeOpExecutioner::execSummaryStats(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), pDims, dimensions.size(), packX.platformShapeInfo(), packX.platformOffsets(), biasCorrected); synchronize("NDArray::varianceAlongDimension"); } @@ -3534,7 +3546,7 @@ bool NDArray::equalsTo(const NDArray *other, double eps) const { // we need to be able to compare [1, len] to [len] if ((rankOf() == 1 && 
other->rankOf() == 2) || (rankOf() == 2 && other->rankOf() == 1)) { // FIXME: do something here? - } else if (!shape::equalsSoft(getShapeInfo(), other->getShapeInfo())) + } else if (!shape::equalsSoft(shapeInfo(), other->shapeInfo())) return false; if (isS()) { @@ -3576,11 +3588,11 @@ bool NDArray::equalsTo(const NDArray *other, double eps) const { ExtraArguments extras({0.0, 0.0, eps}); NDArray::prepareSpecialUse({&tmp}, {this, other}); - NativeOpExecutioner::execReduce3Scalar(getContext(), reduce3::EqualsWithEps, getBuffer(), getShapeInfo(), - getSpecialBuffer(), getSpecialShapeInfo(), - extras.argumentsAsT(DataType::FLOAT32), other->getBuffer(), - other->getShapeInfo(), other->getSpecialBuffer(), - other->getSpecialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), + NativeOpExecutioner::execReduce3Scalar(getContext(), reduce3::EqualsWithEps, buffer(), shapeInfo(), + specialBuffer(), specialShapeInfo(), + extras.argumentsAsT(DataType::FLOAT32), other->buffer(), + other->shapeInfo(), other->specialBuffer(), + other->specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo()); NDArray::registerSpecialUse({&tmp}, {this, other}); @@ -3722,7 +3734,7 @@ utf8string NDArray::e(const Nd4jLong i) const { syncToHost(); tickReadHost(); - return *(reinterpret_cast(getBuffer())[rp]); + return *(reinterpret_cast(buffer())[rp]); } ///////////////////////////////////////////////////////////////////////// @@ -3733,7 +3745,7 @@ T NDArray::e(const Nd4jLong i) const { NDArray::preparePrimaryUse({}, {this}); NDArray::registerPrimaryUse({}, {this}); - BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), return templatedGet<, T>(getBuffer(), rp), LIBND4J_TYPES); + BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), return templatedGet<, T>(buffer(), rp), LIBND4J_TYPES); } BUILD_SINGLE_UNCHAINED_TEMPLATE(template ND4J_EXPORT , NDArray::e(const Nd4jLong) const, LIBND4J_TYPES); @@ -3747,12 +3759,12 @@ T NDArray::e(const Nd4jLong i, const Nd4jLong j) const { throw 
std::invalid_argument("NDArray::e(i,j): one of input indexes is out of array length or rank!=2 !"); const Nd4jLong coords[2] = {i, j}; - const auto xOffset = shape::getOffset(getShapeInfo(), coords); + const auto xOffset = shape::getOffset(shapeInfo(), coords); NDArray::preparePrimaryUse({}, {this}); NDArray::registerPrimaryUse({}, {this}); - BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), return templatedGet<, T>(getBuffer(), xOffset), LIBND4J_TYPES); + BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), return templatedGet<, T>(buffer(), xOffset), LIBND4J_TYPES); return static_cast(119); } @@ -3767,12 +3779,12 @@ T NDArray::e(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k) const { throw std::invalid_argument("NDArray::e(i,j,k): one of input indexes is out of array length or rank!=3 !"); const Nd4jLong coords[3] = {i, j, k}; - const auto xOffset = shape::getOffset(getShapeInfo(), coords); + const auto xOffset = shape::getOffset(shapeInfo(), coords); NDArray::preparePrimaryUse({}, {this}); NDArray::registerPrimaryUse({}, {this}); - BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), return templatedGet<, T>(getBuffer(), xOffset), LIBND4J_TYPES); + BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), return templatedGet<, T>(buffer(), xOffset), LIBND4J_TYPES); return static_cast(119); } @@ -3787,12 +3799,12 @@ T NDArray::e(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLon throw std::invalid_argument("NDArray::e(i,j,k,l): one of input indexes is out of array length or rank!=4 !"); const Nd4jLong coords[4] = {i, j, k, l}; - const auto xOffset = shape::getOffset(getShapeInfo(), coords); + const auto xOffset = shape::getOffset(shapeInfo(), coords); NDArray::preparePrimaryUse({}, {this}); NDArray::registerPrimaryUse({}, {this}); - BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), return templatedGet<, T>(getBuffer(), xOffset), LIBND4J_TYPES); + BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), return templatedGet<, T>(buffer(), xOffset), LIBND4J_TYPES); return static_cast(119); } @@ -3805,7 +3817,7 
@@ NDArray NDArray::e(const Nd4jLong i) const { NDArray scalar(dataType(), getContext()); - scalar.copyBuffersContinuouslyFrom(*this, sizeOfT(), 0, getBufferOffset() + offset); + scalar.copyBuffersContinuouslyFrom(*this, sizeOfT(), 0, bufferOffset() + offset); return scalar; } @@ -3884,7 +3896,7 @@ NDArray NDArray::transform(sd::transform::FloatOps op, void *extraParams) const NDArray result(ordering(), getShapeAsVector(), DataTypeUtils::pickFloatingType(dataType()), getContext()); NDArray::prepareSpecialUse({&result}, {this}); - NativeOpExecutioner::execTransformFloat(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), extraParams, nullptr, nullptr); + NativeOpExecutioner::execTransformFloat(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), extraParams, nullptr, nullptr); NDArray::registerSpecialUse({&result}, {this}); return result; @@ -3896,7 +3908,7 @@ NDArray NDArray::transform(sd::transform::FloatOps op, void *extraParams) && { throw std::runtime_error("NDArray::transform SameOps: you can't use this method on String array!"); NDArray::prepareSpecialUse({this}, {this}); - NativeOpExecutioner::execTransformFloat(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), extraParams, nullptr, nullptr); + NativeOpExecutioner::execTransformFloat(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), extraParams, nullptr, nullptr); NDArray::registerSpecialUse({this}, {this}); return std::move(*this); @@ -3907,10 +3919,10 @@ NDArray NDArray::transform(sd::transform::SameOps op, void *extraParams) const & if (isS()) throw 
std::runtime_error("NDArray::transform SameOps: you can't use this method on String array!"); - NDArray result(getShapeInfo(), false, getContext()); + NDArray result(shapeInfo(), false, getContext()); NDArray::prepareSpecialUse({&result}, {this}); - NativeOpExecutioner::execTransformSame(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), extraParams, nullptr, nullptr); + NativeOpExecutioner::execTransformSame(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), extraParams, nullptr, nullptr); NDArray::registerSpecialUse({&result}, {this}); return result; @@ -3922,7 +3934,7 @@ NDArray NDArray::transform(sd::transform::SameOps op, void *extraParams) && { throw std::runtime_error("NDArray::transform SameOps: you can't use this method on String array!"); NDArray::prepareSpecialUse({this}, {this}); - NativeOpExecutioner::execTransformSame(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), extraParams, nullptr, nullptr); + NativeOpExecutioner::execTransformSame(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), extraParams, nullptr, nullptr); NDArray::registerSpecialUse({this}, {this}); return std::move(*this); @@ -3933,10 +3945,10 @@ NDArray NDArray::transform(sd::transform::StrictOps op, void *extraParams) const if (!this->isR()) throw std::runtime_error("Source array must have one of FLOAT types"); - NDArray result(getShapeInfo(), false, getContext()); + NDArray result(shapeInfo(), false, getContext()); NDArray::prepareSpecialUse({&result}, {this}); - NativeOpExecutioner::execTransformStrict(getContext(), op, getBuffer(), getShapeInfo(), 
getSpecialBuffer(), getSpecialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), extraParams, nullptr, nullptr); + NativeOpExecutioner::execTransformStrict(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), extraParams, nullptr, nullptr); NDArray::registerSpecialUse({&result}, {this}); return result; @@ -3948,7 +3960,7 @@ NDArray NDArray::transform(sd::transform::StrictOps op, void *extraParams) && { throw std::runtime_error("Source array must have one of FLOAT types"); NDArray::prepareSpecialUse({this}, {this}); - NativeOpExecutioner::execTransformStrict(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), extraParams, nullptr, nullptr); + NativeOpExecutioner::execTransformStrict(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), extraParams, nullptr, nullptr); NDArray::registerSpecialUse({this}, {this}); return std::move(*this); @@ -3962,7 +3974,7 @@ NDArray NDArray::transform(sd::transform::BoolOps op, void *extraParams) const & NDArray result(ordering(), getShapeAsVector(), sd::DataType::BOOL, getContext()); NDArray::prepareSpecialUse({&result}, {this}); - NativeOpExecutioner::execTransformBool(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), extraParams, nullptr, nullptr); + NativeOpExecutioner::execTransformBool(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), extraParams, nullptr, nullptr); NDArray::registerSpecialUse({&result}, {this}); return result; @@ -3974,7 +3986,7 @@ 
NDArray NDArray::transform(sd::transform::BoolOps op, void *extraParams) && { throw std::runtime_error("NDArray::transform BoolOps: you can't use this method on String array!"); NDArray::prepareSpecialUse({this}, {this}); - NativeOpExecutioner::execTransformBool(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), extraParams, nullptr, nullptr); + NativeOpExecutioner::execTransformBool(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), extraParams, nullptr, nullptr); NDArray::registerSpecialUse({this}, {this}); return std::move(*this); @@ -3987,11 +3999,11 @@ void NDArray::applyScalarArr(sd::scalar::Ops op, const NDArray& scalar, NDArray& if (scalar.lengthOf() != 1) throw std::invalid_argument("NDArray::applyScalarArr method: operand is not a scalar!"); - if(target.dataType() != DataTypeUtils::pickPairwiseResultType(shapeInfo(), scalar.getShapeInfo()) && !(target.dataType() == dataType() || target.dataType() == scalar.dataType())) + if(target.dataType() != DataTypeUtils::pickPairwiseResultType(shapeInfo(), scalar.shapeInfo()) && !(target.dataType() == dataType() || target.dataType() == scalar.dataType())) throw std::invalid_argument("NDArray::applyScalarArr method: wrong type of target array!"); NDArray::prepareSpecialUse({&target}, {this, &scalar}); - NativeOpExecutioner::execScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), scalar.getBuffer(), scalar.getShapeInfo(), scalar.getSpecialBuffer(), scalar.getSpecialShapeInfo(), extraParams != nullptr ? 
extraParams->argumentsAsT(target.dataType()): nullptr); + NativeOpExecutioner::execScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), scalar.buffer(), scalar.shapeInfo(), scalar.specialBuffer(), scalar.specialShapeInfo(), extraParams != nullptr ? extraParams->argumentsAsT(target.dataType()): nullptr); NDArray::registerSpecialUse({&target}, {this, &scalar}); } @@ -4007,7 +4019,7 @@ void NDArray::applyScalarArr(sd::scalar::BoolOps op, const NDArray& scalar, NDAr } NDArray::prepareSpecialUse({&target}, {this, &scalar}); - NativeOpExecutioner::execScalarBool(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), scalar.getBuffer(), scalar.getShapeInfo(), scalar.getSpecialBuffer(), scalar.getSpecialShapeInfo(), extraParams != nullptr ? extraParams->argumentsAsT(target.dataType()): nullptr); + NativeOpExecutioner::execScalarBool(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), scalar.buffer(), scalar.shapeInfo(), scalar.specialBuffer(), scalar.specialShapeInfo(), extraParams != nullptr ? extraParams->argumentsAsT(target.dataType()): nullptr); NDArray::registerSpecialUse({&target}, {this, &scalar}); } @@ -4024,7 +4036,7 @@ void NDArray::applyScalarArr(sd::scalar::IntOps op, const NDArray& scalar, NDArr } NDArray::prepareSpecialUse({&target}, {this, &scalar}); - NativeOpExecutioner::execScalarInt(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), scalar.getBuffer(), scalar.getShapeInfo(), scalar.getSpecialBuffer(), scalar.getSpecialShapeInfo(), extraParams != nullptr ? 
extraParams->argumentsAsT(target.dataType()): nullptr); + NativeOpExecutioner::execScalarInt(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), scalar.buffer(), scalar.shapeInfo(), scalar.specialBuffer(), scalar.specialShapeInfo(), extraParams != nullptr ? extraParams->argumentsAsT(target.dataType()): nullptr); NDArray::registerSpecialUse({&target}, {this, &scalar}); } @@ -4100,14 +4112,14 @@ void NDArray::applyIndexReduce(sd::indexreduce::Ops op, NDArray& target, const s NDArray::prepareSpecialUse({&target}, {this}); if (target.lengthOf() == 1) { - NativeOpExecutioner::execIndexReduceScalar(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), params, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); + NativeOpExecutioner::execIndexReduceScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), params, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); } else { std::vector copy = dimensions; shape::checkDimensions(rankOf(), copy); auto pDims = sd::Environment::getInstance()->isCPU() ? 
copy.data() : nullptr; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(getShapeInfo(), copy); - NativeOpExecutioner::execIndexReduce(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), params, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), pDims, copy.size(), packX.platformShapeInfo(), packX.platformOffsets()); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(shapeInfo(), copy); + NativeOpExecutioner::execIndexReduce(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), params, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), pDims, copy.size(), packX.platformShapeInfo(), packX.platformOffsets()); synchronize("NDArray::applyIndexReduce"); } @@ -4147,7 +4159,7 @@ NDArray NDArray::applyReduce3(sd::reduce3::Ops op, const NDArray& other, const E void* params = extraParams != nullptr ? const_cast(extraParams)->argumentsAsT(dataType()) : nullptr; NDArray::prepareSpecialUse({&result}, {this, &other}); - NativeOpExecutioner::execReduce3Scalar(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), params, other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo()); + NativeOpExecutioner::execReduce3Scalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), params, other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo()); NDArray::registerSpecialUse({&result}, {this, &other}); return result; @@ -4175,19 +4187,19 @@ NDArray NDArray::applyReduce3(sd::reduce3::Ops op, const NDArray& other, const s // perform calculations if(rankOf() == copy.size() && other.rankOf() == copy.size()) { - 
NativeOpExecutioner::execReduce3Scalar(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), params, other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo()); + NativeOpExecutioner::execReduce3Scalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), params, other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo()); } else { auto pDims = sd::Environment::getInstance()->isCPU() ? copy.data() : nullptr; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(getShapeInfo(), copy); - auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(other.getShapeInfo(), copy); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(shapeInfo(), copy); + auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(other.shapeInfo(), copy); if(!shape::equalsSoft(packX.primaryShapeInfo(), packY.primaryShapeInfo()) || (packX.numberOfTads() != packY.numberOfTads() && packX.numberOfTads() != 1 && packY.numberOfTads() != 1)) throw std::runtime_error("NDArray::applyReduce3 cuda method: arrays tads are inconsistent !"); - NativeOpExecutioner::execReduce3(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), params, other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), pDims, copy.size(), packX.platformShapeInfo(), packX.platformOffsets(), packY.platformShapeInfo(), packY.platformOffsets()); + NativeOpExecutioner::execReduce3(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), params, other.buffer(), other.shapeInfo(), other.specialBuffer(), 
other.specialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), pDims, copy.size(), packX.platformShapeInfo(), packX.platformOffsets(), packY.platformShapeInfo(), packY.platformOffsets()); } registerSpecialUse({&result}, {this, &other}); @@ -4208,8 +4220,8 @@ NDArray NDArray::applyAllReduce3(sd::reduce3::Ops op, const NDArray& other, cons shape::checkDimensions(rankOf(), copy); shape::checkDimensions(other.rankOf(), copy); - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(getShapeInfo(), copy); - auto packY = ConstantTadHelper::getInstance()->tadForDimensions(other.getShapeInfo(), copy); + auto packX = ConstantTadHelper::getInstance()->tadForDimensions(shapeInfo(), copy); + auto packY = ConstantTadHelper::getInstance()->tadForDimensions(other.shapeInfo(), copy); // check tads shapes if(!shape::equalsSoft(packX.primaryShapeInfo(), packY.primaryShapeInfo())) @@ -4227,7 +4239,7 @@ NDArray NDArray::applyAllReduce3(sd::reduce3::Ops op, const NDArray& other, cons auto pDims = sd::Environment::getInstance()->isCPU() ? 
copy.data() : nullptr; NDArray::prepareSpecialUse({&result}, {this, &other}); - NativeOpExecutioner::execReduce3All(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), params, other.getBuffer(), other.getShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), pDims, copy.size(), packX.platformShapeInfo(), packX.platformOffsets(), packY.platformShapeInfo(), packY.platformOffsets()); + NativeOpExecutioner::execReduce3All(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), params, other.buffer(), other.shapeInfo(), other.specialBuffer(), other.specialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), pDims, copy.size(), packX.platformShapeInfo(), packX.platformOffsets(), packY.platformShapeInfo(), packY.platformOffsets()); NDArray::registerSpecialUse({&result}, {this, &other}); return result; @@ -4246,18 +4258,18 @@ void NDArray::reduceAlongDimension(sd::reduce::FloatOps op, NDArray& target, con if(checkTargetShape) { auto newShape = ShapeUtils::evalReduceShapeInfo(target.ordering(), copy, *this, keepDims, supportOldShapes, getContext()->getWorkspace()); - if(!shape::shapeEquals(newShape, target.getShapeInfo())) + if(!shape::shapeEquals(newShape, target.shapeInfo())) throw std::runtime_error("NDArray::reduceAlongDimension FloatOps: wrong target shape!"); } NDArray::prepareSpecialUse({&target}, {this}); if(rankOf() == copy.size() || copy.empty()) { - NativeOpExecutioner::execReduceFloatScalar(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(),nullptr, target.getBuffer(), target.getShapeInfo(), target.getSpecialBuffer(), target.getSpecialShapeInfo()); + NativeOpExecutioner::execReduceFloatScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(),nullptr, target.buffer(), target.shapeInfo(), 
target.specialBuffer(), target.specialShapeInfo()); } else { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(getShapeInfo(), copy); - NativeOpExecutioner::execReduceFloat(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), nullptr, target.getBuffer(), target.getShapeInfo(), target.getSpecialBuffer(), target.getSpecialShapeInfo(), copy.data(), copy.size(), packX.platformShapeInfo(), packX.platformOffsets()); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(shapeInfo(), copy); + NativeOpExecutioner::execReduceFloat(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), copy.data(), copy.size(), packX.platformShapeInfo(), packX.platformOffsets()); } synchronize("NDArray::reduceAlongDimension FloatOps"); @@ -4277,19 +4289,19 @@ void NDArray::reduceAlongDimension(sd::reduce::SameOps op, NDArray& target, cons if(checkTargetShape) { auto newShape = ShapeUtils::evalReduceShapeInfo(target.ordering(), copy, *this, keepDims, supportOldShapes, getContext()->getWorkspace()); - if(!shape::shapeEquals(newShape, target.getShapeInfo())) + if(!shape::shapeEquals(newShape, target.shapeInfo())) throw std::runtime_error("NDArray::reduceAlongDimension SameOps: wrong target shape!"); } NDArray::prepareSpecialUse({&target}, {this}); if(rankOf() == copy.size() || copy.empty()) { - NativeOpExecutioner::execReduceSameScalar(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), nullptr, target.getBuffer(), target.getShapeInfo(), target.getSpecialBuffer(), target.getSpecialShapeInfo()); + NativeOpExecutioner::execReduceSameScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); } else { //if (!isEmpty()) { auto pDims = 
sd::Environment::getInstance()->isCPU() ? copy.data() : nullptr; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->getShapeInfo(), copy); - NativeOpExecutioner::execReduceSame(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), nullptr, target.getBuffer(), target.getShapeInfo(), target.getSpecialBuffer(), target.getSpecialShapeInfo(), pDims, copy.size(), packX.platformShapeInfo(), packX.platformOffsets()); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), copy); + NativeOpExecutioner::execReduceSame(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), pDims, copy.size(), packX.platformShapeInfo(), packX.platformOffsets()); } synchronize("NDArray::reduceAlongDimension SameOps"); @@ -4309,19 +4321,19 @@ void NDArray::reduceAlongDimension(sd::reduce::LongOps op, NDArray& target, cons if(checkTargetShape) { auto newShape = ShapeUtils::evalReduceShapeInfo(target.ordering(), copy, *this, keepDims, supportOldShapes, getContext()->getWorkspace()); - if(!shape::shapeEquals(newShape, target.getShapeInfo())) + if(!shape::shapeEquals(newShape, target.shapeInfo())) throw std::runtime_error("NDArray::reduceAlongDimension LongOps: wrong target shape!"); } NDArray::prepareSpecialUse({&target}, {this}); if(rankOf() == copy.size() || copy.empty()) { - NativeOpExecutioner::execReduceLongScalar(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), nullptr, target.getBuffer(), target.getShapeInfo(), target.getSpecialBuffer(), target.getSpecialShapeInfo()); + NativeOpExecutioner::execReduceLongScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); } else { auto pDims = 
sd::Environment::getInstance()->isCPU() ? copy.data() : nullptr; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->getShapeInfo(), copy); - NativeOpExecutioner::execReduceLong(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), nullptr, target.getBuffer(), target.getShapeInfo(), target.getSpecialBuffer(), target.getSpecialShapeInfo(), pDims, copy.size(), packX.platformShapeInfo(), packX.platformOffsets()); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), copy); + NativeOpExecutioner::execReduceLong(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), pDims, copy.size(), packX.platformShapeInfo(), packX.platformOffsets()); } synchronize("NDArray::reduceAlongDimension LongOps"); @@ -4341,19 +4353,19 @@ void NDArray::reduceAlongDimension(sd::reduce::BoolOps op, NDArray& target, cons if(checkTargetShape) { auto newShape = ShapeUtils::evalReduceShapeInfo(target.ordering(), copy, *this, keepDims, supportOldShapes, getContext()->getWorkspace()); - if(!shape::shapeEquals(newShape, target.getShapeInfo())) + if(!shape::shapeEquals(newShape, target.shapeInfo())) throw std::runtime_error("NDArray::reduceAlongDimension BoolOps cuda: wrong target shape!"); } NDArray::prepareSpecialUse({&target}, {this}); if(rankOf() == copy.size() || copy.empty()) { - NativeOpExecutioner::execReduceBoolScalar(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), nullptr, target.getBuffer(), target.getShapeInfo(), target.getSpecialBuffer(), target.getSpecialShapeInfo()); + NativeOpExecutioner::execReduceBoolScalar(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo()); } else { auto pDims = 
sd::Environment::getInstance()->isCPU() ? copy.data() : nullptr; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->getShapeInfo(), copy); - NativeOpExecutioner::execReduceBool(getContext(), op, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), nullptr, target.getBuffer(), target.getShapeInfo(), target.getSpecialBuffer(), target.getSpecialShapeInfo(), pDims, copy.size(), packX.platformShapeInfo(), packX.platformOffsets()); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), copy); + NativeOpExecutioner::execReduceBool(getContext(), op, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), nullptr, target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), pDims, copy.size(), packX.platformShapeInfo(), packX.platformOffsets()); } synchronize("NDArray::reduceAlongDimension LongOps"); @@ -4372,7 +4384,7 @@ void NDArray::p(const Nd4jLong i, const T value) { const void *pV = reinterpret_cast(const_cast(&value)); NDArray::preparePrimaryUse({this}, {}, true); - BUILD_SINGLE_PARTIAL_SELECTOR(this->dataType(), templatedSet<, T>(this->getBuffer(), rp, pV), LIBND4J_TYPES); + BUILD_SINGLE_PARTIAL_SELECTOR(this->dataType(), templatedSet<, T>(this->buffer(), rp, pV), LIBND4J_TYPES); NDArray::registerPrimaryUse({this}, {}); } @@ -4400,10 +4412,10 @@ void NDArray::p(const Nd4jLong i, const Nd4jLong j, const T value) { void *p = reinterpret_cast(const_cast(&value)); Nd4jLong coords[2] = {i, j}; - auto xOffset = shape::getOffset(getShapeInfo(), coords); + auto xOffset = shape::getOffset(shapeInfo(), coords); NDArray::preparePrimaryUse({this}, {}, true); - BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), templatedSet<, T>(this->getBuffer(), xOffset, p), LIBND4J_TYPES); + BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), templatedSet<, T>(this->buffer(), xOffset, p), LIBND4J_TYPES); NDArray::registerPrimaryUse({this}, {}); } template ND4J_EXPORT void NDArray::p(const Nd4jLong i, 
const Nd4jLong j, const double value); @@ -4432,8 +4444,8 @@ void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const T va void *p = reinterpret_cast(const_cast(&value)); Nd4jLong coords[3] = {i, j, k}; - auto xOffset = shape::getOffset(getShapeInfo(), coords); - BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), templatedSet<, T>(this->getBuffer(), xOffset, p), LIBND4J_TYPES); + auto xOffset = shape::getOffset(shapeInfo(), coords); + BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), templatedSet<, T>(this->buffer(), xOffset, p), LIBND4J_TYPES); NDArray::registerPrimaryUse({this}, {}); } template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const double value); @@ -4459,10 +4471,10 @@ void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4j void *p = reinterpret_cast(const_cast(&value)); Nd4jLong coords[4] = {i, j, k, l}; - auto xOffset = shape::getOffset(getShapeInfo(), coords); + auto xOffset = shape::getOffset(shapeInfo(), coords); NDArray::preparePrimaryUse({this}, {}, true); - BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), templatedSet<, T>(this->getBuffer(), xOffset, p), LIBND4J_TYPES); + BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), templatedSet<, T>(this->buffer(), xOffset, p), LIBND4J_TYPES); NDArray::registerPrimaryUse({this}, {}); } template ND4J_EXPORT void NDArray::p(const Nd4jLong i, const Nd4jLong j, const Nd4jLong k, const Nd4jLong l, const double value); @@ -4489,7 +4501,7 @@ void NDArray::p(const Nd4jLong i, const NDArray& scalar) { NDArray::preparePrimaryUse({this}, {&scalar}, true); auto rp = getOffset(i); - BUILD_SINGLE_SELECTOR(scalar.dataType(), templatedSet, (getBuffer(), rp, scalar.dataType(), scalar.getBuffer()), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(scalar.dataType(), templatedSet, (buffer(), rp, scalar.dataType(), scalar.buffer()), LIBND4J_TYPES); NDArray::registerPrimaryUse({this}, {&scalar}); } @@ -4501,13 +4513,13 @@ void NDArray::p(const Nd4jLong i, const NDArray& scalar) { if 
(i >= _length) throw std::invalid_argument("NDArray::p(i, NDArray_scalar): input index is out of array length !"); -// void *p = reinterpret_cast(scalar.getBuffer()); +// void *p = reinterpret_cast(scalar.buffer()); Nd4jLong coords[4] = {i, j, k, l}; - auto xOffset = shape::getOffset(getShapeInfo(), coords); + auto xOffset = shape::getOffset(shapeInfo(), coords); NDArray::preparePrimaryUse({this}, {&scalar}, true); -// BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), templatedSet<, T>(this->getBuffer(), xOffset, p), LIBND4J_TYPES); - BUILD_SINGLE_SELECTOR(scalar.dataType(), templatedSet, (this->getBuffer(), xOffset, scalar.dataType(), scalar.getBuffer()), LIBND4J_TYPES); +// BUILD_SINGLE_PARTIAL_SELECTOR(dataType(), templatedSet<, T>(this->buffer(), xOffset, p), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(scalar.dataType(), templatedSet, (this->buffer(), xOffset, scalar.dataType(), scalar.buffer()), LIBND4J_TYPES); NDArray::registerPrimaryUse({this}, {&scalar}); } @@ -4523,10 +4535,10 @@ void NDArray::addRowVector(const NDArray& row, NDArray& target) const { int dimension = 1; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->getShapeInfo(), dimension); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimension); NDArray::prepareSpecialUse({&target}, {this, &row}); - NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Add, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), row.getBuffer(), row.getShapeInfo(), row.getSpecialBuffer(), row.getSpecialShapeInfo(), target.getBuffer(), target.getShapeInfo(), target.getSpecialBuffer(), target.getSpecialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); + NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Add, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), row.buffer(), row.shapeInfo(), row.specialBuffer(), row.specialShapeInfo(), target.buffer(), 
target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); NDArray::registerSpecialUse({&target}, {this, &row}); } @@ -4542,10 +4554,10 @@ void NDArray::subRowVector(const NDArray& row, NDArray& target) const { int dimension = 1; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->getShapeInfo(), dimension); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimension); NDArray::prepareSpecialUse({&target}, {this, &row}); - NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Subtract, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), row.getBuffer(), row.getShapeInfo(), row.getSpecialBuffer(), row.getSpecialShapeInfo(), target.getBuffer(), target.getShapeInfo(), target.getSpecialBuffer(), target.getSpecialShapeInfo(), &dimension, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); + NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Subtract, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), row.buffer(), row.shapeInfo(), row.specialBuffer(), row.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), &dimension, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); NDArray::registerSpecialUse({&target}, {this, &row}); } @@ -4562,10 +4574,10 @@ void NDArray::mulRowVector(const NDArray &row, NDArray &target) const { int dimension = 1; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->getShapeInfo(), dimension); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimension); NDArray::prepareSpecialUse({&target}, {this, &row}); - NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Multiply, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), row.getBuffer(), 
row.getShapeInfo(), row.getSpecialBuffer(), row.getSpecialShapeInfo(), target.getBuffer(), target.getShapeInfo(), target.getSpecialBuffer(), target.getSpecialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); + NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Multiply, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), row.buffer(), row.shapeInfo(), row.specialBuffer(), row.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); NDArray::registerSpecialUse({&target}, {this, &row}); } @@ -4583,10 +4595,10 @@ void NDArray::divRowVector(const NDArray &row, NDArray &target) const { int dimension = 1; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->getShapeInfo(), dimension); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimension); NDArray::prepareSpecialUse({&target}, {this, &row}); - NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Divide, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), row.getBuffer(), row.getShapeInfo(), row.getSpecialBuffer(), row.getSpecialShapeInfo(), target.getBuffer(), target.getShapeInfo(), target.getSpecialBuffer(), target.getSpecialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); + NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Divide, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), row.buffer(), row.shapeInfo(), row.specialBuffer(), row.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); NDArray::registerSpecialUse({&target}, {this, &row}); } @@ -4601,10 +4613,10 @@ void NDArray::addiRowVector(const NDArray& row) { 
int dimension = 1; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->getShapeInfo(), dimension); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimension); NDArray::prepareSpecialUse({this}, {&row}); - NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Add, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), row.getBuffer(), row.getShapeInfo(), row.getSpecialBuffer(), row.getSpecialShapeInfo(), this->buffer(), this->shapeInfo(), this->specialBuffer(), this->specialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); + NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Add, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), row.buffer(), row.shapeInfo(), row.specialBuffer(), row.specialShapeInfo(), this->buffer(), this->shapeInfo(), this->specialBuffer(), this->specialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); NDArray::registerSpecialUse({this}, {&row}); } @@ -4619,10 +4631,10 @@ void NDArray::addColumnVector(const NDArray &column, NDArray &target) const { int dimension = 0; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->getShapeInfo(), dimension); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimension); NDArray::prepareSpecialUse({&target}, {this, &column}); - NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Add, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), column.getBuffer(), column.getShapeInfo(), column.getSpecialBuffer(), column.getSpecialShapeInfo(), target.getBuffer(), target.getShapeInfo(), target.getSpecialBuffer(), target.getSpecialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); + NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Add, buffer(), 
shapeInfo(), specialBuffer(), specialShapeInfo(), column.buffer(), column.shapeInfo(), column.specialBuffer(), column.specialShapeInfo(), target.buffer(), target.shapeInfo(), target.specialBuffer(), target.specialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); NDArray::registerSpecialUse({&target}, {this, &column}); } @@ -4636,10 +4648,10 @@ void NDArray::addiColumnVector(const NDArray &column) { int dimension = 0; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->getShapeInfo(), dimension); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimension); NDArray::prepareSpecialUse({this}, {&column}); - NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Add, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), column.getBuffer(), column.getShapeInfo(), column.getSpecialBuffer(), column.getSpecialShapeInfo(), this->buffer(), this->shapeInfo(), this->specialBuffer(), this->specialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); + NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Add, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), column.buffer(), column.shapeInfo(), column.specialBuffer(), column.specialShapeInfo(), this->buffer(), this->shapeInfo(), this->specialBuffer(), this->specialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); NDArray::registerSpecialUse({this}, {&column}); } @@ -4653,10 +4665,10 @@ void NDArray::muliColumnVector(const NDArray& column) { int dimension = 0; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->getShapeInfo(), dimension); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(this->shapeInfo(), dimension); NDArray::prepareSpecialUse({this}, {&column}); - NativeOpExecutioner::execBroadcast(getContext(), 
sd::broadcast::Ops::Multiply, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), column.getBuffer(), column.getShapeInfo(), column.getSpecialBuffer(), column.getSpecialShapeInfo(), this->buffer(), this->shapeInfo(), this->specialBuffer(), this->specialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); + NativeOpExecutioner::execBroadcast(getContext(), sd::broadcast::Ops::Multiply, buffer(), shapeInfo(), specialBuffer(), specialShapeInfo(), column.buffer(), column.shapeInfo(), column.specialBuffer(), column.specialShapeInfo(), this->buffer(), this->shapeInfo(), this->specialBuffer(), this->specialShapeInfo(), nullptr, 1, packX.platformShapeInfo(), packX.platformOffsets(), nullptr, nullptr); NDArray::registerSpecialUse({this}, {&column}); } @@ -4694,7 +4706,7 @@ ResultSet NDArray::multipleTensorsAlongDimension(const std::vector &indices if (indices.size() == 0) return result; - auto pack = ConstantTadHelper::getInstance()->tadForDimensions(getShapeInfo(), const_cast(dimensions.data()), dimensions.size()); + auto pack = ConstantTadHelper::getInstance()->tadForDimensions(shapeInfo(), const_cast(dimensions.data()), dimensions.size()); auto tadLength = shape::length(pack.primaryShapeInfo()); auto numTads = lengthOf() / tadLength; @@ -4705,7 +4717,7 @@ ResultSet NDArray::multipleTensorsAlongDimension(const std::vector &indices throw std::runtime_error("Bad index"); } - auto array = new NDArray(getDataBuffer(), ShapeDescriptor(pack.primaryShapeInfo()), getContext(), pack.primaryOffsets()[idx] + getBufferOffset()); + auto array = new NDArray(getDataBuffer(), ShapeDescriptor(pack.primaryShapeInfo()), getContext(), pack.primaryOffsets()[idx] + bufferOffset()); result.push_back(array); } @@ -4777,7 +4789,7 @@ NDArray NDArray::diagonal(const char type) const { indices[i] = 1; } - auto step = shape::getOffset(getShapeInfo(), indices); + auto step = shape::getOffset(shapeInfo(), indices); if(type == 'c') { 
outShapeInfo[1] = diagSize; @@ -4796,7 +4808,7 @@ NDArray NDArray::diagonal(const char type) const { ArrayOptions::setDataType(outShapeInfo, this->dataType()); - NDArray result(_buffer, ShapeDescriptor(outShapeInfo), getContext(), getBufferOffset()); + NDArray result(_buffer, ShapeDescriptor(outShapeInfo), getContext(), bufferOffset()); RELEASE(outShapeInfo, getContext()->getWorkspace()); @@ -4819,7 +4831,7 @@ ResultSet NDArray::allTensorsAlongDimension(const std::vector &dimensions) auto numTads = pack.numberOfTads(); for (Nd4jLong idx = 0; idx < numTads; idx++ ) { - auto array = new NDArray(_buffer, ShapeDescriptor(pack.primaryShapeInfo()), getContext(), pack.primaryOffsets()[idx] + getBufferOffset()); + auto array = new NDArray(_buffer, ShapeDescriptor(pack.primaryShapeInfo()), getContext(), pack.primaryOffsets()[idx] + bufferOffset()); array->_isView = true; result.push_back(array); } @@ -4862,9 +4874,9 @@ NDArray NDArray::operator()(const std::vector& idx, const bool keepUni Nd4jLong offset; - shape::calcSubArrShapeInfoAndOffset(idx.data(), getShapeInfo(), subArrShapeInfo, offset, keepUnitiesInShape, isStrided, numOfUntiesInSubArrShape); + shape::calcSubArrShapeInfoAndOffset(idx.data(), shapeInfo(), subArrShapeInfo, offset, keepUnitiesInShape, isStrided, numOfUntiesInSubArrShape); - NDArray result(_buffer, ShapeDescriptor(subArrShapeInfo), getContext(), offset + getBufferOffset()); + NDArray result(_buffer, ShapeDescriptor(subArrShapeInfo), getContext(), offset + bufferOffset()); result._isView = true; RELEASE(subArrShapeInfo, getContext()->getWorkspace()); @@ -5025,7 +5037,7 @@ NDArray operator+(NDArray&& arr, const T& scalar) { auto tmp = NDArrayFactory::create(arr.dataType(), scalar, arr.getContext()); NDArray::prepareSpecialUse({&arr}, {&arr, &tmp}); - NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::Add, arr.getBuffer(), arr.getShapeInfo(), arr.getSpecialBuffer(), arr.getSpecialShapeInfo(), arr.buffer(), arr.getShapeInfo(), 
arr.specialBuffer(), arr.getSpecialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::Add, arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({&arr}, {&arr, &tmp}); return std::move(arr); @@ -5044,10 +5056,10 @@ NDArray operator+(const NDArray& arr, const T& scalar) { throw std::runtime_error("operator+(const NDArray& arr, const T& scalar): you can't use this method on String array!"); auto tmp = NDArrayFactory::create(arr.dataType(), scalar, arr.getContext()); - NDArray result(arr.getShapeInfo(), DataTypeUtils::pickPairwiseResultType(arr.dataType(), DataTypeUtils::fromT()), false, arr.getContext()); + NDArray result(arr.shapeInfo(), DataTypeUtils::pickPairwiseResultType(arr.dataType(), DataTypeUtils::fromT()), false, arr.getContext()); NDArray::prepareSpecialUse({&result}, {&arr, &tmp}); - NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::Add, arr.getBuffer(), arr.getShapeInfo(), arr.getSpecialBuffer(), arr.getSpecialShapeInfo(), result.buffer(), result.getShapeInfo(), result.specialBuffer(), result.getSpecialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::Add, arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({&result}, {&arr, &tmp}); return result; @@ -5095,7 +5107,7 @@ NDArray operator-(NDArray&& arr, const T& scalar) { auto tmp = NDArrayFactory::create(arr.dataType(), scalar, 
arr.getContext()); NDArray::prepareSpecialUse({&arr}, {&arr, &tmp}); - NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::Subtract, arr.getBuffer(), arr.getShapeInfo(), arr.getSpecialBuffer(), arr.getSpecialShapeInfo(), arr.buffer(), arr.getShapeInfo(), arr.specialBuffer(), arr.getSpecialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::Subtract, arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({&arr}, {&arr, &tmp}); return std::move(arr); @@ -5111,10 +5123,10 @@ NDArray operator-(const NDArray& arr, const T& scalar) { throw std::runtime_error("operator-(const NDArray& arr, const T& scalar): you can't use this method on String array!"); auto tmp = NDArrayFactory::create(arr.dataType(), scalar, arr.getContext()); - NDArray result(arr.getShapeInfo(), DataTypeUtils::pickPairwiseResultType(arr.dataType(), DataTypeUtils::fromT()), false, arr.getContext()); + NDArray result(arr.shapeInfo(), DataTypeUtils::pickPairwiseResultType(arr.dataType(), DataTypeUtils::fromT()), false, arr.getContext()); NDArray::prepareSpecialUse({&result}, {&arr, &tmp}); - NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::Subtract, arr.getBuffer(), arr.getShapeInfo(), arr.getSpecialBuffer(), arr.getSpecialShapeInfo(), result.buffer(), result.getShapeInfo(), result.specialBuffer(), result.getSpecialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::Subtract, arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), tmp.buffer(), 
tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({&result}, {&arr, &tmp}); return result; @@ -5138,7 +5150,7 @@ NDArray operator-(const T& scalar, NDArray&& arr) { auto tmp = NDArrayFactory::create(arr.dataType(), scalar, arr.getContext()); NDArray::prepareSpecialUse({&arr}, {&arr, &tmp}); - NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::ReverseSubtract, arr.getBuffer(), arr.getShapeInfo(), arr.getSpecialBuffer(), arr.getSpecialShapeInfo(), arr.getBuffer(), arr.getShapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::ReverseSubtract, arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({&arr}, {&arr, &tmp}); return std::move(arr); @@ -5158,10 +5170,10 @@ NDArray operator-(const T& scalar, const NDArray& arr) { throw std::runtime_error("operator-(const T& scalar, const NDArray& arr): you can't use this method on String array!"); auto tmp = NDArrayFactory::create(arr.dataType(), scalar, arr.getContext()); - NDArray result(arr.getShapeInfo(), DataTypeUtils::pickPairwiseResultType(arr.dataType(), DataTypeUtils::fromT()), false, arr.getContext()); + NDArray result(arr.shapeInfo(), DataTypeUtils::pickPairwiseResultType(arr.dataType(), DataTypeUtils::fromT()), false, arr.getContext()); NDArray::prepareSpecialUse({&result}, {&arr, &tmp}); - NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::ReverseSubtract, arr.getBuffer(), arr.getShapeInfo(), arr.getSpecialBuffer(), arr.getSpecialShapeInfo(), result.getBuffer(), result.getShapeInfo(), result.specialBuffer(), result.specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), 
tmp.specialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::ReverseSubtract, arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({&result}, {&arr, &tmp}); return result; @@ -5186,7 +5198,7 @@ NDArray operator*(NDArray&& arr, const T& scalar) { auto tmp = NDArrayFactory::create(arr.dataType(), scalar, arr.getContext()); NDArray::prepareSpecialUse({&arr}, {&arr, &tmp}); - NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::Multiply, arr.getBuffer(), arr.getShapeInfo(), arr.getSpecialBuffer(), arr.getSpecialShapeInfo(), arr.buffer(), arr.getShapeInfo(), arr.specialBuffer(), arr.getSpecialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::Multiply, arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({&arr}, {&arr, &tmp}); return std::move(arr); @@ -5206,10 +5218,10 @@ NDArray operator*(const NDArray& arr, const T& scalar) { throw std::runtime_error("operator*(const NDArray& arr, const T& scalar): you can't use this method on String array!"); auto tmp = NDArrayFactory::create(arr.dataType(), scalar, arr.getContext()); - NDArray result(arr.getShapeInfo(), DataTypeUtils::pickPairwiseResultType(arr.dataType(), DataTypeUtils::fromT()), false, arr.getContext()); + NDArray result(arr.shapeInfo(), DataTypeUtils::pickPairwiseResultType(arr.dataType(), DataTypeUtils::fromT()), false, arr.getContext()); NDArray::prepareSpecialUse({&result}, {&arr, &tmp}); - 
NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::Multiply, arr.getBuffer(), arr.getShapeInfo(), arr.getSpecialBuffer(), arr.getSpecialShapeInfo(), result.buffer(), result.getShapeInfo(), result.specialBuffer(), result.getSpecialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::Multiply, arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({&result}, {&arr, &tmp}); return result; @@ -5262,7 +5274,7 @@ NDArray operator/(NDArray&& arr, const T& scalar) { auto tmp = NDArrayFactory::create(arr.dataType(), scalar, arr.getContext()); NDArray::prepareSpecialUse({&arr}, {&arr, &tmp}); - NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::Divide, arr.getBuffer(), arr.getShapeInfo(), arr.getSpecialBuffer(), arr.getSpecialShapeInfo(), arr.buffer(), arr.getShapeInfo(), arr.specialBuffer(), arr.getSpecialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::Divide, arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({&arr}, {&arr, &tmp}); return std::move(arr); @@ -5281,10 +5293,10 @@ NDArray operator/(const NDArray& arr, const T& scalar) { throw std::runtime_error("operator/(const NDArray& arr, const T& scalar): you can't use this method on String array!"); auto tmp = NDArrayFactory::create(arr.dataType(), scalar, arr.getContext()); - NDArray result(arr.getShapeInfo(), 
DataTypeUtils::pickPairwiseResultType(arr.dataType(), DataTypeUtils::fromT()), false, arr.getContext()); + NDArray result(arr.shapeInfo(), DataTypeUtils::pickPairwiseResultType(arr.dataType(), DataTypeUtils::fromT()), false, arr.getContext()); NDArray::prepareSpecialUse({&result}, {&arr, &tmp}); - NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::Divide, arr.getBuffer(), arr.getShapeInfo(), arr.getSpecialBuffer(), arr.getSpecialShapeInfo(), result.buffer(), result.getShapeInfo(), result.specialBuffer(), result.getSpecialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::Divide, arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({&result}, {&arr, &tmp}); return result; @@ -5309,7 +5321,7 @@ NDArray operator/(const T& scalar, NDArray&& arr) { auto tmp = NDArrayFactory::create(arr.dataType(), scalar, arr.getContext()); NDArray::prepareSpecialUse({&arr}, {&arr, &tmp}); - NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::ReverseDivide, arr.getBuffer(), arr.getShapeInfo(), arr.getSpecialBuffer(), arr.getSpecialShapeInfo(), arr.getBuffer(), arr.getShapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::ReverseDivide, arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({&arr}, {&arr, &tmp}); return std::move(arr); @@ -5330,10 +5342,10 @@ NDArray 
operator/(const T& scalar, const NDArray& arr) { throw std::runtime_error("operator/(const T& scalar, const NDArray& arr): you can't use this method on String array!"); auto tmp = NDArrayFactory::create(arr.dataType(), scalar, arr.getContext()); - NDArray result(arr.getShapeInfo(), DataTypeUtils::pickPairwiseResultType(arr.dataType(), DataTypeUtils::fromT()), false, arr.getContext()); + NDArray result(arr.shapeInfo(), DataTypeUtils::pickPairwiseResultType(arr.dataType(), DataTypeUtils::fromT()), false, arr.getContext()); NDArray::prepareSpecialUse({&result}, {&arr, &tmp}); - NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::ReverseDivide, arr.getBuffer(), arr.getShapeInfo(), arr.getSpecialBuffer(), arr.getSpecialShapeInfo(), result.getBuffer(), result.getShapeInfo(), result.specialBuffer(), result.specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); + NativeOpExecutioner::execScalar(arr.getContext(), sd::scalar::ReverseDivide, arr.buffer(), arr.shapeInfo(), arr.specialBuffer(), arr.specialShapeInfo(), result.buffer(), result.shapeInfo(), result.specialBuffer(), result.specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({&result}, {&arr, &tmp}); return result; @@ -5365,10 +5377,10 @@ NDArray operator+(T1&& arr1, T2&& arr2) { else if(isArr2Rvalue) result = const_cast(&arr2); else - result = new NDArray(arr1.getShapeInfo(), DataTypeUtils::pickPairwiseResultType(arr1.getShapeInfo(), arr2.getShapeInfo()), false, arr1.getContext()); + result = new NDArray(arr1.shapeInfo(), DataTypeUtils::pickPairwiseResultType(arr1.shapeInfo(), arr2.shapeInfo()), false, arr1.getContext()); NDArray::prepareSpecialUse({result}, {&arr1, &arr2}); - NativeOpExecutioner::execPairwiseTransform(arr1.getContext(), sd::pairwise::Add, arr1.getBuffer(), arr1.getShapeInfo(), arr1.getSpecialBuffer(), arr1.getSpecialShapeInfo(), arr2.getBuffer(), 
arr2.getShapeInfo(), arr2.getSpecialBuffer(), arr2.getSpecialShapeInfo(), result->buffer(), result->getShapeInfo(), result->specialBuffer(), result->getSpecialShapeInfo(), nullptr); + NativeOpExecutioner::execPairwiseTransform(arr1.getContext(), sd::pairwise::Add, arr1.buffer(), arr1.shapeInfo(), arr1.specialBuffer(), arr1.specialShapeInfo(), arr2.buffer(), arr2.shapeInfo(), arr2.specialBuffer(), arr2.specialShapeInfo(), result->buffer(), result->shapeInfo(), result->specialBuffer(), result->specialShapeInfo(), nullptr); NDArray::registerSpecialUse({result}, {&arr1, &arr2}); if(!isArr1Rvalue && !isArr2Rvalue) { @@ -5415,10 +5427,10 @@ NDArray operator-(T1&& arr1, T2&& arr2) { else if(isArr2Rvalue) result = const_cast(&arr2); else - result = new NDArray(arr1.getShapeInfo(), DataTypeUtils::pickPairwiseResultType(arr1.getShapeInfo(), arr2.getShapeInfo()), false, arr1.getContext()); + result = new NDArray(arr1.shapeInfo(), DataTypeUtils::pickPairwiseResultType(arr1.shapeInfo(), arr2.shapeInfo()), false, arr1.getContext()); NDArray::prepareSpecialUse({result}, {&arr1, &arr2}); - NativeOpExecutioner::execPairwiseTransform(arr1.getContext(), sd::pairwise::Subtract, arr1.getBuffer(), arr1.getShapeInfo(), arr1.getSpecialBuffer(), arr1.getSpecialShapeInfo(), arr2.getBuffer(), arr2.getShapeInfo(), arr2.getSpecialBuffer(), arr2.getSpecialShapeInfo(), result->buffer(), result->getShapeInfo(), result->specialBuffer(), result->getSpecialShapeInfo(), nullptr); + NativeOpExecutioner::execPairwiseTransform(arr1.getContext(), sd::pairwise::Subtract, arr1.buffer(), arr1.shapeInfo(), arr1.specialBuffer(), arr1.specialShapeInfo(), arr2.buffer(), arr2.shapeInfo(), arr2.specialBuffer(), arr2.specialShapeInfo(), result->buffer(), result->shapeInfo(), result->specialBuffer(), result->specialShapeInfo(), nullptr); NDArray::registerSpecialUse({result}, {&arr1, &arr2}); if(!isArr1Rvalue && !isArr2Rvalue) { @@ -5465,10 +5477,10 @@ NDArray operator*(T1&& arr1, T2&& arr2) { else if(isArr2Rvalue) 
result = const_cast(&arr2); else - result = new NDArray(arr1.getShapeInfo(), DataTypeUtils::pickPairwiseResultType(arr1.getShapeInfo(), arr2.getShapeInfo()), false, arr1.getContext()); + result = new NDArray(arr1.shapeInfo(), DataTypeUtils::pickPairwiseResultType(arr1.shapeInfo(), arr2.shapeInfo()), false, arr1.getContext()); NDArray::prepareSpecialUse({result}, {&arr1, &arr2}); - NativeOpExecutioner::execPairwiseTransform(arr1.getContext(), sd::pairwise::Multiply, arr1.getBuffer(), arr1.getShapeInfo(), arr1.getSpecialBuffer(), arr1.getSpecialShapeInfo(), arr2.getBuffer(), arr2.getShapeInfo(), arr2.getSpecialBuffer(), arr2.getSpecialShapeInfo(), result->buffer(), result->getShapeInfo(), result->specialBuffer(), result->getSpecialShapeInfo(), nullptr); + NativeOpExecutioner::execPairwiseTransform(arr1.getContext(), sd::pairwise::Multiply, arr1.buffer(), arr1.shapeInfo(), arr1.specialBuffer(), arr1.specialShapeInfo(), arr2.buffer(), arr2.shapeInfo(), arr2.specialBuffer(), arr2.specialShapeInfo(), result->buffer(), result->shapeInfo(), result->specialBuffer(), result->specialShapeInfo(), nullptr); NDArray::registerSpecialUse({result}, {&arr1, &arr2}); if(!isArr1Rvalue && !isArr2Rvalue) { @@ -5515,10 +5527,10 @@ NDArray operator/(T1&& arr1, T2&& arr2) { else if(isArr2Rvalue) result = const_cast(&arr2); else - result = new NDArray(arr1.getShapeInfo(), DataTypeUtils::pickPairwiseResultType(arr1.getShapeInfo(), arr2.getShapeInfo()), false, arr1.getContext()); + result = new NDArray(arr1.shapeInfo(), DataTypeUtils::pickPairwiseResultType(arr1.shapeInfo(), arr2.shapeInfo()), false, arr1.getContext()); NDArray::prepareSpecialUse({result}, {&arr1, &arr2}); - NativeOpExecutioner::execPairwiseTransform(arr1.getContext(), sd::pairwise::Divide, arr1.getBuffer(), arr1.getShapeInfo(), arr1.getSpecialBuffer(), arr1.getSpecialShapeInfo(), arr2.getBuffer(), arr2.getShapeInfo(), arr2.getSpecialBuffer(), arr2.getSpecialShapeInfo(), result->buffer(), result->getShapeInfo(), 
result->specialBuffer(), result->getSpecialShapeInfo(), nullptr); + NativeOpExecutioner::execPairwiseTransform(arr1.getContext(), sd::pairwise::Divide, arr1.buffer(), arr1.shapeInfo(), arr1.specialBuffer(), arr1.specialShapeInfo(), arr2.buffer(), arr2.shapeInfo(), arr2.specialBuffer(), arr2.specialShapeInfo(), result->buffer(), result->shapeInfo(), result->specialBuffer(), result->specialShapeInfo(), nullptr); NDArray::registerSpecialUse({result}, {&arr1, &arr2}); if(!isArr1Rvalue && !isArr2Rvalue) { diff --git a/libnd4j/include/array/NDArrayLambda.hXX b/libnd4j/include/array/NDArrayLambda.hXX index 50d9bc8d6..f213b6aa6 100644 --- a/libnd4j/include/array/NDArrayLambda.hXX +++ b/libnd4j/include/array/NDArrayLambda.hXX @@ -23,26 +23,26 @@ #include #include -static Nd4jLong __device__ __noinline__ getIndexOffset(Nd4jLong index, Nd4jLong *shapeInfo) { +static Nd4jLong __device__ __noinline__ getIndexOffset(Nd4jLong index, const Nd4jLong *shapeInfo) { return shape::getIndexOffset(index, shapeInfo); } -static Nd4jLong __device__ __noinline__ length(Nd4jLong *shapeInfo) { +static Nd4jLong __device__ __noinline__ length(const Nd4jLong *shapeInfo) { return shape::length(shapeInfo); } -template static _CUDA_G void lambdaKernel(void* vx, Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, Lambda lambda); -template static _CUDA_G void lambdaIndexedKernel(void* vx, Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, Lambda lambda); -template static _CUDA_G void lambdaIndexedPairwiseKernel(void* vx, Nd4jLong *xShapeInfo, void* vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, Lambda lambda); -template static _CUDA_G void lambdaPairwiseKernel(void* vx, Nd4jLong *xShapeInfo, void* vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, Lambda lambda); -template static _CUDA_G void lambdaTriplewiseKernel(void* vw, Nd4jLong *wShapeInfo, void* vx, Nd4jLong *xShapeInfo, void* vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, Lambda lambda); +template static 
_CUDA_G void lambdaKernel(const void* vx, const Nd4jLong *xShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Lambda lambda); +template static _CUDA_G void lambdaIndexedKernel(const void* vx, const Nd4jLong *xShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Lambda lambda); +template static _CUDA_G void lambdaIndexedPairwiseKernel(const void* vx, const Nd4jLong *xShapeInfo, const void* vy, const Nd4jLong *yShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Lambda lambda); +template static _CUDA_G void lambdaPairwiseKernel(const void* vx, const Nd4jLong *xShapeInfo, const void* vy, const Nd4jLong *yShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Lambda lambda); +template static _CUDA_G void lambdaTriplewiseKernel(const void* vw, const Nd4jLong *wShapeInfo, const void* vx, const Nd4jLong *xShapeInfo, const void* vy, const Nd4jLong *yShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Lambda lambda); template class LambdaHelper { public: template - FORCEINLINE static void lambdaLauncher(cudaStream_t *stream, void* vx, Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, Lambda lambda) { + FORCEINLINE static void lambdaLauncher(cudaStream_t *stream, const void* vx, const Nd4jLong *xShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Lambda lambda) { lambdaKernel<<<256, 512, 1024, *stream>>>(vx, xShapeInfo, vz, zShapeInfo, lambda); auto err = cudaStreamSynchronize(*stream); if (err != 0) @@ -50,7 +50,7 @@ public: } template - FORCEINLINE static void lambdaIndexedLauncher(cudaStream_t *stream, void* vx, Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, Lambda lambda) { + FORCEINLINE static void lambdaIndexedLauncher(cudaStream_t *stream, const void* vx, const Nd4jLong *xShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Lambda lambda) { lambdaIndexedKernel<<<256, 512, 1024, *stream>>>(vx, xShapeInfo, vz, zShapeInfo, lambda); auto err = cudaStreamSynchronize(*stream); if (err != 0) @@ -58,7 +58,7 @@ public: } template - FORCEINLINE static void lambdaPairwiseLauncher(cudaStream_t 
*stream, void* vx, Nd4jLong *xShapeInfo, void* vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, Lambda lambda) { + FORCEINLINE static void lambdaPairwiseLauncher(cudaStream_t *stream, const void* vx, const Nd4jLong *xShapeInfo, const void* vy, const Nd4jLong *yShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Lambda lambda) { lambdaPairwiseKernel<<<256, 512, 1024, *stream>>>(vx, xShapeInfo, vy, yShapeInfo, vz, zShapeInfo, lambda); auto err = cudaStreamSynchronize(*stream); if (err != 0) @@ -66,7 +66,7 @@ public: } template - FORCEINLINE static void lambdaIndexedPairwiseLauncher(cudaStream_t *stream, void* vx, Nd4jLong *xShapeInfo, void* vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, Lambda lambda) { + FORCEINLINE static void lambdaIndexedPairwiseLauncher(cudaStream_t *stream, const void* vx, const Nd4jLong *xShapeInfo, const void* vy, const Nd4jLong *yShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Lambda lambda) { lambdaIndexedPairwiseKernel<<<256, 512, 1024, *stream>>>(vx, xShapeInfo, vy, yShapeInfo, vz, zShapeInfo, lambda); auto err = cudaStreamSynchronize(*stream); if (err != 0) @@ -74,7 +74,7 @@ public: } template - FORCEINLINE static void lambdaTriplewiseLauncher(cudaStream_t *stream, void* vw, Nd4jLong *wShapeInfo, void* vx, Nd4jLong *xShapeInfo, void* vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, Lambda lambda) { + FORCEINLINE static void lambdaTriplewiseLauncher(cudaStream_t *stream,const void* vw, const Nd4jLong *wShapeInfo, const void* vx, const Nd4jLong *xShapeInfo, const void* vy, const Nd4jLong *yShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Lambda lambda) { lambdaTriplewiseKernel<<<256, 512, 1024, *stream>>>(vw, wShapeInfo, vx, xShapeInfo, vy, yShapeInfo, vz, zShapeInfo, lambda); auto err = cudaStreamSynchronize(*stream); if (err != 0) @@ -84,8 +84,8 @@ public: //////////////////////////////////////////////////////////////////////// template -static _CUDA_G void lambdaKernel(void* vx, Nd4jLong *xShapeInfo, void *vz, 
Nd4jLong *zShapeInfo, Lambda lambda) { - auto x = reinterpret_cast(vx); +static _CUDA_G void lambdaKernel(const void* vx, const Nd4jLong *xShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Lambda lambda) { + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto xEws = shape::elementWiseStride(xShapeInfo); @@ -113,8 +113,8 @@ static _CUDA_G void lambdaKernel(void* vx, Nd4jLong *xShapeInfo, void *vz, Nd4jL //////////////////////////////////////////////////////////////////////// template -static _CUDA_G void lambdaIndexedKernel(void* vx, Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, Lambda lambda) { - auto x = reinterpret_cast(vx); +static _CUDA_G void lambdaIndexedKernel(const void* vx, const Nd4jLong *xShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Lambda lambda) { + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto xEws = shape::elementWiseStride(xShapeInfo); @@ -142,9 +142,9 @@ static _CUDA_G void lambdaIndexedKernel(void* vx, Nd4jLong *xShapeInfo, void *vz //////////////////////////////////////////////////////////////////////// template -static _CUDA_G void lambdaIndexedPairwiseKernel(void* vx, Nd4jLong *xShapeInfo, void* vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, Lambda lambda) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); +static _CUDA_G void lambdaIndexedPairwiseKernel(const void* vx, const Nd4jLong *xShapeInfo, const void* vy, const Nd4jLong *yShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Lambda lambda) { + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto xEws = shape::elementWiseStride(xShapeInfo); @@ -175,9 +175,9 @@ static _CUDA_G void lambdaIndexedPairwiseKernel(void* vx, Nd4jLong *xShapeInfo, //////////////////////////////////////////////////////////////////////// template -static _CUDA_G void lambdaPairwiseKernel(void* vx, Nd4jLong *xShapeInfo, void* vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, Lambda lambda) { - 
auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); +static _CUDA_G void lambdaPairwiseKernel(const void* vx, const Nd4jLong *xShapeInfo, const void* vy, const Nd4jLong *yShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Lambda lambda) { + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto xEws = shape::elementWiseStride(xShapeInfo); @@ -208,10 +208,10 @@ static _CUDA_G void lambdaPairwiseKernel(void* vx, Nd4jLong *xShapeInfo, void* v //////////////////////////////////////////////////////////////////////// template -static _CUDA_G void lambdaTriplewiseKernel(void* vw, Nd4jLong *wShapeInfo, void* vx, Nd4jLong *xShapeInfo, void* vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, Lambda lambda) { - auto w = reinterpret_cast(vw); - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); +static _CUDA_G void lambdaTriplewiseKernel(const void* vw, const Nd4jLong *wShapeInfo, const void* vx, const Nd4jLong *xShapeInfo, const void* vy, const Nd4jLong *yShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Lambda lambda) { + auto w = reinterpret_cast(vw); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto wEws = shape::elementWiseStride(wShapeInfo); @@ -271,7 +271,7 @@ void NDArray::applyPairwiseLambda(const NDArray& other, Lambda func, NDArray& ta //throw datatype_exception::build("NDArray::applyLambda X/Z data types must be the same", dtype, target.dataType()); prepareSpecialUse({&target}, {this, &other}); - BUILD_SINGLE_SELECTOR(dtype, LambdaHelper ,::lambdaPairwiseLauncher(this->_context->getCudaStream(), this->specialBuffer(), this->specialShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), target.specialBuffer(), target.specialShapeInfo(), func), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(dtype, LambdaHelper ,::lambdaPairwiseLauncher(this->_context->getCudaStream(), this->specialBuffer(), this->specialShapeInfo(), other.specialBuffer(), 
other.specialShapeInfo(), target.specialBuffer(), target.specialShapeInfo(), func), LIBND4J_TYPES); registerSpecialUse({&target}, {this, &other}); } @@ -298,7 +298,7 @@ void NDArray::applyIndexedPairwiseLambda(NDArray& other, Lambda func, NDArray& t throw std::runtime_error("NDArray::applyIndexedPairwiseLambda X/Y/Z data types must be the same"); prepareSpecialUse({&target}, {this, &other}); - BUILD_SINGLE_SELECTOR(dtype, LambdaHelper ,::lambdaIndexedPairwiseLauncher(this->_context->getCudaStream(), this->specialBuffer(), this->specialShapeInfo(), other.getSpecialBuffer(), other.getSpecialShapeInfo(), target.specialBuffer(), target.specialShapeInfo(), func), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(dtype, LambdaHelper ,::lambdaIndexedPairwiseLauncher(this->_context->getCudaStream(), this->specialBuffer(), this->specialShapeInfo(), other.specialBuffer(), other.specialShapeInfo(), target.specialBuffer(), target.specialShapeInfo(), func), LIBND4J_TYPES); registerSpecialUse({&target}, {this, &other}); } diff --git a/libnd4j/include/array/ShapeList.h b/libnd4j/include/array/ShapeList.h index 2d0fde4ad..f0034ac81 100644 --- a/libnd4j/include/array/ShapeList.h +++ b/libnd4j/include/array/ShapeList.h @@ -28,26 +28,24 @@ namespace sd { class ND4J_EXPORT ShapeList { protected: - std::vector _shapes; + std::vector _shapes; bool _destroyed = false; bool _autoremovable = false; bool _workspace = false; public: - ShapeList(Nd4jLong* shape = nullptr); - ShapeList(std::initializer_list shapes); - ShapeList(std::initializer_list shapes, bool isWorkspace); - ShapeList(std::vector& shapes); + ShapeList(const Nd4jLong* shape = nullptr); + ShapeList(const std::vector &shapes, bool isWorkspace); + ShapeList(const std::vector& shapes); //ShapeList(bool autoRemovable); ~ShapeList(); - std::vector* asVector(); + std::vector* asVector(); void destroy(); - int size(); - Nd4jLong* at(int idx); - void push_back(Nd4jLong *shape); - void push_back(std::vector& shape); + int size() const; + const 
Nd4jLong* at(int idx); + void push_back(const Nd4jLong *shape); /** * PLEASE NOTE: This method should be called ONLY if shapes were generated at workspaces. Otherwise you'll get memory leak diff --git a/libnd4j/include/array/TadPack.h b/libnd4j/include/array/TadPack.h index 09b084548..3cd95fa59 100644 --- a/libnd4j/include/array/TadPack.h +++ b/libnd4j/include/array/TadPack.h @@ -28,18 +28,18 @@ namespace sd { private: ConstantDataBuffer _tadShape; ConstantDataBuffer _tadOffsets; - Nd4jLong _numTads; - int _shapeInfoLength; + Nd4jLong _numTads = 0 ; + int _shapeInfoLength = 0; public: explicit TadPack(ConstantDataBuffer &shapes, ConstantDataBuffer &offets, Nd4jLong numTads); TadPack() = default; ~TadPack() = default; - Nd4jLong* primaryShapeInfo() const; - Nd4jLong* primaryOffsets() const; + const Nd4jLong* primaryShapeInfo() const; + const Nd4jLong* primaryOffsets() const; - Nd4jLong* specialShapeInfo() const; - Nd4jLong* specialOffsets() const; + const Nd4jLong* specialShapeInfo() const; + const Nd4jLong* specialOffsets() const; Nd4jLong numberOfTads() const; int shapeInfoLength() const; @@ -48,8 +48,8 @@ namespace sd { * These methods return either primary or special pointers depending on platform binaries were compiled for * @return */ - Nd4jLong *platformShapeInfo() const; - Nd4jLong *platformOffsets() const; + const Nd4jLong *platformShapeInfo() const; + const Nd4jLong *platformOffsets() const; }; } diff --git a/libnd4j/include/array/cpu/NDArray.cpp b/libnd4j/include/array/cpu/NDArray.cpp index 1d97ba61c..87369f740 100644 --- a/libnd4j/include/array/cpu/NDArray.cpp +++ b/libnd4j/include/array/cpu/NDArray.cpp @@ -52,10 +52,9 @@ namespace sd { //////////////////////////////////////////////////////////////////////// void* NDArray::platformBuffer() { return buffer(); } -void* NDArray::getPlatformBuffer() const { return getBuffer(); } +void const* NDArray::platformBuffer() const { return buffer(); } -Nd4jLong* NDArray::getPlatformShapeInfo() const { return 
getShapeInfo(); } -Nd4jLong* NDArray::platformShapeInfo() { return shapeInfo(); } +Nd4jLong const* NDArray::platformShapeInfo() const { return shapeInfo(); } void NDArray::syncToDevice() const { } void NDArray::syncToHost() const { } @@ -85,15 +84,15 @@ void NDArray::fillAsTriangular(const float val, int lower, int upper, NDArray& t upper = target.sizeAt(-1); const T value = static_cast(val); - const auto x = reinterpret_cast(getBuffer()); - auto z = reinterpret_cast(target.getBuffer()); + const auto x = reinterpret_cast(buffer()); + auto z = reinterpret_cast(target.buffer()); const int xRank = rankOf(); const int zRank = target.rankOf(); const auto zLen = target.lengthOf(); - const bool areSameOffsets = shape::haveSameShapeAndStrides(getShapeInfo(), target.getShapeInfo()); + const bool areSameOffsets = shape::haveSameShapeAndStrides(shapeInfo(), target.shapeInfo()); auto func = PRAGMA_THREADS_FOR { @@ -101,8 +100,8 @@ void NDArray::fillAsTriangular(const float val, int lower, int upper, NDArray& t for (auto i = start; i < stop; i++) { - shape::index2coordsCPU(start, i, target.getShapeInfo(), coords); - const auto zOffset = shape::getOffset(target.getShapeInfo(), coords); + shape::index2coordsCPU(start, i, target.shapeInfo(), coords); + const auto zOffset = shape::getOffset(target.shapeInfo(), coords); // if( (row + upper < col) || (row + lower > col) ) if ((coords[zRank - 2] + upper < coords[zRank - 1]) || (coords[zRank - 2] + lower > coords[zRank - 1])) @@ -113,7 +112,7 @@ void NDArray::fillAsTriangular(const float val, int lower, int upper, NDArray& t coords[0] = coords[1]; } - const auto xOffset = areSameOffsets ? zOffset : shape::getOffset(getShapeInfo(), coords); + const auto xOffset = areSameOffsets ? 
zOffset : shape::getOffset(shapeInfo(), coords); z[zOffset] = x[xOffset]; if (xRank != zRank) // restore first coordinate @@ -140,7 +139,7 @@ void NDArray::setIdentity() { for(int j = 0; j < rank; ++j) indices[j] = 1; - Nd4jLong offset = shape::getOffset(getShapeInfo(), indices); + Nd4jLong offset = shape::getOffset(shapeInfo(), indices); for(int i = 0; i < rank; ++i) if(minDim > shape[i]) @@ -214,23 +213,28 @@ void NDArray::printCurrentBuffer(const bool host, const char* msg, const int pre } + //////////////////////////////////////////////////////////////////////// + void* NDArray::specialBufferWithOffset(Nd4jLong offset) { + return nullptr; + } + //////////////////////////////////////////////////////////////////////// -void* NDArray::specialBufferWithOffset(Nd4jLong offset) const { +const void* NDArray::specialBufferWithOffset(Nd4jLong offset) const { return nullptr; } //////////////////////////////////////////////////////////////////////// void* NDArray::specialBuffer() { if (_buffer->special() == nullptr) - return getBuffer(); + return buffer(); // FIXME: this should be fixed once CUDA backend added return static_cast(_buffer->special()) + (_offset * sizeOfT()); } //////////////////////////////////////////////////////////////////////// -void* NDArray::getSpecialBuffer() const { +void const* NDArray::specialBuffer() const { if (_buffer->special() == nullptr) - return getBuffer(); + return buffer(); // FIXME: this should be fixed once CUDA backend added return static_cast(_buffer->special()) + (_offset * sizeOfT()); } @@ -253,7 +257,7 @@ NDArray NDArray::tile(const std::vector& reps) const { NDArray result(*this); if(diff < 0) { // reshape to higher dimension std::vector shapeNew = reps; // there is requirement to have unities at first "diff" positions of new shape - memcpy(&shapeNew[-diff], result.getShapeInfo()+1, rankOld * sizeof(Nd4jLong)); // put old shape numbers at rest of positions + memcpy(&shapeNew[-diff], result.shapeInfo()+1, rankOld * 
sizeof(Nd4jLong)); // put old shape numbers at rest of positions result.reshapei(ordering(), shapeNew); } return result; // nothing to do, if diff >= 0 -> identity tile @@ -274,8 +278,8 @@ NDArray NDArray::tile(const std::vector& reps) const { auto func = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i++) { - auto yOffset = shape::subArrayOffset(i, newShapeInfo, getShapeInfo()); - BUILD_SINGLE_SELECTOR(xType, this->template templatedAssign,(result.getBuffer(), i, this->getBuffer(), yOffset), LIBND4J_TYPES); + auto yOffset = shape::subArrayOffset(i, newShapeInfo, shapeInfo()); + BUILD_SINGLE_SELECTOR(xType, this->template templatedAssign,(result.buffer(), i, this->buffer(), yOffset), LIBND4J_TYPES); } }; @@ -286,8 +290,8 @@ NDArray NDArray::tile(const std::vector& reps) const { auto func = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i++) { auto xOffset = result.getOffset(i); - auto yOffset = shape::subArrayOffset(i, newShapeInfo, getShapeInfo()); - BUILD_SINGLE_SELECTOR(xType, this->template templatedAssign,(result.getBuffer(), xOffset, this->getBuffer(), yOffset), LIBND4J_TYPES); + auto yOffset = shape::subArrayOffset(i, newShapeInfo, shapeInfo()); + BUILD_SINGLE_SELECTOR(xType, this->template templatedAssign,(result.buffer(), xOffset, this->buffer(), yOffset), LIBND4J_TYPES); } }; @@ -307,7 +311,7 @@ void NDArray::tile(const std::vector& reps, NDArray& target) const { // evaluate true tile shapeInfo for comparison with target shapeInfo auto newShapeInfo = ShapeUtils::evalTileShapeInfo(*this, reps, getContext()->getWorkspace()); - if(!shape::equalsSoft(newShapeInfo, target.getShapeInfo())) { + if(!shape::equalsSoft(newShapeInfo, target.shapeInfo())) { delete []newShapeInfo; throw std::runtime_error("NDArray::tile method - shapeInfo of target array is not suitable for tile operation !"); } @@ -319,14 +323,14 @@ void NDArray::tile(const std::vector& reps, NDArray& target) const { if(target.ordering() == 'c' && ews == 1) { // ews == 1 always here 
//#pragma omp parallel for simd if(targetLen > Environment::getInstance()->elementwiseThreshold()) schedule(guided) for(Nd4jLong i=0; i 1) { for(Nd4jLong i=0; i& reps, NDArray& target) const { for(Nd4jLong i=0; i= 1) { for(Nd4jLong i=0; i(this)->setShapeInfo(this->getShapeInfo()); + const_cast(this)->setShapeInfo(this->shapeInfo()); // now we actually migrate data buffer _buffer->migrate(); @@ -142,7 +142,7 @@ void NDArray::fillAsTriangular(const float val, int lower, int upper, NDArray& t PointersManager manager(getContext(), "NDArray::fillAsTriangular"); NDArray::prepareSpecialUse({&target}, {this}); - fillAsTriangularCuda<<getCudaStream()>>>(getPlatformBuffer(), getPlatformShapeInfo(), target.getPlatformBuffer(), target.getPlatformShapeInfo(), static_cast(val), lower, upper); + fillAsTriangularCuda<<getCudaStream()>>>(platformBuffer(), platformShapeInfo(), target.platformBuffer(), target.platformShapeInfo(), static_cast(val), lower, upper); NDArray::registerSpecialUse({&target}, {this}); manager.synchronize(); @@ -206,7 +206,7 @@ void NDArray::setIdentity() { PointersManager manager(getContext(), "NDArray::setIdentity"); syncToDevice(); - BUILD_SINGLE_SELECTOR(dataType(), identityMatrixCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, getContext()->getCudaStream(), getPlatformBuffer(), getPlatformShapeInfo(), 1.f), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(dataType(), identityMatrixCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, getContext()->getCudaStream(), platformBuffer(), platformShapeInfo(), 1.f), LIBND4J_TYPES); tickWriteDevice(); manager.synchronize(); @@ -293,12 +293,16 @@ void NDArray::registerPrimaryUse(const std::vector& writeList, c ////////////////////////////////////////////////////////////////////////// void NDArray::syncShape() const { - cudaMemcpy(getSpecialShapeInfo(), getShapeInfo(), shape::shapeInfoByteLength(getShapeInfo()), cudaMemcpyHostToDevice); + cudaMemcpy(const_cast(specialShapeInfo()), shapeInfo(), 
shape::shapeInfoByteLength(shapeInfo()), cudaMemcpyHostToDevice); } ////////////////////////////////////////////////////////////////////////// -void* NDArray::specialBufferWithOffset(Nd4jLong offset) const { - return getSpecialBuffer() != nullptr ? static_cast(getSpecialBuffer()) + (offset * sizeOfT()) : nullptr; +void const* NDArray::specialBufferWithOffset(Nd4jLong offset) const { + return specialBuffer() != nullptr ? static_cast(specialBuffer()) + (offset * sizeOfT()) : nullptr; +} + +void* NDArray::specialBufferWithOffset(Nd4jLong offset){ + return specialBuffer() != nullptr ? static_cast(specialBuffer()) + (offset * sizeOfT()) : nullptr; } ////////////////////////////////////////////////////////////////////////// @@ -318,7 +322,7 @@ NDArray NDArray::tile(const std::vector& reps) const { NDArray result(*this); if(diff < 0) { // reshape to higher dimension std::vector shapeNew = reps; // need to have unities at first "diff" positions of new shape - memcpy(&shapeNew[-diff], result.getShapeInfo()+1, rankOld * sizeof(Nd4jLong)); // put old shape numbers at rest of positions + memcpy(&shapeNew[-diff], result.shapeInfo()+1, rankOld * sizeof(Nd4jLong)); // put old shape numbers at rest of positions result.reshapei(ordering(), shapeNew); } return result; // nothing to do, if diff >= 0 -> identity tile @@ -332,13 +336,13 @@ NDArray NDArray::tile(const std::vector& reps) const { NDArray result(newBuff, ShapeDescriptor(newShapeInfo), getContext()); // fill newBuff, loop through all elements of newBuff - // looping through getBuffer() goes automatically by means of getSubArrayIndex applying + // looping through buffer() goes automatically by means of getSubArrayIndex applying const auto resultLen = result.lengthOf(); auto xType = this->dataType(); auto stream = getContext()->getCudaStream(); prepareSpecialUse({&result}, {this}); - BUILD_SINGLE_SELECTOR(xType, tileKernelH, (this->getSpecialBuffer(), this->getSpecialShapeInfo(), result.getSpecialBuffer(), 
result.getSpecialShapeInfo(), resultLen, stream), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(xType, tileKernelH, (this->specialBuffer(), this->specialShapeInfo(), result.specialBuffer(), result.specialShapeInfo(), resultLen, stream), LIBND4J_TYPES); registerSpecialUse({&result}, {this}); return result; @@ -354,18 +358,18 @@ void NDArray::tile(const std::vector& reps, NDArray& target) const { // evaluate true tile shapeInfo for comparison with target shapeInfo auto newShapeInfo = ShapeUtils::evalTileShapeInfo(*this, reps, getContext()->getWorkspace()); - if(!shape::equalsSoft(newShapeInfo, target.getShapeInfo())) { + if(!shape::equalsSoft(newShapeInfo, target.shapeInfo())) { throw std::runtime_error("NDArray::tile method - shapeInfo of target array is not suitable for tile operation !"); } // fill newBuff, loop through all elements of newBuff - // looping through getBuffer() goes automatically by means of getSubArrayIndex applying + // looping through buffer() goes automatically by means of getSubArrayIndex applying const int ews = target.ews(); const int targetLen = target.lengthOf(); auto stream = getContext()->getCudaStream(); prepareSpecialUse({&target}, {this}); - BUILD_SINGLE_SELECTOR_TWICE(target.dataType(), tileKernelHH, (getSpecialBuffer(), getSpecialShapeInfo(), target.getSpecialBuffer(), target.getSpecialShapeInfo(), targetLen, ews, stream), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR_TWICE(target.dataType(), tileKernelHH, (specialBuffer(), specialShapeInfo(), target.specialBuffer(), target.specialShapeInfo(), targetLen, ews, stream), LIBND4J_TYPES); registerSpecialUse({&target}, {this}); } @@ -384,7 +388,7 @@ void NDArray::tile(NDArray& target) const { auto stream = getContext()->getCudaStream(); prepareSpecialUse({&target}, {this}); - BUILD_SINGLE_SELECTOR_TWICE(target.dataType(), tileKernelHH, (getSpecialBuffer(), getSpecialShapeInfo(), target.getSpecialBuffer(), target.getSpecialShapeInfo(), targetLen, ews, stream), LIBND4J_TYPES); + 
BUILD_SINGLE_SELECTOR_TWICE(target.dataType(), tileKernelHH, (specialBuffer(), specialShapeInfo(), target.specialBuffer(), target.specialShapeInfo(), targetLen, ews, stream), LIBND4J_TYPES); registerSpecialUse({&target}, {this}); } @@ -467,7 +471,7 @@ NDArray NDArray::repeat(const int axis, const std::vector& repeats) const { const int* reps = reinterpret_cast(manager.replicatePointer(repeats.data(), repeats.size() * sizeof(int))); prepareSpecialUse({&output}, {this}); - BUILD_SINGLE_SELECTOR_TWICE(dataType(), repeatCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, getContext()->getCudaStream(), getSpecialBuffer(), getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), reps, repeats.size(), axis), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR_TWICE(dataType(), repeatCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, getContext()->getCudaStream(), specialBuffer(), specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), reps, repeats.size(), axis), LIBND4J_TYPES); prepareSpecialUse({&output}, {this}); manager.synchronize(); @@ -491,7 +495,7 @@ void NDArray::repeat(const int axis, const std::vector& repeats, NDArray& t const int* reps = reinterpret_cast(manager.replicatePointer(repeats.data(), repeats.size() * sizeof(int))); prepareSpecialUse({&target}, {this}); - BUILD_DOUBLE_SELECTOR(dataType(), target.dataType(), repeatCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, getContext()->getCudaStream(), getSpecialBuffer(), getSpecialShapeInfo(), target.specialBuffer(), target.specialShapeInfo(), reps, repeats.size(), axis), LIBND4J_TYPES, LIBND4J_TYPES); + BUILD_DOUBLE_SELECTOR(dataType(), target.dataType(), repeatCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, getContext()->getCudaStream(), specialBuffer(), specialShapeInfo(), target.specialBuffer(), target.specialShapeInfo(), reps, repeats.size(), axis), LIBND4J_TYPES, LIBND4J_TYPES); prepareSpecialUse({&target}, {this}); manager.synchronize(); @@ -501,16 
+505,20 @@ void NDArray::repeat(const int axis, const std::vector& repeats, NDArray& t //////////////////////////////////////////////////////////////////////// void* NDArray::specialBuffer() { - if (_buffer->special() == nullptr) - return getBuffer(); + if (_buffer->special() == nullptr) { + syncToDevice(); + tickReadHost(); + } // FIXME: this should be fixed once CUDA backend added return static_cast(_buffer->special()) + (_offset * sizeOfT()); } //////////////////////////////////////////////////////////////////////// -void* NDArray::getSpecialBuffer() const { - if (_buffer->special() == nullptr) - return getBuffer(); +void const* NDArray::specialBuffer() const { + if (_buffer->special() == nullptr) { + syncToDevice(); + tickReadHost(); + } // FIXME: this should be fixed once CUDA backend added return static_cast(_buffer->special()) + (_offset * sizeOfT()); } @@ -526,7 +534,7 @@ void NDArray::printCurrentBuffer(const bool host, const char* msg, const int pre printf("%s", msg); if(host) { - if(getBuffer() == nullptr || _length == 0) + if(buffer() == nullptr || _length == 0) { printf("NDArray::printActualBuffer: host buffer is nullptr !\n"); return; } const T* buff = bufferAsT(); @@ -535,7 +543,7 @@ void NDArray::printCurrentBuffer(const bool host, const char* msg, const int pre printf("\n"); } else { - if(getSpecialBuffer() == nullptr || _length == 0) + if(specialBuffer() == nullptr || _length == 0) { printf("NDArray::printSpecialBuffer: special buffer is nullptr !\n"); return; } void* pHost = operator new(sizeof(T) * _length); @@ -545,7 +553,7 @@ void NDArray::printCurrentBuffer(const bool host, const char* msg, const int pre cudaMemcpyAsync(reinterpret_cast(pHost) + i, specialBufferWithOffset(i), sizeof(T), cudaMemcpyDeviceToHost, *(getContext()->getCudaStream())); } else - cudaMemcpyAsync(pHost, getSpecialBuffer(), sizeOfT() * _length, cudaMemcpyDeviceToHost, *getContext()->getCudaStream()); + cudaMemcpyAsync(pHost, specialBuffer(), sizeOfT() * _length, 
cudaMemcpyDeviceToHost, *getContext()->getCudaStream()); cudaError_t cudaResult = cudaStreamSynchronize(*getContext()->getCudaStream()); if(cudaResult != 0) diff --git a/libnd4j/include/array/impl/ConstantDescriptor.cpp b/libnd4j/include/array/impl/ConstantDescriptor.cpp index ebb27090d..829ac5b34 100644 --- a/libnd4j/include/array/impl/ConstantDescriptor.cpp +++ b/libnd4j/include/array/impl/ConstantDescriptor.cpp @@ -28,7 +28,7 @@ namespace sd { _floatValues.emplace_back(values[e]); } - ConstantDescriptor::ConstantDescriptor(Nd4jLong * values, int length) { + ConstantDescriptor::ConstantDescriptor(Nd4jLong const* values, int length) { for (int e = 0; e < length; e++) _integerValues.emplace_back(values[e]); } diff --git a/libnd4j/include/array/impl/NDArrayFactory.cpp b/libnd4j/include/array/impl/NDArrayFactory.cpp index 870fdc198..f14aa9dbb 100644 --- a/libnd4j/include/array/impl/NDArrayFactory.cpp +++ b/libnd4j/include/array/impl/NDArrayFactory.cpp @@ -417,7 +417,7 @@ NDArray NDArrayFactory::create(const std::vector &values, sd::LaunchContext * NDArray res(buffer, ShapeDescriptor::vectorDescriptor(values.size(), DataTypeUtils::fromT()), context); - memcpyFromVector(res.getBuffer(), values); + memcpyFromVector(res.buffer(), values); res.tickWriteHost(); res.syncToDevice(); diff --git a/libnd4j/include/array/impl/NDArrayList.cpp b/libnd4j/include/array/impl/NDArrayList.cpp index ecd4bcaca..1aa9d2d4b 100644 --- a/libnd4j/include/array/impl/NDArrayList.cpp +++ b/libnd4j/include/array/impl/NDArrayList.cpp @@ -153,7 +153,7 @@ namespace sd { inputs[e] = _chunks[e]; } - auto inShapeInfo = inputs[0]->getShapeInfo(); + auto inShapeInfo = inputs[0]->shapeInfo(); int rank = shape::rank(inShapeInfo); NDArray* array = nullptr; diff --git a/libnd4j/include/array/impl/ShapeList.cpp b/libnd4j/include/array/impl/ShapeList.cpp index 1a883cc7e..d26132516 100644 --- a/libnd4j/include/array/impl/ShapeList.cpp +++ b/libnd4j/include/array/impl/ShapeList.cpp @@ -26,7 +26,7 @@ namespace sd 
{ // _autoremovable = autoRemovable; // } - ShapeList::ShapeList(Nd4jLong* shape) { + ShapeList::ShapeList(const Nd4jLong* shape) { if (shape != nullptr) _shapes.push_back(shape); } @@ -36,21 +36,15 @@ namespace sd { destroy(); } - ShapeList::ShapeList(std::initializer_list shapes) { - for (auto v:shapes) - _shapes.push_back(v); - } - - ShapeList::ShapeList(std::initializer_list shapes, bool isWorkspace) : ShapeList(shapes){ + ShapeList::ShapeList(const std::vector &shapes, bool isWorkspace) : ShapeList(shapes){ _workspace = isWorkspace; } - ShapeList::ShapeList(std::vector& shapes) { - for (auto v:shapes) - _shapes.push_back(v); + ShapeList::ShapeList(const std::vector& shapes) { + _shapes = shapes; } - std::vector* ShapeList::asVector() { + std::vector* ShapeList::asVector() { return &_shapes; } @@ -66,33 +60,21 @@ namespace sd { _destroyed = true; } - int ShapeList::size() { + int ShapeList::size() const { return (int) _shapes.size(); } - Nd4jLong* ShapeList::at(int idx) { + const Nd4jLong* ShapeList::at(int idx) { if (_shapes.size() <= idx) throw std::runtime_error("Can't find requested variable by index"); return _shapes.at(idx); } - void ShapeList::push_back(Nd4jLong *shape) { + void ShapeList::push_back(const Nd4jLong *shape) { _shapes.push_back(shape); } - void ShapeList::push_back(std::vector& shape) { - int dLen = shape::shapeInfoLength(shape.at(0)); - - if (shape.size() != dLen) - throw std::runtime_error("Bad shape was passed in"); - - auto nShape = new Nd4jLong[dLen]; - std::memcpy(nShape, shape.data(), shape::shapeInfoByteLength(shape.at(0))); - - _shapes.push_back(nShape); - } - void ShapeList::detach() { for (int e = 0; e < _shapes.size(); e++) { _shapes[e] = shape::detachShape(_shapes[e]); diff --git a/libnd4j/include/array/impl/TadPack.cpp b/libnd4j/include/array/impl/TadPack.cpp index 1bd5b8f70..7a3bdbe36 100644 --- a/libnd4j/include/array/impl/TadPack.cpp +++ b/libnd4j/include/array/impl/TadPack.cpp @@ -29,18 +29,19 @@ namespace sd { _numTads = 
numTads; } - Nd4jLong* TadPack::primaryShapeInfo() const { + const Nd4jLong* TadPack::primaryShapeInfo() const { return reinterpret_cast(_tadShape.primary()); } - Nd4jLong* TadPack::primaryOffsets() const { + + const Nd4jLong* TadPack::primaryOffsets() const { return reinterpret_cast(_tadOffsets.primary()); } - Nd4jLong* TadPack::specialShapeInfo() const { + const Nd4jLong* TadPack::specialShapeInfo() const { return reinterpret_cast(_tadShape.special()); } - Nd4jLong* TadPack::specialOffsets() const { + const Nd4jLong* TadPack::specialOffsets() const { return reinterpret_cast(_tadOffsets.special()); } @@ -48,11 +49,11 @@ namespace sd { return _numTads; } - Nd4jLong* TadPack::platformShapeInfo() const { + const Nd4jLong* TadPack::platformShapeInfo() const { return sd::Environment::getInstance()->isCPU() ? primaryShapeInfo() : specialShapeInfo(); } - Nd4jLong* TadPack::platformOffsets() const { + const Nd4jLong* TadPack::platformOffsets() const { return sd::Environment::getInstance()->isCPU() ? 
primaryOffsets() : specialOffsets(); } diff --git a/libnd4j/include/execution/impl/Threads.cpp b/libnd4j/include/execution/impl/Threads.cpp index 2d0ae1144..51339abf1 100644 --- a/libnd4j/include/execution/impl/Threads.cpp +++ b/libnd4j/include/execution/impl/Threads.cpp @@ -571,7 +571,7 @@ namespace samediff { // create temporary array int64_t intermediatery[256]; - auto span = delta / numThreads; + auto span = (numElements / numThreads) - (numElements % numThreads); // execute threads in parallel for (uint32_t e = 0; e < numThreads; e++) { @@ -615,7 +615,7 @@ namespace samediff { // create temporary array double intermediatery[256]; - auto span = delta / numThreads; + auto span = (numElements / numThreads) - (numElements % numThreads); // execute threads in parallel for (uint32_t e = 0; e < numThreads; e++) { diff --git a/libnd4j/include/graph/Context.h b/libnd4j/include/graph/Context.h index 96d7e8b12..de6608b46 100644 --- a/libnd4j/include/graph/Context.h +++ b/libnd4j/include/graph/Context.h @@ -196,12 +196,14 @@ namespace sd { #endif void setInputArray(int index, NDArray *array, bool removable = false); - void setInputArray(int index, void *buffer, void *shapeInfo, void *specialBuffer, void *specialShapeInfo); - void setInputArray(int index, void *databuffer, void *shapeInfo, void *specialShapeInfo); + void setInputArray(int index, void *buffer, void const* shapeInfo, void *specialBuffer, void const* specialShapeInfo); + void setInputArray(int index, void *buffer, void * shapeInfo, void *specialBuffer, void * specialShapeInfo); + void setInputArray(int index, void *databuffer, void const* shapeInfo, void const* specialShapeInfo); void setOutputArray(int index, NDArray *array, bool removable = false); - void setOutputArray(int index, void *buffer, void *shapeInfo, void *specialBuffer, void *specialShapeInfo); - void setOutputArray(int index, void *databuffer, void *shapeInfo, void *specialShapeInfo); + void setOutputArray(int index, void *buffer, const void * 
shapeInfo, void *specialBuffer, const void * specialShapeInfo); + void setOutputArray(int index, void *buffer, void * shapeInfo, void *specialBuffer, void * specialShapeInfo); + void setOutputArray(int index, void *databuffer, void const* shapeInfo, void const* specialShapeInfo); void setTArguments(double *arguments, int numberOfArguments); void setIArguments(Nd4jLong *arguments, int numberOfArguments); diff --git a/libnd4j/include/graph/impl/Context.cpp b/libnd4j/include/graph/impl/Context.cpp index 954329f42..ae5bc59a0 100644 --- a/libnd4j/include/graph/impl/Context.cpp +++ b/libnd4j/include/graph/impl/Context.cpp @@ -407,8 +407,12 @@ namespace sd { _handles.emplace_back(array); } - void Context::setInputArray(int index, void *buffer, void *shapeInfo, void *specialBuffer, void *specialShapeInfo) { - auto array = new NDArray(buffer, specialBuffer, reinterpret_cast(shapeInfo)); + void Context::setInputArray(int index, void *buffer, void * shapeInfo, void *specialBuffer, void * specialShapeInfo) { + this->setInputArray(index, buffer, const_cast(shapeInfo), specialBuffer, const_cast(specialShapeInfo)); + } + + void Context::setInputArray(int index, void *buffer, void const* shapeInfo, void *specialBuffer, void const* specialShapeInfo) { + auto array = new NDArray(buffer, specialBuffer, reinterpret_cast(shapeInfo)); if (_fastpath_in.size() < index + 1) _fastpath_in.resize(index+1); @@ -430,11 +434,15 @@ namespace sd { _handles.emplace_back(array); } - void Context::setOutputArray(int index, void *buffer, void *shapeInfo, void *specialBuffer, void *specialShapeInfo) { + void Context::setOutputArray(int index, void *buffer, void * shapeInfo, void *specialBuffer, void * specialShapeInfo) { + this->setOutputArray(index, buffer, const_cast(shapeInfo), specialBuffer, const_cast(specialShapeInfo)); + } + + void Context::setOutputArray(int index, void *buffer, const void * shapeInfo, void *specialBuffer, const void * specialShapeInfo) { if (_fastpath_out.size() < index + 1) 
_fastpath_out.resize(index+1); - auto array = new NDArray(buffer, specialBuffer, reinterpret_cast(shapeInfo)); + auto array = new NDArray(buffer, specialBuffer, reinterpret_cast(shapeInfo)); _fastpath_out[index] = array; _handles.emplace_back(array); @@ -443,7 +451,7 @@ namespace sd { array->setContext(_context); } - void Context::setInputArray(int index, void *vdatabuffer, void *shapeInfo, void *specialShapeInfo) { + void Context::setInputArray(int index, void *vdatabuffer, void const* shapeInfo, void const* specialShapeInfo) { auto dataBuffer = reinterpret_cast(vdatabuffer); if (_fastpath_in.size() < index + 1) @@ -451,9 +459,9 @@ namespace sd { NDArray *array; if (dataBuffer != nullptr) - array = new NDArray(dataBuffer->dataBuffer(), reinterpret_cast(shapeInfo), sd::LaunchContext::defaultContext(), dataBuffer->offset() / DataTypeUtils::sizeOf(ArrayOptions::dataType(reinterpret_cast(shapeInfo)))); + array = new NDArray(dataBuffer->dataBuffer(), reinterpret_cast(shapeInfo), sd::LaunchContext::defaultContext(), dataBuffer->offset() / DataTypeUtils::sizeOf(ArrayOptions::dataType(reinterpret_cast(shapeInfo)))); else - array = new NDArray(nullptr, nullptr, reinterpret_cast(shapeInfo)); + array = new NDArray(nullptr, nullptr, reinterpret_cast(shapeInfo)); _fastpath_in[index] = array; _handles.emplace_back(array); @@ -462,7 +470,7 @@ namespace sd { array->setContext(_context); } - void Context::setOutputArray(int index, void *vdatabuffer, void *shapeInfo, void *specialShapeInfo) { + void Context::setOutputArray(int index, void *vdatabuffer, void const* shapeInfo, void const* specialShapeInfo) { auto dataBuffer = reinterpret_cast(vdatabuffer); if (_fastpath_out.size() < index + 1) @@ -470,9 +478,9 @@ namespace sd { NDArray *array; if (dataBuffer != nullptr) - array = new NDArray(dataBuffer->dataBuffer(), reinterpret_cast(shapeInfo), sd::LaunchContext::defaultContext(), dataBuffer->offset() / DataTypeUtils::sizeOf(ArrayOptions::dataType(reinterpret_cast(shapeInfo)))); + 
array = new NDArray(dataBuffer->dataBuffer(), reinterpret_cast(shapeInfo), sd::LaunchContext::defaultContext(), dataBuffer->offset() / DataTypeUtils::sizeOf(ArrayOptions::dataType(reinterpret_cast(shapeInfo)))); else - array = new NDArray(nullptr, nullptr, reinterpret_cast(shapeInfo)); + array = new NDArray(nullptr, nullptr, reinterpret_cast(shapeInfo)); _fastpath_out[index] = array; _handles.emplace_back(array); diff --git a/libnd4j/include/graph/impl/Graph.cpp b/libnd4j/include/graph/impl/Graph.cpp index 15db128a8..177adbe07 100644 --- a/libnd4j/include/graph/impl/Graph.cpp +++ b/libnd4j/include/graph/impl/Graph.cpp @@ -50,8 +50,8 @@ namespace sd { Nd4jLong result = 0L; Nd4jLong lastStep = 0L; - std::vector shapes; - MAP_IMPL, Nd4jLong*> shapesMap; + std::vector shapes; + MAP_IMPL, Nd4jLong const*> shapesMap; int cntFD = 0; @@ -83,12 +83,12 @@ namespace sd { auto in = node->input()->at(0); auto block = node->getContextPrototype(); - std::vector inputShapes; + std::vector inputShapes; int *oldShape; for (auto v: *node->input()) { nd4j_debug(" inputs for estimation are: %i:%i\n", v.first, v.second); if (v.first < 0) { - inputShapes.push_back(_variableSpace->getVariable(v.first)->getNDArray()->getShapeInfo()); + inputShapes.push_back(_variableSpace->getVariable(v.first)->getNDArray()->shapeInfo()); } else { inputShapes.push_back(shapesMap.at(v)); } @@ -102,7 +102,7 @@ namespace sd { int cnt = 0; for (auto newShape: *outSha->asVector()) { std::pair pairAddr(node->id(), cnt++); - std::pair, Nd4jLong*> pairShape(pairAddr, newShape); + std::pair, Nd4jLong const*> pairShape(pairAddr, newShape); shapesMap.insert(pairShape); @@ -122,11 +122,11 @@ namespace sd { auto x = _variableSpace->getVariable(in); auto z = _variableSpace->getVariable(node->id()); - auto newShape = new Nd4jLong[shape::shapeInfoLength(x->getNDArray()->getShapeInfo())]; - memcpy(newShape, x->getNDArray()->getShapeInfo(), shape::shapeInfoByteLength(x->getNDArray()->getShapeInfo())); + auto newShape = new 
Nd4jLong[shape::shapeInfoLength(x->getNDArray()->shapeInfo())]; + memcpy(newShape, x->getNDArray()->shapeInfo(), shape::shapeInfoByteLength(x->getNDArray()->shapeInfo())); std::pair pairAddr(node->id(), 0); - std::pair, Nd4jLong*> pairShape(pairAddr, newShape); + std::pair, Nd4jLong const*> pairShape(pairAddr, newShape); shapesMap.insert(pairShape); @@ -141,7 +141,7 @@ namespace sd { memcpy(newShape, prevShape, shape::shapeInfoByteLength(prevShape)); std::pair pairAddr(node->id(), 0); - std::pair, Nd4jLong*> pairShape(pairAddr, newShape); + std::pair, Nd4jLong const*> pairShape(pairAddr, newShape); shapesMap.insert(pairShape); @@ -152,30 +152,30 @@ namespace sd { } } else if (node->getOpClass() == OpClass_REDUCTION) { - Nd4jLong *newShape = nullptr; + Nd4jLong const* newShape = nullptr; // if that's scalar output - we don't care about previous node if (node->getDimensions()->size() == 0 || (node->getDimensions()->size() == 1 && node->getDimensions()->at(0) == sd::DataTypeUtils::max())) { - newShape = new Nd4jLong[8]; - - newShape[0] = 2; - newShape[1] = 1; - newShape[2] = 1; - newShape[3] = 1; - newShape[4] = 1; - newShape[5] = 8192; // set type as FLOAT32 by default - newShape[6] = 1; - newShape[7] = 99; - +// auto aNewShape = new Nd4jLong[8]; +// +// aNewShape[0] = 2; +// aNewShape[1] = 1; +// aNewShape[2] = 1; +// aNewShape[3] = 1; +// aNewShape[4] = 1; +// aNewShape[5] = 8192; // set type as FLOAT32 by default +// aNewShape[6] = 1; +// aNewShape[7] = 99; + newShape = ConstantShapeHelper::getInstance()->createShapeInfo(DataType::FLOAT32, 'c', {1,1}); } else { auto in = node->input()->at(0); - Nd4jLong *oldShape = nullptr; + Nd4jLong const* oldShape = nullptr; // calculate tads here if (in.first < 0) { auto x = _variableSpace->getVariable(in)->getNDArray(); - oldShape = x->getShapeInfo(); + oldShape = x->shapeInfo(); } else { oldShape = shapesMap.at(in); @@ -188,7 +188,7 @@ namespace sd { } std::pair pairAddr(node->id(), 0); - std::pair, Nd4jLong*> 
pairShape(pairAddr, newShape); + std::pair, Nd4jLong const*> pairShape(pairAddr, newShape); shapesMap.insert(pairShape); diff --git a/libnd4j/include/graph/profiling/NodeProfile.h b/libnd4j/include/graph/profiling/NodeProfile.h index 871eb5748..83f0b88fc 100644 --- a/libnd4j/include/graph/profiling/NodeProfile.h +++ b/libnd4j/include/graph/profiling/NodeProfile.h @@ -88,8 +88,8 @@ namespace sd { void setObjectsSize(Nd4jLong bytes); void setTotalSize(Nd4jLong bytes); - void addInputShape(Nd4jLong *shapeInfo); - void addOutputShape(Nd4jLong *shapeInfo); + void addInputShape(Nd4jLong const* shapeInfo); + void addOutputShape(Nd4jLong const* shapeInfo); Nd4jLong getActivationsSize() const; Nd4jLong getTemporarySize() const; diff --git a/libnd4j/include/graph/profiling/impl/NodeProfile.cpp b/libnd4j/include/graph/profiling/impl/NodeProfile.cpp index bd48fbd28..8db4472e6 100644 --- a/libnd4j/include/graph/profiling/impl/NodeProfile.cpp +++ b/libnd4j/include/graph/profiling/impl/NodeProfile.cpp @@ -116,11 +116,11 @@ namespace sd { return _executionTime; } - void NodeProfile::addInputShape(Nd4jLong *shapeInfo) { + void NodeProfile::addInputShape(Nd4jLong const* shapeInfo) { _inputShapes.emplace_back(ShapeUtils::shapeInfoAsString(shapeInfo)); } - void NodeProfile::addOutputShape(Nd4jLong *shapeInfo) { + void NodeProfile::addOutputShape(Nd4jLong const*shapeInfo) { _outputShapes.emplace_back(ShapeUtils::shapeInfoAsString(shapeInfo)); } diff --git a/libnd4j/include/helpers/ConstantShapeHelper.h b/libnd4j/include/helpers/ConstantShapeHelper.h index 4454776a4..73281c507 100644 --- a/libnd4j/include/helpers/ConstantShapeHelper.h +++ b/libnd4j/include/helpers/ConstantShapeHelper.h @@ -51,20 +51,20 @@ namespace sd { ConstantDataBuffer bufferForShapeInfo(sd::DataType dataType, char order, const std::vector &shape); ConstantDataBuffer bufferForShapeInfo(const ShapeDescriptor &descriptor); ConstantDataBuffer bufferForShapeInfo(const Nd4jLong *shapeInfo); - ConstantDataBuffer 
bufferForShapeInfo(const sd::DataType dataType, const char order, const int rank, const Nd4jLong* shape); - ConstantDataBuffer createShapeInfoWithUnitiesForBroadcast(const Nd4jLong* maxShapeInfo, const Nd4jLong* minShapeInfo, sd::memory::Workspace* workspace = nullptr, const std::vector dimensions = {}); + ConstantDataBuffer bufferForShapeInfo(sd::DataType dataType, char order, int rank, const Nd4jLong* shape); + ConstantDataBuffer createShapeInfoWithUnitiesForBroadcast(const Nd4jLong* maxShapeInfo, const Nd4jLong* minShapeInfo, sd::memory::Workspace* workspace = nullptr, const std::vector &dimensions = {}); - Nd4jLong* emptyShapeInfo(const sd::DataType dataType); - Nd4jLong* scalarShapeInfo(const sd::DataType dataType); - Nd4jLong* vectorShapeInfo(const Nd4jLong length, const sd::DataType dataType); - Nd4jLong* createShapeInfo(const ShapeDescriptor &descriptor); - Nd4jLong* createShapeInfo(const sd::DataType dataType, const char order, const std::vector &shape); - Nd4jLong* createShapeInfo(const sd::DataType dataType, const char order, const int rank, const Nd4jLong* shape); - Nd4jLong* createShapeInfo(const sd::DataType dataType, const Nd4jLong* shapeInfo); + const Nd4jLong* emptyShapeInfo(sd::DataType dataType); + const Nd4jLong* scalarShapeInfo(sd::DataType dataType); + const Nd4jLong* vectorShapeInfo(Nd4jLong length, sd::DataType dataType); + const Nd4jLong* createShapeInfo(const ShapeDescriptor &descriptor); + const Nd4jLong* createShapeInfo(sd::DataType dataType, char order, const std::vector &shape); + const Nd4jLong* createShapeInfo(sd::DataType dataType, char order, int rank, const Nd4jLong* shape); + const Nd4jLong* createShapeInfo(sd::DataType dataType, const Nd4jLong* shapeInfo); - Nd4jLong* createFromExisting(Nd4jLong *shapeInfo, sd::memory::Workspace *workspace); - Nd4jLong* createFromExisting(Nd4jLong *shapeInfo, bool destroyOriginal = true); + const Nd4jLong* createFromExisting(Nd4jLong *shapeInfo, sd::memory::Workspace *workspace); + const 
Nd4jLong* createFromExisting(Nd4jLong *shapeInfo, bool destroyOriginal = true); bool checkBufferExistenceForShapeInfo(ShapeDescriptor &descriptor); diff --git a/libnd4j/include/helpers/Loops.h b/libnd4j/include/helpers/Loops.h index 508b84f20..f18bcc63d 100644 --- a/libnd4j/include/helpers/Loops.h +++ b/libnd4j/include/helpers/Loops.h @@ -41,43 +41,43 @@ namespace sd { public: template - static FORCEINLINE void loopReduce(X* x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, E* extraParams, int64_t start, int64_t stop); + static FORCEINLINE void loopReduce(const X* x, const Nd4jLong* xShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, E* extraParams, int64_t start, int64_t stop); }; template class ReductionFloatLoops : public ReductionLoops { public: - static void wrapper(const int opNum, X* x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, Z* extraParams, int64_t start, int64_t stop); + static void wrapper(int opNum, const X* x, const Nd4jLong* xShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, Z* extraParams, int64_t start, int64_t stop); template - static void innerloopReduce(X* x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, Z* extraParams, int64_t start, int64_t stop); + static void innerloopReduce(const X* x, const Nd4jLong* xShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, Z* extraParams, int64_t start, int64_t stop); }; template class ND4J_EXPORT ReductionBoolLoops : public ReductionLoops { public: - static void wrapper(const int opNum, X* x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop); + static void wrapper(int opNum, const X* x, const Nd4jLong* xShapeInfo, Z* 
z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop); template - static void innerloopReduce(X* x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop); + static void innerloopReduce(const X* x, const Nd4jLong* xShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop); }; template class ND4J_EXPORT ReductionLongLoops : public ReductionLoops { public: - static void wrapper(const int opNum, X* x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop); + static void wrapper(int opNum, const X* x, const Nd4jLong* xShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop); template - static void innerloopReduce(X* x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop); + static void innerloopReduce(const X* x, const Nd4jLong* xShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop); }; template class ND4J_EXPORT ReductionSameLoops : public ReductionLoops { public: - static void wrapper(const int opNum, X* x, Nd4jLong* xShapeInfo, X* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop); + static void wrapper(int opNum, const X* x, const Nd4jLong* xShapeInfo, X* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop); template - static void innerloopReduce(X* x, Nd4jLong* xShapeInfo, X* z, Nd4jLong* zShapeInfo, Nd4jLong* 
tadShapeInfo, Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop); + static void innerloopReduce(const X* x, const Nd4jLong* xShapeInfo, X* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop); }; @@ -85,10 +85,10 @@ namespace sd { class ND4J_EXPORT IndexReductionLoops { private: public: - static void wrapIndexReduce(const int opNum, void* x, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* extraParams); + static void wrapIndexReduce(int opNum, const void* x, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* extraParams); template - static void loopIndexReduce(X* x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, X* extraParams); + static void loopIndexReduce(const X* x, const Nd4jLong* xShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, X* extraParams); }; @@ -98,7 +98,7 @@ namespace sd { public: template - static FORCEINLINE void loopTransform(X* x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, E* extraParams, uint64_t threadId, uint64_t numThreads); + static FORCEINLINE void loopTransform(const X* x, const Nd4jLong* xShapeInfo, Z* z, const Nd4jLong* zShapeInfo, E* extraParams, uint64_t threadId, uint64_t numThreads); }; template @@ -106,20 +106,20 @@ namespace sd { public: template - static FORCEINLINE void loopReduce3(X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, int* dims, int dimsLen, Z* extraParams, int64_t start, int64_t stop); + static FORCEINLINE void loopReduce3(const X* x, const Nd4jLong* xShapeInfo, const X* y, const Nd4jLong* yShapeInfo, Z* z, const Nd4jLong* zShapeInfo, int* dims, int dimsLen, Z* extraParams, int64_t start, int64_t stop); template - static FORCEINLINE void 
loopReduce3All(X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* xTadShapeInfo, Nd4jLong* xTadOffsets, Nd4jLong* yTadShapeInfo, Nd4jLong* yTadOffsets, Z* extraParams, int64_t start, int64_t stop); + static FORCEINLINE void loopReduce3All(const X* x, const Nd4jLong* xShapeInfo, const X* y, const Nd4jLong* yShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* xTadShapeInfo, const Nd4jLong* xTadOffsets, const Nd4jLong* yTadShapeInfo, const Nd4jLong* yTadOffsets, Z* extraParams, int64_t start, int64_t stop); - static void wrapper(const int opNum, X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, int* dims, int dimsLen, Z* extraParams, int64_t start, int64_t stop); + static void wrapper(int opNum, const X* x, const Nd4jLong* xShapeInfo, const X* y, const Nd4jLong* yShapeInfo, Z* z, const Nd4jLong* zShapeInfo, int* dims, int dimsLen, Z* extraParams, int64_t start, int64_t stop); - static void wrapperAll(const int opNum, X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* xTadShapeInfo, Nd4jLong* xTadOffsets, Nd4jLong* yTadShapeInfo, Nd4jLong* yTadOffsets, Z* extraParams, int64_t start, int64_t stop); + static void wrapperAll(int opNum, const X* x, const Nd4jLong* xShapeInfo, const X* y, const Nd4jLong* yShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* xTadShapeInfo, const Nd4jLong* xTadOffsets, const Nd4jLong* yTadShapeInfo, const Nd4jLong* yTadOffsets, Z* extraParams, int64_t start, int64_t stop); template - static void innerloopReduce3(X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, int* dims, int dimsLen, Z* extraParams, int64_t start, int64_t stop); + static void innerloopReduce3(const X* x, const Nd4jLong* xShapeInfo, const X* y, const Nd4jLong* yShapeInfo, Z* z, const Nd4jLong* zShapeInfo, int* dims, int dimsLen, Z* extraParams, int64_t start, int64_t stop); template - static void 
innerloopReduce3All(X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* xTadShapeInfo, Nd4jLong* xTadOffsets, Nd4jLong* yTadShapeInfo, Nd4jLong* yTadOffsets, Z* extraParams, int64_t start, int64_t stop); + static void innerloopReduce3All(const X* x, const Nd4jLong* xShapeInfo, const X* y, const Nd4jLong* yShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* xTadShapeInfo, const Nd4jLong* xTadOffsets, const Nd4jLong* yTadShapeInfo, const Nd4jLong* yTadOffsets, Z* extraParams, int64_t start, int64_t stop); }; @@ -263,10 +263,11 @@ namespace sd { ////////////////////////////////////////////////////////////////////////////// template template - void sd::ReductionLoops::loopReduce(X* x, Nd4jLong* xShapeInfo, - Z* z, Nd4jLong* zShapeInfo, - Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, - E* extraParams, int64_t start, int64_t stop) { + void sd::ReductionLoops::loopReduce(const X* x, const Nd4jLong* xShapeInfo, + Z* z, const Nd4jLong* zShapeInfo, + const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, + E* extraParams, + int64_t start, int64_t stop) { const LoopKind::Kind kindOfLoop = LoopKind::deduceKindOfLoopTadXZ(xShapeInfo, zShapeInfo, tadShapeInfo); @@ -492,9 +493,10 @@ namespace sd { ////////////////////////////////////////////////////////////////////////////// template template - void sd::TransformLoops::loopTransform(X* x, Nd4jLong* xShapeInfo, - Z* z, Nd4jLong* zShapeInfo, - E* extraParams, uint64_t threadId, uint64_t numThreads) { + void sd::TransformLoops::loopTransform(const X* x, const Nd4jLong* xShapeInfo, + Z* z, const Nd4jLong* zShapeInfo, + E* extraParams, + uint64_t threadId, uint64_t numThreads) { const LoopKind::Kind kindOfLoop = LoopKind::deduceKindOfLoopXZ(xShapeInfo, zShapeInfo); @@ -682,11 +684,11 @@ namespace sd { ////////////////////////////////////////////////////////////////////////////// template template - void sd::Reduction3Loops::loopReduce3(X* x, Nd4jLong* xShapeInfo, - X* y, Nd4jLong* 
yShapeInfo, - Z* z, Nd4jLong* zShapeInfo, - int* dims, int dimsLen, - Z* extraParameters, int64_t start, int64_t stop) { + void sd::Reduction3Loops::loopReduce3(const X* x, const Nd4jLong* xShapeInfo, + const X* y, const Nd4jLong* yShapeInfo, + Z* z, const Nd4jLong* zShapeInfo, + int* dims, int dimsLen, + Z* extraParameters, int64_t start, int64_t stop) { // both tads have same shape, however strides and ews may differ @@ -695,7 +697,7 @@ namespace sd { const Nd4jLong xLen = shape::length(xShapeInfo); const Nd4jLong yLen = shape::length(yShapeInfo); - Nd4jLong* xTadShapeInfo = nullptr, * yTadShapeInfo = nullptr, * xTadOffsets = nullptr, * yTadOffsets = nullptr; + const Nd4jLong* xTadShapeInfo = nullptr, * yTadShapeInfo = nullptr, * xTadOffsets = nullptr, * yTadOffsets = nullptr; TadPack tadPackX, tadPackY; std::vector zeroOffsets; @@ -962,12 +964,13 @@ namespace sd { ////////////////////////////////////////////////////////////////////////////// template template - void sd::Reduction3Loops::loopReduce3All(X* x, Nd4jLong* xShapeInfo, - X* y, Nd4jLong* yShapeInfo, - Z* z, Nd4jLong* zShapeInfo, - Nd4jLong* xTadShapeInfo, Nd4jLong* xTadOffsets, - Nd4jLong* yTadShapeInfo, Nd4jLong* yTadOffsets, - Z* extraParameters, int64_t start, int64_t stop) { + void sd::Reduction3Loops::loopReduce3All(const X* x, const Nd4jLong* xShapeInfo, + const X* y, const Nd4jLong* yShapeInfo, + Z* z, const Nd4jLong* zShapeInfo, + const Nd4jLong* xTadShapeInfo, const Nd4jLong* xTadOffsets, + const Nd4jLong* yTadShapeInfo, const Nd4jLong* yTadOffsets, + Z* extraParameters, + int64_t start, int64_t stop) { // both tads have same shape, however strides and ews may differ diff --git a/libnd4j/include/helpers/ShapeUtils.h b/libnd4j/include/helpers/ShapeUtils.h index 8d2a119c3..cb2faa43d 100644 --- a/libnd4j/include/helpers/ShapeUtils.h +++ b/libnd4j/include/helpers/ShapeUtils.h @@ -35,28 +35,28 @@ namespace sd { static std::vector evalShapeForTensorDot(const NDArray* a, const NDArray* b, const 
std::vector& axesA, const std::vector& axesB, std::vector& permutAt, std::vector& permutBt, std::vector& shapeAt, std::vector& shapeBt); // evaluate resulting shape after reduce operation - static Nd4jLong* evalReduceShapeInfo(const char order, std::vector& dimensions, const NDArray& arr, const sd::DataType dataType, const bool keepDims = false, const bool supportOldShapes = false, sd::memory::Workspace* workspace = nullptr); - static Nd4jLong* evalReduceShapeInfo(const char order, std::vector& dimensions, const Nd4jLong* shapeInfo, const sd::DataType dataType, const bool keepDims = false, const bool supportOldShapes = false, sd::memory::Workspace* workspace = nullptr); - static Nd4jLong* evalReduceShapeInfo(const char order, std::vector& dimensions, const NDArray& arr, const bool keepDims = false, const bool supportOldShapes = false, sd::memory::Workspace* workspace = nullptr); - static Nd4jLong* evalReduceShapeInfo(const char order, std::vector& dimensions, const Nd4jLong* shapeInfo, const bool keepDims = false, const bool supportOldShapes = false, sd::memory::Workspace* workspace = nullptr); + static const Nd4jLong* evalReduceShapeInfo(const char order, std::vector& dimensions, const NDArray& arr, const sd::DataType dataType, const bool keepDims = false, const bool supportOldShapes = false, sd::memory::Workspace* workspace = nullptr); + static const Nd4jLong* evalReduceShapeInfo(const char order, std::vector& dimensions, const Nd4jLong* shapeInfo, const sd::DataType dataType, const bool keepDims = false, const bool supportOldShapes = false, sd::memory::Workspace* workspace = nullptr); + static const Nd4jLong* evalReduceShapeInfo(const char order, std::vector& dimensions, const NDArray& arr, const bool keepDims = false, const bool supportOldShapes = false, sd::memory::Workspace* workspace = nullptr); + static const Nd4jLong* evalReduceShapeInfo(const char order, std::vector& dimensions, const Nd4jLong* shapeInfo, const bool keepDims = false, const bool 
supportOldShapes = false, sd::memory::Workspace* workspace = nullptr); /** * evaluate output shape for reduce operation when input shape is empty * behavior is analogous to tf */ - static Nd4jLong* evalReduceShapeInfoEmpty(const char order, std::vector& dimensions, const Nd4jLong *shapeInfo, const sd::DataType dataType, const bool keepDims, sd::memory::Workspace* workspace); + static const Nd4jLong* evalReduceShapeInfoEmpty(const char order, std::vector& dimensions, const Nd4jLong *shapeInfo, const sd::DataType dataType, const bool keepDims, sd::memory::Workspace* workspace); // evaluate shape for array which is result of repeat operation applied to arr static std::vector evalRepeatShape(int axis, const std::vector& repeats, const NDArray& arr); // evaluate shapeInfo of permuted array // if setContigStrides = true, then set contiguous strides in output shapeInfo in accordance with arr order - static Nd4jLong* evalPermShapeInfo(const int* dimensions, const int rank, const NDArray& arr, sd::memory::Workspace* workspace, const bool setContigStrides = false); - static Nd4jLong* evalPermShapeInfo(const Nd4jLong* dimensions, const int rank, const NDArray& arr, sd::memory::Workspace* workspace); + static const Nd4jLong* evalPermShapeInfo(const int* dimensions, const int rank, const NDArray& arr, sd::memory::Workspace* workspace, const bool setContigStrides = false); + static const Nd4jLong* evalPermShapeInfo(const Nd4jLong* dimensions, const int rank, const NDArray& arr, sd::memory::Workspace* workspace); // evaluate shapeInfo of transposed array // if setContigStrides = true, then set contiguous strides in output shapeInfo in accordance with arr order - static Nd4jLong* evalTranspShapeInfo(const NDArray& arr, sd::memory::Workspace* workspace, const bool setContigStrides = false); + static const Nd4jLong* evalTranspShapeInfo(const NDArray& arr, sd::memory::Workspace* workspace, const bool setContigStrides = false); static bool copyVectorPart(std::vector& target, 
std::vector& source, int rank, int offset); @@ -67,13 +67,13 @@ namespace sd { // check whether 2 arrays have mutually broadcastable shapes // shape comparison starts from the end static bool areShapesBroadcastable(const NDArray &arr1, const NDArray &arr2); - static bool areShapesBroadcastable(Nd4jLong* shapeX, Nd4jLong* shapeY); + static bool areShapesBroadcastable(const Nd4jLong* shapeX, const Nd4jLong* shapeY); static bool areShapesBroadcastable(const std::vector& shape1, const std::vector& shape2); // check the possibility of broadcast operation, if true then return shapeInfo of resulting array // if evalMinMax == false then array with larger rank has to be passed as first argument - static bool evalBroadcastShapeInfo(const NDArray& max, const NDArray& min, const bool evalMinMax, Nd4jLong*& resultShapeInfo, sd::memory::Workspace* workspace); - static bool evalBroadcastShapeInfo(Nd4jLong *max, Nd4jLong *min, const bool evalMinMax, Nd4jLong*& resultShapeInfo, sd::memory::Workspace* workspace); + static bool evalBroadcastShapeInfo(const NDArray& max, const NDArray& min, const bool evalMinMax, const Nd4jLong*& resultShapeInfo, sd::memory::Workspace* workspace); + static bool evalBroadcastShapeInfo(const Nd4jLong *max, const Nd4jLong *min, const bool evalMinMax, const Nd4jLong*& resultShapeInfo, sd::memory::Workspace* workspace); // evaluate sorted vector of max axes to create tads along in case of simple broadcast operation // if simple broadcast is not possible then empty vector is returned @@ -88,10 +88,10 @@ namespace sd { static std::vector getDimsWithSameShape(const NDArray& max, const NDArray& min); // evaluate shapeInfo for resulting array of tile operation - static Nd4jLong* evalTileShapeInfo(const NDArray& arr, const std::vector& reps, sd::memory::Workspace* workspace); + static const Nd4jLong* evalTileShapeInfo(const NDArray& arr, const std::vector& reps, sd::memory::Workspace* workspace); // returns shape part of shapeInfo as std::vector - static 
std::vector pullShapeFromShapeInfo(Nd4jLong *shapeInfo); + static std::vector pullShapeFromShapeInfo(const Nd4jLong *shapeInfo); static std::string shapeAsString(const NDArray* array); static std::string shapeAsString(const std::vector& shape); @@ -104,13 +104,13 @@ namespace sd { static std::vector shapeAsVector(const Nd4jLong* shapeInfo); // evaluate shapeInfo for diagonal array which is made using input arr elements as diagonal - static Nd4jLong* evalDiagShapeInfo(const Nd4jLong* shapeInfo, sd::memory::Workspace* workspace); + static const Nd4jLong* evalDiagShapeInfo(const Nd4jLong* shapeInfo, sd::memory::Workspace* workspace); static std::vector evalBroadcastBackwardAxis(const Nd4jLong *operand, const Nd4jLong *result); // utility to calculate matrix product shape with give source shapes and additional params // returns ShapeList pointer with result shape - static Nd4jLong* matrixProductShape(Nd4jLong* theFirstShape, Nd4jLong* theSecondShape, bool shouldTranspondFirst, bool shouldTranspondSecond, sd::DataType dtype, sd::memory::Workspace* workspace); + static const Nd4jLong* matrixProductShape(const Nd4jLong* theFirstShape, const Nd4jLong* theSecondShape, bool shouldTranspondFirst, bool shouldTranspondSecond, sd::DataType dtype, sd::memory::Workspace* workspace); /** * This method evaluates permutation vector necessary for reducing of shapeFrom to shapeTo diff --git a/libnd4j/include/helpers/TAD.h b/libnd4j/include/helpers/TAD.h index 6df5a05a2..cd58e421e 100644 --- a/libnd4j/include/helpers/TAD.h +++ b/libnd4j/include/helpers/TAD.h @@ -55,20 +55,20 @@ namespace shape { Nd4jLong tadIndex = 0; int dimensionLength; int* dimension = nullptr; - Nd4jLong *shapeInfo = nullptr; - Nd4jLong *tadOnlyShapeInfo = nullptr; + Nd4jLong const* shapeInfo = nullptr; + Nd4jLong* tadOnlyShapeInfo = nullptr; Nd4jLong numTads = 0; int tadRank = 0; - Nd4jLong *tadShape = nullptr; - Nd4jLong *tadStride = nullptr; - Nd4jLong *tadOffsets = nullptr; + Nd4jLong* tadShape = nullptr; + 
Nd4jLong* tadStride = nullptr; + Nd4jLong* tadOffsets = nullptr; Nd4jLong tadOffsetForBlock = 0; int rank = 0; int numOnes = 0; //pointers to original int originalDimensionLength; - int *originalDimension = nullptr; - Nd4jLong *originalShapeInfo = nullptr; + int const* originalDimension = nullptr; + Nd4jLong const* originalShapeInfo = nullptr; bool squeezed = false; bool newSqueezeDimensions = false; int numOnesInMiddle = 0; @@ -81,7 +81,7 @@ namespace shape { void *ptrManager = nullptr; int *ptrOutput = nullptr; - INLINEDEF bool dimensionsDescending(int rank, int *dimensions, int length); + INLINEDEF bool dimensionsDescending(int rank, int const* dimensions, int length); #ifdef __CUDACC__ __host__ __device__ @@ -114,12 +114,12 @@ namespace shape { #ifdef __CUDACC__ __host__ __device__ #endif - INLINEDEF void init(Nd4jLong *shapeInfo,int *dimension,int dimensionLength); + INLINEDEF void init(Nd4jLong const* shapeInfo,int const* dimension,int dimensionLength); #ifdef __CUDACC__ __host__ __device__ #endif - INLINEDEF void init(int index, Nd4jLong *shapeInfo,int *dimension,int dimensionLength); + INLINEDEF void init(int index, Nd4jLong const* shapeInfo,int const* dimension,int dimensionLength); @@ -134,12 +134,12 @@ namespace shape { #ifdef __CUDACC__ __host__ __device__ #endif - INLINEDEF void permuteShapeBufferInPlace(Nd4jLong *shapeBuffer, int* rearrange, Nd4jLong *out); + INLINEDEF void permuteShapeBufferInPlace(Nd4jLong const* shapeBuffer, int const* rearrange, Nd4jLong *out); #ifdef __CUDACC__ __host__ __device__ #endif - INLINEDEF Nd4jLong* permuteShapeBuffer(Nd4jLong *shapeBuffer, int *rearrange); + INLINEDEF Nd4jLong* permuteShapeBuffer(Nd4jLong const* shapeBuffer, int *rearrange); @@ -153,7 +153,7 @@ namespace shape { #ifdef __CUDACC__ __host__ __device__ #endif - INLINEDEF Nd4jLong lengthPerSlice(Nd4jLong *shapeBuffer); + INLINEDEF Nd4jLong lengthPerSlice(Nd4jLong const* shapeBuffer); #ifdef __CUDACC__ @@ -253,7 +253,7 @@ namespace shape { #ifdef __CUDACC__ 
__host__ __device__ #endif - INLINEDEF Nd4jLong tadLength(Nd4jLong *shapeInfo, int *dimension, int dimensionLength); + INLINEDEF Nd4jLong tadLength(Nd4jLong const* shapeInfo, int const* dimension, int dimensionLength); /** * Computes the number @@ -263,7 +263,7 @@ namespace shape { #ifdef __CUDACC__ __host__ __device__ #endif - INLINEDEF Nd4jLong tensorsAlongDimension(Nd4jLong *shapeInfo, int *dimension, int dimensionLength); + INLINEDEF Nd4jLong tensorsAlongDimension(Nd4jLong const* shapeInfo, int const* dimension, int dimensionLength); #ifdef __CUDACC__ @@ -337,19 +337,19 @@ namespace shape { this->wholeThing = this->numTads == 1 || ((this->dimensionLength == this->rank || this->numTads == shape::length(this->shapeInfo)) && ews == 1); } - INLINEDEF void TAD::init(int tadIndex, Nd4jLong *shapeInfo,int *dimension,int dimensionLength) { + INLINEDEF void TAD::init(int tadIndex, Nd4jLong const* shapeInfo,int const* dimension,int dimensionLength) { this->tadIndex = tadIndex; this->init(shapeInfo, dimension, dimensionLength); } - INLINEDEF void TAD::init(Nd4jLong *shapeInfo, int *dimension,int dimensionLength) { + INLINEDEF void TAD::init(Nd4jLong const* shapeInfo, int const* dimension,int dimensionLength) { this->originalShapeInfo = shapeInfo; this->originalDimension = dimension; this->originalDimensionLength = dimensionLength; //start off as original references this->shapeInfo = shapeInfo; this->dimensionLength = dimensionLength; - this->dimension = dimension; + this->dimension = const_cast(dimension); this->rank = shape::rank(shapeInfo); this->numTads = dimensionLength == 0 ? 
1 : this->tensorsAlongDimension(this->shapeInfo, this->dimension, this->dimensionLength); @@ -420,19 +420,19 @@ namespace shape { } - INLINEDEF void TAD::permuteShapeBufferInPlace(Nd4jLong* shapeBuffer, int* rearrange, Nd4jLong* out) { + INLINEDEF void TAD::permuteShapeBufferInPlace(Nd4jLong const* shapeBuffer, int const* rearrange, Nd4jLong* out) { memcpy(out, shapeBuffer, sizeof(Nd4jLong) * shape::shapeInfoLength(this->rank)); doPermuteShapeInfo(out, rearrange); } - INLINEDEF Nd4jLong* TAD::permuteShapeBuffer(Nd4jLong* shapeBuffer, int *rearrange) { + INLINEDEF Nd4jLong* TAD::permuteShapeBuffer(Nd4jLong const* shapeBuffer, int *rearrange) { int len = shape::shapeInfoLength(this->rank); Nd4jLong *copy = shape::copyOf(len,shapeBuffer); doPermuteShapeInfo(copy,rearrange); return copy; } - INLINEDEF bool TAD::dimensionsDescending(int rank, int *dimensions, int length) { + INLINEDEF bool TAD::dimensionsDescending(int rank, int const* dimensions, int length) { int desired = rank - 1; for (int e = length - 1; e >= 0; e--) { if (dimensions[e] != desired--) @@ -465,7 +465,7 @@ namespace shape { this->tadStride = shape::stride(this->tadOnlyShapeInfo); } - INLINEDEF Nd4jLong TAD::lengthPerSlice(Nd4jLong* shapeBuffer) { + INLINEDEF Nd4jLong TAD::lengthPerSlice(Nd4jLong const* shapeBuffer) { int dimension = 0; Nd4jLong *remove = shape::removeIndex(shape::shapeOf(shapeBuffer),&dimension,shape::rank(shapeBuffer),1); Nd4jLong prod = shape::prodLong(remove, shape::rank(shapeBuffer) - 1); @@ -635,7 +635,7 @@ namespace shape { } - INLINEDEF Nd4jLong* TAD::tensorShape() { + INLINEDEF Nd4jLong* TAD::tensorShape(){ if(this->tadShape != nullptr) return this->tadShape; @@ -902,7 +902,7 @@ namespace shape { } - INLINEDEF Nd4jLong TAD::tadLength(Nd4jLong *shapeInfo, int *dimension, int dimensionLength) { + INLINEDEF Nd4jLong TAD::tadLength(Nd4jLong const* shapeInfo, int const* dimension, int dimensionLength) { if(dimensionLength == 1) { return shape::shapeOf(shapeInfo)[dimension[0]]; } @@ 
-919,7 +919,7 @@ namespace shape { } - INLINEDEF Nd4jLong TAD::tensorsAlongDimension(Nd4jLong *shapeInfo, int *dimension, int dimensionLength) { + INLINEDEF Nd4jLong TAD::tensorsAlongDimension(Nd4jLong const* shapeInfo, int const* dimension, int dimensionLength) { return shape::length(shapeInfo) / this->tadLength(shapeInfo,dimension,dimensionLength); } diff --git a/libnd4j/include/helpers/cpu/ConstantShapeHelper.cpp b/libnd4j/include/helpers/cpu/ConstantShapeHelper.cpp index a69614906..fc8abe8aa 100644 --- a/libnd4j/include/helpers/cpu/ConstantShapeHelper.cpp +++ b/libnd4j/include/helpers/cpu/ConstantShapeHelper.cpp @@ -55,22 +55,16 @@ namespace sd { ConstantDataBuffer ConstantShapeHelper::bufferForShapeInfo(const ShapeDescriptor &descriptor) { int deviceId = 0; - _mutex.lock(); + std::lock_guard lock(_mutex); if (_cache[deviceId].count(descriptor) == 0) { auto hPtr = descriptor.toShapeInfo(); ConstantDataBuffer buffer(hPtr, nullptr, shape::shapeInfoLength(hPtr)*sizeof(Nd4jLong), DataType::INT64); ShapeDescriptor descriptor1(descriptor); _cache[deviceId][descriptor1] = buffer; - auto r = _cache[deviceId][descriptor1]; - _mutex.unlock(); - - return r; + return _cache[deviceId][descriptor1]; } else { - auto r = _cache[deviceId].at(descriptor); - _mutex.unlock(); - - return r; + return _cache[deviceId].at(descriptor); } } @@ -82,52 +76,45 @@ namespace sd { bool ConstantShapeHelper::checkBufferExistenceForShapeInfo(ShapeDescriptor &descriptor) { bool result; int deviceId = 0; - _mutex.lock(); + std::lock_guard lock(_mutex); - if (_cache[deviceId].count(descriptor) == 0) - result = false; - else - result = true; - - _mutex.unlock(); - - return result; + return _cache[deviceId].count(descriptor) != 0; } - Nd4jLong* ConstantShapeHelper::createShapeInfo(const sd::DataType dataType, const char order, const int rank, const Nd4jLong* shape) { + const Nd4jLong* ConstantShapeHelper::createShapeInfo(const sd::DataType dataType, const char order, const int rank, const Nd4jLong* 
shape) { ShapeDescriptor descriptor(dataType, order, shape, rank); return bufferForShapeInfo(descriptor).primaryAsT(); } - Nd4jLong* ConstantShapeHelper::createShapeInfo(const sd::DataType dataType, const Nd4jLong* shapeInfo) { + const Nd4jLong* ConstantShapeHelper::createShapeInfo(const sd::DataType dataType, const Nd4jLong* shapeInfo) { return ConstantShapeHelper::createShapeInfo(dataType, shape::order(shapeInfo), shape::rank(shapeInfo), shape::shapeOf(const_cast(shapeInfo))); } - Nd4jLong* ConstantShapeHelper::emptyShapeInfo(const sd::DataType dataType) { + const Nd4jLong* ConstantShapeHelper::emptyShapeInfo(const sd::DataType dataType) { auto descriptor = ShapeDescriptor::emptyDescriptor(dataType); return bufferForShapeInfo(descriptor).primaryAsT(); } - Nd4jLong* ConstantShapeHelper::scalarShapeInfo(const sd::DataType dataType) { + const Nd4jLong* ConstantShapeHelper::scalarShapeInfo(const sd::DataType dataType) { auto descriptor = ShapeDescriptor::scalarDescriptor(dataType); return bufferForShapeInfo(descriptor).primaryAsT(); } - Nd4jLong* ConstantShapeHelper::vectorShapeInfo(const Nd4jLong length, const sd::DataType dataType) { + const Nd4jLong* ConstantShapeHelper::vectorShapeInfo(const Nd4jLong length, const sd::DataType dataType) { auto descriptor = ShapeDescriptor::vectorDescriptor(length, dataType); return bufferForShapeInfo(descriptor).primaryAsT(); } - Nd4jLong* ConstantShapeHelper::createShapeInfo(const sd::DataType dataType, const char order, const std::vector &shape) { + const Nd4jLong* ConstantShapeHelper::createShapeInfo(const sd::DataType dataType, const char order, const std::vector &shape) { ShapeDescriptor descriptor(dataType, order, shape); return bufferForShapeInfo(descriptor).primaryAsT(); } - Nd4jLong* ConstantShapeHelper::createShapeInfo(const ShapeDescriptor &descriptor) { + const Nd4jLong* ConstantShapeHelper::createShapeInfo(const ShapeDescriptor &descriptor) { return bufferForShapeInfo(descriptor).primaryAsT(); } - Nd4jLong* 
ConstantShapeHelper::createFromExisting(Nd4jLong *shapeInfo, bool destroyOriginal) { + const Nd4jLong* ConstantShapeHelper::createFromExisting(Nd4jLong *shapeInfo, bool destroyOriginal) { ShapeDescriptor descriptor(shapeInfo); auto result = createShapeInfo(descriptor); @@ -137,7 +124,7 @@ namespace sd { return result; } - Nd4jLong* ConstantShapeHelper::createFromExisting(Nd4jLong *shapeInfo, sd::memory::Workspace *workspace) { + const Nd4jLong* ConstantShapeHelper::createFromExisting(Nd4jLong *shapeInfo, sd::memory::Workspace *workspace) { ShapeDescriptor descriptor(shapeInfo); auto result = createShapeInfo(descriptor); @@ -148,7 +135,7 @@ namespace sd { //////////////////////////////////////////////////////////////////////// -ConstantDataBuffer ConstantShapeHelper::createShapeInfoWithUnitiesForBroadcast(const Nd4jLong* maxShapeInfo, const Nd4jLong* minShapeInfo, sd::memory::Workspace* workspace, const std::vector dimensions) { +ConstantDataBuffer ConstantShapeHelper::createShapeInfoWithUnitiesForBroadcast(const Nd4jLong* maxShapeInfo, const Nd4jLong* minShapeInfo, sd::memory::Workspace* workspace, const std::vector &dimensions) { Nd4jLong* newShapeInfo = nullptr; ALLOCATE(newShapeInfo, workspace, shape::shapeInfoLength(shape::rank(maxShapeInfo)), Nd4jLong); diff --git a/libnd4j/include/helpers/cpu/MmulHelper.cpp b/libnd4j/include/helpers/cpu/MmulHelper.cpp index 73f3e54bd..26a6643c3 100644 --- a/libnd4j/include/helpers/cpu/MmulHelper.cpp +++ b/libnd4j/include/helpers/cpu/MmulHelper.cpp @@ -44,9 +44,9 @@ static void usualGemm(const NDArray* vA, const NDArray* vB, NDArray* vC, const bool betaPersent = beta; - const Nd4jLong* aShapeInfo = vA->getShapeInfo(); - const Nd4jLong* bShapeInfo = vB->getShapeInfo(); - const Nd4jLong* cShapeInfo = vC->getShapeInfo(); + const Nd4jLong* aShapeInfo = vA->shapeInfo(); + const Nd4jLong* bShapeInfo = vB->shapeInfo(); + const Nd4jLong* cShapeInfo = vC->shapeInfo(); const int aRank = vA->rankOf(); const int bRank = vB->rankOf(); @@ 
-111,9 +111,9 @@ static void usualGemv(const NDArray* vA, const NDArray* vX, NDArray* vY, const const bool betaPersent = beta; - const Nd4jLong* aShapeInfo = vA->getShapeInfo(); - const Nd4jLong* xShapeInfo = vX->getShapeInfo(); - const Nd4jLong* yShapeInfo = vY->getShapeInfo(); + const Nd4jLong* aShapeInfo = vA->shapeInfo(); + const Nd4jLong* xShapeInfo = vX->shapeInfo(); + const Nd4jLong* yShapeInfo = vY->shapeInfo(); const int N = vX->lengthOf(); const int M = vY->lengthOf(); @@ -294,13 +294,13 @@ NDArray* MmulHelper::mmulMxV(const NDArray* A, const NDArray* X, sd::NDArray* Y, if(A->rankOf() != 2) throw std::runtime_error("MmulHelper::mmulMxV: rank of A array is not equal 2 !"); - if(!shape::isCommonVector(X->getShapeInfo(), xLenDim)) + if(!shape::isCommonVector(X->shapeInfo(), xLenDim)) throw std::runtime_error("MmulHelper::mmulMxV: X array must be vector !"); const auto M = A->sizeAt(0); const auto N = A->sizeAt(1); - if(Y != nullptr && !shape::isCommonVector(Y->getShapeInfo(), yLenDim)) + if(Y != nullptr && !shape::isCommonVector(Y->shapeInfo(), yLenDim)) throw std::runtime_error("MmulHelper::mmulMxV: Y array must be vector !"); if(X->lengthOf() != N) throw std::runtime_error("MmulHelper::mmulMxV: X vector has wrong length !"); @@ -347,10 +347,10 @@ NDArray* MmulHelper::mmulMxV(const NDArray* A, const NDArray* X, sd::NDArray* Y, // choose appropriate cuda gemm api depending on data types if(typeDouble) { - BlasHelper::getInstance()->dgemv()(blasOrder, CblasNoTrans, M, N, alpha, (double*)pA->getBuffer(), lda, (double*)X->getBuffer(), incx, beta, (double*)Y->getBuffer(), incy); + BlasHelper::getInstance()->dgemv()(blasOrder, CblasNoTrans, M, N, alpha, (double*)pA->buffer(), lda, (double*)X->buffer(), incx, beta, (double*)Y->buffer(), incy); } else if(typeFloat) { - BlasHelper::getInstance()->sgemv()(blasOrder, CblasNoTrans, M, N, (float)alpha, (float*)pA->getBuffer(), lda, (float*)X->getBuffer(), incx, (float)beta, (float*)Y->getBuffer(), incy); + 
BlasHelper::getInstance()->sgemv()(blasOrder, CblasNoTrans, M, N, (float)alpha, (float*)pA->buffer(), lda, (float*)X->buffer(), incx, (float)beta, (float*)Y->buffer(), incy); } if(pA != A) @@ -371,9 +371,9 @@ NDArray* MmulHelper::dot(const NDArray* X, const NDArray* Y, sd::NDArray* Z, con int xLenDim(0), yLenDim(0); - if(!shape::isCommonVector(X->getShapeInfo(), xLenDim)) + if(!shape::isCommonVector(X->shapeInfo(), xLenDim)) throw std::runtime_error("MmulHelper::dot: X array must be vector !"); - if(!shape::isCommonVector(Y->getShapeInfo(), yLenDim)) + if(!shape::isCommonVector(Y->shapeInfo(), yLenDim)) throw std::runtime_error("MmulHelper::dot: Y array must be vector !"); if(Z != nullptr && !Z->isScalar()) throw std::runtime_error("MmulHelper::dot: Z array must be scalar !"); @@ -393,8 +393,8 @@ NDArray* MmulHelper::dot(const NDArray* X, const NDArray* Y, sd::NDArray* Z, con const auto yType = Y->dataType(); const auto zType = Z->dataType(); - BUILD_SINGLE_SELECTOR_THRICE(xType, usualDot, (length, alpha, X->getBuffer(), incx, Y->getBuffer(), incy, beta, Z->getBuffer()), NUMERIC_TYPES); - //BUILD_TRIPLE_SELECTOR(xType, yType, zType, usualDot, (length, alpha, X->getBuffer(), incx, Y->getBuffer(), incy, beta, Z->getBuffer()), LIBND4J_TYPES, FLOAT_TYPES, FLOAT_TYPES); + BUILD_SINGLE_SELECTOR_THRICE(xType, usualDot, (length, alpha, X->buffer(), incx, Y->buffer(), incy, beta, Z->buffer()), NUMERIC_TYPES); + //BUILD_TRIPLE_SELECTOR(xType, yType, zType, usualDot, (length, alpha, X->buffer(), incx, Y->buffer(), incy, beta, Z->buffer()), LIBND4J_TYPES, FLOAT_TYPES, FLOAT_TYPES); return Z; } @@ -419,9 +419,9 @@ static void batchedGemm(const NDArray* vA, const NDArray* vB, NDArray* vC, const bool betaPersent = beta; - const Nd4jLong* aShapeInfo = vA->getShapeInfo(); - const Nd4jLong* bShapeInfo = vB->getShapeInfo(); - const Nd4jLong* cShapeInfo = vC->getShapeInfo(); + const Nd4jLong* aShapeInfo = vA->shapeInfo(); + const Nd4jLong* bShapeInfo = vB->shapeInfo(); + const 
Nd4jLong* cShapeInfo = vC->shapeInfo(); const int aRank = vA->rankOf(); const int bRank = vB->rankOf(); @@ -576,13 +576,13 @@ NDArray* MmulHelper::mmulNxN(const NDArray* A, const NDArray* B, NDArray* C, con // multiplication const std::vector dimsToExclude = ShapeUtils::evalDimsToExclude(C->rankOf(), {-2, -1}); - const Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(C->getShapeInfo(), dimsToExclude); + const Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(C->shapeInfo(), dimsToExclude); std::vector idxRanges(2 * C->rankOf()); // #pragma omp parallel for schedule(guided) firstprivate(idxRanges) for(Nd4jLong i = 0; i < numOfSubArrs; ++i) { - ShapeUtils::evalIdxRangesForSubArr(i, C->getShapeInfo(), dimsToExclude, idxRanges.data()); + ShapeUtils::evalIdxRangesForSubArr(i, C->shapeInfo(), dimsToExclude, idxRanges.data()); NDArray cSubArr = (*C)(idxRanges); if(aRank > bRank) { diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops.hpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops.hpp index fe6019b5a..a64f0fc91 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops.hpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops.hpp @@ -26,10 +26,10 @@ using namespace simdOps; ////////////////////////////////////////////////////////////////////////////// template template -void sd::IndexReductionLoops::loopIndexReduce(X* x, Nd4jLong* xShapeInfo, - Z* z, Nd4jLong* zShapeInfo, - Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, - X* extraParams) { +void sd::IndexReductionLoops::loopIndexReduce(const X* x, const Nd4jLong* xShapeInfo, + Z* z, const Nd4jLong* zShapeInfo, + const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, + X* extraParams) { sd::LoopKind::Kind kindOfLoop = sd::LoopKind::deduceKindOfLoopTadXZ(xShapeInfo, zShapeInfo, tadShapeInfo); if(kindOfLoop == sd::LoopKind::SMALLARR2DX) @@ -305,8 +305,8 @@ void sd::IndexReductionLoops::loopIndexReduce(X* x, Nd4jLong* xShapeInfo, } template -void 
sd::IndexReductionLoops::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* vz, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams) { - auto x = reinterpret_cast(vx); +void sd::IndexReductionLoops::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* vz, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams) { + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_0.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_0.cpp index 68ae29fc9..97318dae8 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_0.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_0.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_0, (sd::DataType::INT32, int32_t)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_0, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_1.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_1.cpp index fe68715ca..680bf7a64 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_1.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_1.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template 
void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_1, (sd::DataType::INT32, int32_t)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_1, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_2.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_2.cpp index 8627003fd..e22635b85 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_2.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_2.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_2, (sd::DataType::INT32, int32_t)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_2, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_3.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_3.cpp index 8b2f4e1a7..f85096f0a 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_3.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_3.cpp @@ -21,4 +21,4 @@ #include 
"./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_3, (sd::DataType::INT32, int32_t)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_3, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_4.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_4.cpp index e87921565..5272eba7e 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_4.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_4.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_4, (sd::DataType::INT32, int32_t)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_4, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_5.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_5.cpp index 062b006fd..683d6d0c0 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_5.cpp +++ 
b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_5.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_5, (sd::DataType::INT32, int32_t)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_5, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_6.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_6.cpp index 4182de6fd..0ff70b7b5 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_6.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_6.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_6, (sd::DataType::INT32, int32_t)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_6, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_7.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_7.cpp index 53a4ed23f..64d93c5e3 100644 --- 
a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_7.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_7.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_7, (sd::DataType::INT32, int32_t)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_7, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_8.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_8.cpp index 2cf4b6ae7..dd586ab26 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_8.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_8.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_8, (sd::DataType::INT32, int32_t)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_8, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_9.cpp 
b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_9.cpp index b6b1da4a0..bb7ef80f7 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_9.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int32_9.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_9, (sd::DataType::INT32, int32_t)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_9, (sd::DataType::INT32, int32_t)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_0.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_0.cpp index de4cf1872..8d0c55ce1 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_0.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_0.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_0, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_0, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git 
a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_1.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_1.cpp index 71a19bab2..7c5824559 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_1.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_1.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_1, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_1, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_2.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_2.cpp index 22d430e9e..3bb6e6b7c 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_2.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_2.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_2, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_2, (sd::DataType::INT64, 
Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_3.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_3.cpp index c2434f63a..49f977901 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_3.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_3.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_3, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_3, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_4.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_4.cpp index be628bb63..73f0e9872 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_4.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_4.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_4, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* 
vextraParams), LIBND4J_TYPES_4, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_5.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_5.cpp index a5e8a596f..b27aaf341 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_5.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_5.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_5, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_5, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_6.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_6.cpp index 3e96a0574..452184acd 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_6.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_6.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_6, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* 
tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_6, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_7.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_7.cpp index a6c02301f..59cbc51cf 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_7.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_7.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_7, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_7, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_8.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_8.cpp index e461c9bcd..51fc49cea 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_8.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_8.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_8, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, 
const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_8, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_9.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_9.cpp index 33e5ba403..b774dde52 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_9.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops_int64_9.cpp @@ -21,4 +21,4 @@ #include "./IndexReductionLoops.hpp" -BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_9, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void sd::IndexReductionLoops, ::wrapIndexReduce(const int opNum, const void* vx, const Nd4jLong* xShapeInfo, void* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES_9, (sd::DataType::INT64, Nd4jLong)); \ No newline at end of file diff --git a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_0.cpp b/libnd4j/include/helpers/cpu/loops/Reduction3Loops_0.cpp index f721c5994..00b15673b 100644 --- a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_0.cpp +++ b/libnd4j/include/helpers/cpu/loops/Reduction3Loops_0.cpp @@ -28,7 +28,7 @@ namespace sd { template template - void Reduction3Loops::innerloopReduce3(X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, int* dims, int dimsLen, Z* extraParams, int64_t start, int64_t stop) { + void Reduction3Loops::innerloopReduce3(const X* x, const Nd4jLong* xShapeInfo, const X* y, const Nd4jLong* yShapeInfo, Z* z, const Nd4jLong* zShapeInfo, int* dims, int dimsLen, Z* extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS 
Reduction3Loops::template loopReduce3(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dims, dimsLen, extraParams, start, stop); #endif @@ -36,21 +36,21 @@ namespace sd { template template - void Reduction3Loops::innerloopReduce3All(X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* xTadShapeInfo, Nd4jLong* xTadOffsets, Nd4jLong* yTadShapeInfo, Nd4jLong* yTadOffsets, Z* extraParams, int64_t start, int64_t stop) { + void Reduction3Loops::innerloopReduce3All(const X* x, const Nd4jLong* xShapeInfo, const X* y, const Nd4jLong* yShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* xTadShapeInfo, const Nd4jLong* xTadOffsets, const Nd4jLong* yTadShapeInfo, const Nd4jLong* yTadOffsets, Z* extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS Reduction3Loops::template loopReduce3All(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, xTadShapeInfo, xTadOffsets, yTadShapeInfo, yTadOffsets, extraParams, start, stop); #endif } template - void Reduction3Loops::wrapper(const int opNum, X *x, Nd4jLong *xShapeInfo, X *y, Nd4jLong *yShapeInfo, Y *z, Nd4jLong *zShapeInfo, int* dims, int dimsLen, Y *extraParams, int64_t start, int64_t stop) { + void Reduction3Loops::wrapper(const int opNum, const X *x, const Nd4jLong *xShapeInfo, const X *y, const Nd4jLong *yShapeInfo, Y *z, const Nd4jLong *zShapeInfo, int* dims, int dimsLen, Y *extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce3, PARAMS(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dims, dimsLen, extraParams, start, stop), REDUCE3_OPS); #endif } template - void Reduction3Loops::wrapperAll(const int opNum, X *x, Nd4jLong *xShapeInfo, X *y, Nd4jLong *yShapeInfo, Y *z, Nd4jLong *zShapeInfo, Nd4jLong* xTadShapeInfo, Nd4jLong* xTadOffsets, Nd4jLong* yTadShapeInfo, Nd4jLong* yTadOffsets, Y* extraParams, int64_t start, int64_t stop) { + void Reduction3Loops::wrapperAll(const int opNum, const X *x, const Nd4jLong *xShapeInfo, const X *y, 
const Nd4jLong *yShapeInfo, Y *z, const Nd4jLong *zShapeInfo, const Nd4jLong* xTadShapeInfo, const Nd4jLong* xTadOffsets, const Nd4jLong* yTadShapeInfo, const Nd4jLong* yTadOffsets, Y* extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce3All, PARAMS(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, xTadShapeInfo, xTadOffsets, yTadShapeInfo, yTadOffsets, extraParams, start, stop), REDUCE3_OPS); #endif diff --git a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_1.cpp b/libnd4j/include/helpers/cpu/loops/Reduction3Loops_1.cpp index 19a248896..da8d3db7e 100644 --- a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_1.cpp +++ b/libnd4j/include/helpers/cpu/loops/Reduction3Loops_1.cpp @@ -28,7 +28,7 @@ namespace sd { template template - void Reduction3Loops::innerloopReduce3(X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, int* dims, int dimsLen, Z* extraParams, int64_t start, int64_t stop) { + void Reduction3Loops::innerloopReduce3(const X* x, const Nd4jLong* xShapeInfo, const X* y, const Nd4jLong* yShapeInfo, Z* z, const Nd4jLong* zShapeInfo, int* dims, int dimsLen, Z* extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS Reduction3Loops::template loopReduce3(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dims, dimsLen, extraParams, start, stop); #endif @@ -36,21 +36,21 @@ namespace sd { template template - void Reduction3Loops::innerloopReduce3All(X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* xTadShapeInfo, Nd4jLong* xTadOffsets, Nd4jLong* yTadShapeInfo, Nd4jLong* yTadOffsets, Z* extraParams, int64_t start, int64_t stop) { + void Reduction3Loops::innerloopReduce3All(const X* x, const Nd4jLong* xShapeInfo, const X* y, const Nd4jLong* yShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* xTadShapeInfo, const Nd4jLong* xTadOffsets, const Nd4jLong* yTadShapeInfo, const Nd4jLong* yTadOffsets, Z* extraParams, int64_t start, int64_t 
stop) { #ifndef INLINE_LOOPS Reduction3Loops::template loopReduce3All(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, xTadShapeInfo, xTadOffsets, yTadShapeInfo, yTadOffsets, extraParams, start, stop); #endif } template - void Reduction3Loops::wrapper(const int opNum, X *x, Nd4jLong *xShapeInfo, X *y, Nd4jLong *yShapeInfo, Y *z, Nd4jLong *zShapeInfo, int* dims, int dimsLen, Y *extraParams, int64_t start, int64_t stop) { + void Reduction3Loops::wrapper(const int opNum, const X *x, const Nd4jLong *xShapeInfo, const X *y, const Nd4jLong *yShapeInfo, Y *z, const Nd4jLong *zShapeInfo, int* dims, int dimsLen, Y *extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce3, PARAMS(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dims, dimsLen, extraParams, start, stop), REDUCE3_OPS); #endif } template - void Reduction3Loops::wrapperAll(const int opNum, X *x, Nd4jLong *xShapeInfo, X *y, Nd4jLong *yShapeInfo, Y *z, Nd4jLong *zShapeInfo, Nd4jLong* xTadShapeInfo, Nd4jLong* xTadOffsets, Nd4jLong* yTadShapeInfo, Nd4jLong* yTadOffsets, Y* extraParams, int64_t start, int64_t stop) { + void Reduction3Loops::wrapperAll(const int opNum, const X *x, const Nd4jLong *xShapeInfo, const X *y, const Nd4jLong *yShapeInfo, Y *z, const Nd4jLong *zShapeInfo, const Nd4jLong* xTadShapeInfo, const Nd4jLong* xTadOffsets, const Nd4jLong* yTadShapeInfo, const Nd4jLong* yTadOffsets, Y* extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce3All, PARAMS(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, xTadShapeInfo, xTadOffsets, yTadShapeInfo, yTadOffsets, extraParams, start, stop), REDUCE3_OPS); #endif diff --git a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_2.cpp b/libnd4j/include/helpers/cpu/loops/Reduction3Loops_2.cpp index e90050e4e..06588a2fb 100644 --- a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_2.cpp +++ b/libnd4j/include/helpers/cpu/loops/Reduction3Loops_2.cpp @@ -28,7 +28,7 @@ namespace sd { template 
template - void Reduction3Loops::innerloopReduce3(X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, int* dims, int dimsLen, Z* extraParams, int64_t start, int64_t stop) { + void Reduction3Loops::innerloopReduce3(const X* x, const Nd4jLong* xShapeInfo, const X* y, const Nd4jLong* yShapeInfo, Z* z, const Nd4jLong* zShapeInfo, int* dims, int dimsLen, Z* extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS Reduction3Loops::template loopReduce3(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dims, dimsLen, extraParams, start, stop); #endif @@ -36,21 +36,21 @@ namespace sd { template template - void Reduction3Loops::innerloopReduce3All(X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* xTadShapeInfo, Nd4jLong* xTadOffsets, Nd4jLong* yTadShapeInfo, Nd4jLong* yTadOffsets, Z* extraParams, int64_t start, int64_t stop) { + void Reduction3Loops::innerloopReduce3All(const X* x, const Nd4jLong* xShapeInfo, const X* y, const Nd4jLong* yShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* xTadShapeInfo, const Nd4jLong* xTadOffsets, const Nd4jLong* yTadShapeInfo, const Nd4jLong* yTadOffsets, Z* extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS Reduction3Loops::template loopReduce3All(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, xTadShapeInfo, xTadOffsets, yTadShapeInfo, yTadOffsets, extraParams, start, stop); #endif } template - void Reduction3Loops::wrapper(const int opNum, X *x, Nd4jLong *xShapeInfo, X *y, Nd4jLong *yShapeInfo, Y *z, Nd4jLong *zShapeInfo, int* dims, int dimsLen, Y *extraParams, int64_t start, int64_t stop) { + void Reduction3Loops::wrapper(const int opNum, const X *x, const Nd4jLong *xShapeInfo, const X *y, const Nd4jLong *yShapeInfo, Y *z, const Nd4jLong *zShapeInfo, int* dims, int dimsLen, Y *extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce3, PARAMS(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dims, 
dimsLen, extraParams, start, stop), REDUCE3_OPS); #endif } template - void Reduction3Loops::wrapperAll(const int opNum, X *x, Nd4jLong *xShapeInfo, X *y, Nd4jLong *yShapeInfo, Y *z, Nd4jLong *zShapeInfo, Nd4jLong* xTadShapeInfo, Nd4jLong* xTadOffsets, Nd4jLong* yTadShapeInfo, Nd4jLong* yTadOffsets, Y* extraParams, int64_t start, int64_t stop) { + void Reduction3Loops::wrapperAll(const int opNum, const X *x, const Nd4jLong *xShapeInfo, const X *y, const Nd4jLong *yShapeInfo, Y *z, const Nd4jLong *zShapeInfo, const Nd4jLong* xTadShapeInfo, const Nd4jLong* xTadOffsets, const Nd4jLong* yTadShapeInfo, const Nd4jLong* yTadOffsets, Y* extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce3All, PARAMS(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, xTadShapeInfo, xTadOffsets, yTadShapeInfo, yTadOffsets, extraParams, start, stop), REDUCE3_OPS); #endif diff --git a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_3.cpp b/libnd4j/include/helpers/cpu/loops/Reduction3Loops_3.cpp index d109d1013..405b0275b 100644 --- a/libnd4j/include/helpers/cpu/loops/Reduction3Loops_3.cpp +++ b/libnd4j/include/helpers/cpu/loops/Reduction3Loops_3.cpp @@ -28,7 +28,7 @@ namespace sd { template template - void Reduction3Loops::innerloopReduce3(X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, int* dims, int dimsLen, Z* extraParams, int64_t start, int64_t stop) { + void Reduction3Loops::innerloopReduce3(const X* x, const Nd4jLong* xShapeInfo, const X* y, const Nd4jLong* yShapeInfo, Z* z, const Nd4jLong* zShapeInfo, int* dims, int dimsLen, Z* extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS Reduction3Loops::template loopReduce3(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dims, dimsLen, extraParams, start, stop); #endif @@ -36,21 +36,21 @@ namespace sd { template template - void Reduction3Loops::innerloopReduce3All(X* x, Nd4jLong* xShapeInfo, X* y, Nd4jLong* yShapeInfo, Z* z, Nd4jLong* zShapeInfo, 
Nd4jLong* xTadShapeInfo, Nd4jLong* xTadOffsets, Nd4jLong* yTadShapeInfo, Nd4jLong* yTadOffsets, Z* extraParams, int64_t start, int64_t stop) { + void Reduction3Loops::innerloopReduce3All(const X* x, const Nd4jLong* xShapeInfo, const X* y, const Nd4jLong* yShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* xTadShapeInfo, const Nd4jLong* xTadOffsets, const Nd4jLong* yTadShapeInfo, const Nd4jLong* yTadOffsets, Z* extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS Reduction3Loops::template loopReduce3All(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, xTadShapeInfo, xTadOffsets, yTadShapeInfo, yTadOffsets, extraParams, start, stop); #endif } template - void Reduction3Loops::wrapper(const int opNum, X *x, Nd4jLong *xShapeInfo, X *y, Nd4jLong *yShapeInfo, Y *z, Nd4jLong *zShapeInfo, int* dims, int dimsLen, Y *extraParams, int64_t start, int64_t stop) { + void Reduction3Loops::wrapper(const int opNum, const X *x, const Nd4jLong *xShapeInfo, const X *y, const Nd4jLong *yShapeInfo, Y *z, const Nd4jLong *zShapeInfo, int* dims, int dimsLen, Y *extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce3, PARAMS(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dims, dimsLen, extraParams, start, stop), REDUCE3_OPS); #endif } template - void Reduction3Loops::wrapperAll(const int opNum, X *x, Nd4jLong *xShapeInfo, X *y, Nd4jLong *yShapeInfo, Y *z, Nd4jLong *zShapeInfo, Nd4jLong* xTadShapeInfo, Nd4jLong* xTadOffsets, Nd4jLong* yTadShapeInfo, Nd4jLong* yTadOffsets, Y* extraParams, int64_t start, int64_t stop) { + void Reduction3Loops::wrapperAll(const int opNum, const X *x, const Nd4jLong *xShapeInfo, const X *y, const Nd4jLong *yShapeInfo, Y *z, const Nd4jLong *zShapeInfo, const Nd4jLong* xTadShapeInfo, const Nd4jLong* xTadOffsets, const Nd4jLong* yTadShapeInfo, const Nd4jLong* yTadOffsets, Y* extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce3All, PARAMS(x, xShapeInfo, y, 
yShapeInfo, z, zShapeInfo, xTadShapeInfo, xTadOffsets, yTadShapeInfo, yTadOffsets, extraParams, start, stop), REDUCE3_OPS); #endif diff --git a/libnd4j/include/helpers/cpu/loops/ReductionLoops_bool.cpp b/libnd4j/include/helpers/cpu/loops/ReductionLoops_bool.cpp index 31ec60d93..e122717fc 100644 --- a/libnd4j/include/helpers/cpu/loops/ReductionLoops_bool.cpp +++ b/libnd4j/include/helpers/cpu/loops/ReductionLoops_bool.cpp @@ -26,17 +26,18 @@ namespace sd { template template - void ReductionBoolLoops::innerloopReduce(X* x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop) { + void ReductionBoolLoops::innerloopReduce(const X* x, const Nd4jLong* xShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS ReductionLoops::template loopReduce(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams, start, stop); #endif } template - void ReductionBoolLoops::wrapper(const int opNum, X *x, Nd4jLong *xShapeInfo, Y *z, - Nd4jLong *zShapeInfo, Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets, - X *extraParams, int64_t start, int64_t stop) { + void ReductionBoolLoops::wrapper(const int opNum, + const X *x, const Nd4jLong *xShapeInfo, + Y *z, const Nd4jLong *zShapeInfo, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + X *extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce, PARAMS(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams, start, stop), REDUCE_BOOL_OPS); #endif diff --git a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_0.cpp b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_0.cpp index f4243d1c9..a3879bee3 100644 --- a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_0.cpp +++ b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_0.cpp @@ -28,16 +28,18 @@ 
namespace sd { template template - void ReductionFloatLoops::innerloopReduce(X * x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, Z* extraParams, int64_t start, int64_t stop) { + void ReductionFloatLoops::innerloopReduce(const X * x, const Nd4jLong* xShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, Z* extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS ReductionLoops::template loopReduce(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams, start, stop); #endif } template - void ReductionFloatLoops::wrapper(const int opNum, X *x, Nd4jLong *xShapeInfo, Y *z, - Nd4jLong *zShapeInfo, Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets, Y *extraParams, int64_t start, int64_t stop) { + void ReductionFloatLoops::wrapper(const int opNum, const X *x, const Nd4jLong *xShapeInfo, + Y *z, const Nd4jLong *zShapeInfo, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + Y *extraParams, + int64_t start, int64_t stop) { #ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce, PARAMS(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams, start, stop), REDUCE_FLOAT_OPS); #endif diff --git a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_1.cpp b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_1.cpp index 1c5b46d40..6dd555037 100644 --- a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_1.cpp +++ b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_1.cpp @@ -28,16 +28,19 @@ namespace sd { template template - void ReductionFloatLoops::innerloopReduce(X * x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, Z* extraParams, int64_t start, int64_t stop) { + void ReductionFloatLoops::innerloopReduce(const X * x, const Nd4jLong* xShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, Z* extraParams, int64_t start, int64_t 
stop) { #ifndef INLINE_LOOPS ReductionLoops::template loopReduce(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams, start, stop); #endif } template - void ReductionFloatLoops::wrapper(const int opNum, X *x, Nd4jLong *xShapeInfo, Y *z, - Nd4jLong *zShapeInfo, Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets, Y *extraParams, int64_t start, int64_t stop) { + void ReductionFloatLoops::wrapper(const int opNum, + const X *x, const Nd4jLong *xShapeInfo, + Y *z, const Nd4jLong *zShapeInfo, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + Y *extraParams, + int64_t start, int64_t stop) { #ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce, PARAMS(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams, start, stop), REDUCE_FLOAT_OPS); #endif diff --git a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_2.cpp b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_2.cpp index 08ca08cdb..ce1042b88 100644 --- a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_2.cpp +++ b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_2.cpp @@ -28,16 +28,16 @@ namespace sd { template template - void ReductionFloatLoops::innerloopReduce(X * x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, Z* extraParams, int64_t start, int64_t stop) { + void ReductionFloatLoops::innerloopReduce(const X * x, const Nd4jLong* xShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, Z* extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS ReductionLoops::template loopReduce(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams, start, stop); #endif } template - void ReductionFloatLoops::wrapper(const int opNum, X *x, Nd4jLong *xShapeInfo, Y *z, - Nd4jLong *zShapeInfo, Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets, Y *extraParams, int64_t start, int64_t stop) { + void ReductionFloatLoops::wrapper(const int opNum, const X *x, const 
Nd4jLong *xShapeInfo, Y *z, + const Nd4jLong *zShapeInfo, const Nd4jLong *tadShapeInfo, + const Nd4jLong *tadOffsets, Y *extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce, PARAMS(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams, start, stop), REDUCE_FLOAT_OPS); #endif diff --git a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_3.cpp b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_3.cpp index 7735c2125..6cfac93bc 100644 --- a/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_3.cpp +++ b/libnd4j/include/helpers/cpu/loops/ReductionLoops_float_3.cpp @@ -28,16 +28,16 @@ namespace sd { template template - void ReductionFloatLoops::innerloopReduce(X * x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, Z* extraParams, int64_t start, int64_t stop) { + void ReductionFloatLoops::innerloopReduce(const X * x, const Nd4jLong* xShapeInfo, Z* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, Z* extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS ReductionLoops::template loopReduce(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams, start, stop); #endif } template - void ReductionFloatLoops::wrapper(const int opNum, X *x, Nd4jLong *xShapeInfo, Y *z, - Nd4jLong *zShapeInfo, Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets, Y *extraParams, int64_t start, int64_t stop) { + void ReductionFloatLoops::wrapper(const int opNum, const X *x, const Nd4jLong *xShapeInfo, Y *z, + const Nd4jLong *zShapeInfo, const Nd4jLong *tadShapeInfo, + const Nd4jLong *tadOffsets, Y *extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce, PARAMS(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams, start, stop), REDUCE_FLOAT_OPS); #endif diff --git a/libnd4j/include/helpers/cpu/loops/ReductionLoops_long.cpp 
b/libnd4j/include/helpers/cpu/loops/ReductionLoops_long.cpp index e4f4ab2e0..be6cb28bd 100644 --- a/libnd4j/include/helpers/cpu/loops/ReductionLoops_long.cpp +++ b/libnd4j/include/helpers/cpu/loops/ReductionLoops_long.cpp @@ -33,16 +33,16 @@ namespace sd { template template - void ReductionLongLoops::innerloopReduce(X * x, Nd4jLong* xShapeInfo, Z *z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop) { + void ReductionLongLoops::innerloopReduce(const X * x, const Nd4jLong* xShapeInfo, Z *z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS ReductionLoops::template loopReduce(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams, start, stop); #endif } template - void ReductionLongLoops::wrapper(const int opNum, X *x, Nd4jLong *xShapeInfo, Y *z, - Nd4jLong *zShapeInfo, Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets, X *extraParams, int64_t start, int64_t stop) { + void ReductionLongLoops::wrapper(const int opNum, const X *x, const Nd4jLong *xShapeInfo, Y *z, + const Nd4jLong *zShapeInfo, const Nd4jLong *tadShapeInfo, + const Nd4jLong *tadOffsets, X *extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS DISPATCH_BY_OPNUM_TT(innerloopReduce, PARAMS(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams, start, stop), REDUCE_LONG_OPS); #endif diff --git a/libnd4j/include/helpers/cpu/loops/ReductionLoops_same.cpp b/libnd4j/include/helpers/cpu/loops/ReductionLoops_same.cpp index 6188a90f5..53725de83 100644 --- a/libnd4j/include/helpers/cpu/loops/ReductionLoops_same.cpp +++ b/libnd4j/include/helpers/cpu/loops/ReductionLoops_same.cpp @@ -26,16 +26,16 @@ namespace sd { template template - void ReductionSameLoops::innerloopReduce(X* x, Nd4jLong* xShapeInfo, X* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop) 
{ + void ReductionSameLoops::innerloopReduce(const X* x, const Nd4jLong* xShapeInfo, X* z, const Nd4jLong* zShapeInfo, const Nd4jLong* tadShapeInfo, const Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS ReductionLoops::template loopReduce(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams, start, stop); #endif } template - void ReductionSameLoops::wrapper(const int opNum, X *vx, Nd4jLong *xShapeInfo, X *vz, - Nd4jLong *zShapeInfo, Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets, + void ReductionSameLoops::wrapper(const int opNum, const X *vx, const Nd4jLong *xShapeInfo, X *vz, + const Nd4jLong *zShapeInfo, const Nd4jLong *tadShapeInfo, + const Nd4jLong *tadOffsets, X *vextraParams, int64_t start, int64_t stop) { #ifndef INLINE_LOOPS auto x = reinterpret_cast(vx); diff --git a/libnd4j/include/helpers/cuda/ConstantShapeHelper.cu b/libnd4j/include/helpers/cuda/ConstantShapeHelper.cu index ebce6aac5..2026dbb04 100644 --- a/libnd4j/include/helpers/cuda/ConstantShapeHelper.cu +++ b/libnd4j/include/helpers/cuda/ConstantShapeHelper.cu @@ -83,40 +83,40 @@ namespace sd { return _cache[deviceId].count(descriptor) != 0; } - Nd4jLong* ConstantShapeHelper::createShapeInfo(const sd::DataType dataType, const char order, const int rank, const Nd4jLong* shape) { + Nd4jLong const* ConstantShapeHelper::createShapeInfo(const sd::DataType dataType, const char order, const int rank, const Nd4jLong* shape) { ShapeDescriptor descriptor(dataType, order, shape, rank); return bufferForShapeInfo(descriptor).primaryAsT(); } - Nd4jLong* ConstantShapeHelper::createShapeInfo(const sd::DataType dataType, const Nd4jLong* shapeInfo) { + Nd4jLong const* ConstantShapeHelper::createShapeInfo(const sd::DataType dataType, const Nd4jLong* shapeInfo) { return ConstantShapeHelper::createShapeInfo(dataType, shape::order(shapeInfo), shape::rank(shapeInfo), shape::shapeOf(const_cast(shapeInfo))); } - Nd4jLong* ConstantShapeHelper::emptyShapeInfo(const 
sd::DataType dataType) { + Nd4jLong const* ConstantShapeHelper::emptyShapeInfo(const sd::DataType dataType) { auto descriptor = ShapeDescriptor::emptyDescriptor(dataType); return bufferForShapeInfo(descriptor).primaryAsT(); } - Nd4jLong* ConstantShapeHelper::scalarShapeInfo(const sd::DataType dataType) { + Nd4jLong const* ConstantShapeHelper::scalarShapeInfo(const sd::DataType dataType) { auto descriptor = ShapeDescriptor::scalarDescriptor(dataType); return bufferForShapeInfo(descriptor).primaryAsT(); } - Nd4jLong* ConstantShapeHelper::vectorShapeInfo(const Nd4jLong length, const sd::DataType dataType) { + Nd4jLong const* ConstantShapeHelper::vectorShapeInfo(const Nd4jLong length, const sd::DataType dataType) { auto descriptor = ShapeDescriptor::vectorDescriptor(length, dataType); return bufferForShapeInfo(descriptor).primaryAsT(); } - Nd4jLong* ConstantShapeHelper::createShapeInfo(const sd::DataType dataType, const char order, const std::vector &shape) { + Nd4jLong const* ConstantShapeHelper::createShapeInfo(const sd::DataType dataType, const char order, const std::vector &shape) { ShapeDescriptor descriptor(dataType, order, shape); return bufferForShapeInfo(descriptor).primaryAsT(); } - Nd4jLong* ConstantShapeHelper::createShapeInfo(const ShapeDescriptor &descriptor) { + Nd4jLong const* ConstantShapeHelper::createShapeInfo(const ShapeDescriptor &descriptor) { return bufferForShapeInfo(descriptor).primaryAsT(); } - Nd4jLong* ConstantShapeHelper::createFromExisting(Nd4jLong *shapeInfo, bool destroyOriginal) { + Nd4jLong const* ConstantShapeHelper::createFromExisting(Nd4jLong *shapeInfo, bool destroyOriginal) { ShapeDescriptor descriptor(shapeInfo); auto result = createShapeInfo(descriptor); @@ -126,7 +126,7 @@ namespace sd { return result; } - Nd4jLong* ConstantShapeHelper::createFromExisting(Nd4jLong *shapeInfo, sd::memory::Workspace *workspace) { + Nd4jLong const* ConstantShapeHelper::createFromExisting(Nd4jLong *shapeInfo, sd::memory::Workspace *workspace) { 
ShapeDescriptor descriptor(shapeInfo); auto result = createShapeInfo(descriptor); @@ -136,7 +136,7 @@ namespace sd { } //////////////////////////////////////////////////////////////////////// -ConstantDataBuffer ConstantShapeHelper::createShapeInfoWithUnitiesForBroadcast(const Nd4jLong* maxShapeInfo, const Nd4jLong* minShapeInfo, sd::memory::Workspace* workspace, const std::vector dimensions) { +ConstantDataBuffer ConstantShapeHelper::createShapeInfoWithUnitiesForBroadcast(const Nd4jLong* maxShapeInfo, const Nd4jLong* minShapeInfo, sd::memory::Workspace* workspace, const std::vector& dimensions) { Nd4jLong* newShapeInfo = nullptr; ALLOCATE(newShapeInfo, workspace, shape::shapeInfoLength(shape::rank(maxShapeInfo)), Nd4jLong); diff --git a/libnd4j/include/helpers/cuda_off/MmulHelper.cu b/libnd4j/include/helpers/cuda_off/MmulHelper.cu index fd1cd5813..0a3b466bc 100644 --- a/libnd4j/include/helpers/cuda_off/MmulHelper.cu +++ b/libnd4j/include/helpers/cuda_off/MmulHelper.cu @@ -268,8 +268,8 @@ NDArray* MmulHelper::mmulMxM(const NDArray* A, const NDArray* B, NDArray* C, dou const int sharedMem = threadsPerBlock * sizeof(int) * 6 + 128; // 6 = aRank + bRank + cRank NDArray::prepareSpecialUse({C}, {A, B}); - // BUILD_TRIPLE_SELECTOR(aType, bType, cType, usualGemm, (blocksPerGrid, threadsPerBlock, sharedMem, stream, A->getSpecialBuffer(), A->getSpecialShapeInfo(), B->getSpecialBuffer(), B->getSpecialShapeInfo(), C->getSpecialBuffer(), C->getSpecialShapeInfo(), 0, 1, 0, 1, 0, 1, alpha, beta), NUMERIC_TYPES, NUMERIC_TYPES, FLOAT_TYPES); - BUILD_SINGLE_SELECTOR_THRICE(aType, usualGemm, (blocksPerGrid, threadsPerBlock, sharedMem, stream, A->getSpecialBuffer(), A->getSpecialShapeInfo(), B->getSpecialBuffer(), B->getSpecialShapeInfo(), C->getSpecialBuffer(), C->getSpecialShapeInfo(), 0, 1, 0, 1, 0, 1, alpha, beta), NUMERIC_TYPES) + // BUILD_TRIPLE_SELECTOR(aType, bType, cType, usualGemm, (blocksPerGrid, threadsPerBlock, sharedMem, stream, A->specialBuffer(), 
A->specialShapeInfo(), B->specialBuffer(), B->specialShapeInfo(), C->specialBuffer(), C->specialShapeInfo(), 0, 1, 0, 1, 0, 1, alpha, beta), NUMERIC_TYPES, NUMERIC_TYPES, FLOAT_TYPES); + BUILD_SINGLE_SELECTOR_THRICE(aType, usualGemm, (blocksPerGrid, threadsPerBlock, sharedMem, stream, A->specialBuffer(), A->specialShapeInfo(), B->specialBuffer(), B->specialShapeInfo(), C->specialBuffer(), C->specialShapeInfo(), 0, 1, 0, 1, 0, 1, alpha, beta), NUMERIC_TYPES) NDArray::registerSpecialUse({C}, {A, B}); auto cudaResult = cudaStreamSynchronize(*stream); @@ -319,23 +319,23 @@ NDArray* MmulHelper::mmulMxM(const NDArray* A, const NDArray* B, NDArray* C, dou // choose appropriate cuda gemm api depending on data types if(typeDouble) { - status = cublasDgemm(*handle, transAblas, transBblas, M, N, K, &alpha, (double*)pA->getSpecialBuffer(), lda, (double*)pB->getSpecialBuffer(), ldb, &beta, (double*)pC->getSpecialBuffer(), ldc); + status = cublasDgemm(*handle, transAblas, transBblas, M, N, K, &alpha, (double*)pA->specialBuffer(), lda, (double*)pB->specialBuffer(), ldb, &beta, (double*)pC->specialBuffer(), ldc); } else if(typeFloat) { float alphaF(alpha), betaF(beta); - status = cublasSgemm(*handle, transAblas, transBblas, M, N, K, &alphaF, (float*)pA->getSpecialBuffer(), lda, (float*)pB->getSpecialBuffer(), ldb, &betaF, (float*)pC->getSpecialBuffer(), ldc); + status = cublasSgemm(*handle, transAblas, transBblas, M, N, K, &alphaF, (float*)pA->specialBuffer(), lda, (float*)pB->specialBuffer(), ldb, &betaF, (float*)pC->specialBuffer(), ldc); } else if(typeHalf) { float16 alphaH(alpha), betaH(beta); - status = cublasHgemm(*handle, transAblas, transBblas, M, N, K, &alphaH.data, (__half*)pA->getSpecialBuffer(), lda, (__half*)pB->getSpecialBuffer(), ldb, &betaH.data, (__half*)pC->getSpecialBuffer(), ldc); + status = cublasHgemm(*handle, transAblas, transBblas, M, N, K, &alphaH.data, (__half*)pA->specialBuffer(), lda, (__half*)pB->specialBuffer(), ldb, &betaH.data, 
(__half*)pC->specialBuffer(), ldc); } else if(typeIntFloat) { float alphaF(alpha), betaF(beta); - status = cublasSgemmEx(*handle, transAblas, transBblas, M, N, K, &alphaF, pA->getSpecialBuffer(), CUDA_R_8I, lda, pB->getSpecialBuffer(), CUDA_R_8I, ldb, &betaF, pC->getSpecialBuffer(), CUDA_R_32F, ldc); + status = cublasSgemmEx(*handle, transAblas, transBblas, M, N, K, &alphaF, pA->specialBuffer(), CUDA_R_8I, lda, pB->specialBuffer(), CUDA_R_8I, ldb, &betaF, pC->specialBuffer(), CUDA_R_32F, ldc); } else if(typeHalfFloat) { float alphaF(alpha), betaF(beta); - status = cublasSgemmEx(*handle, transAblas, transBblas, M, N, K, &alphaF, pA->getSpecialBuffer(), CUDA_R_16F, lda, pB->getSpecialBuffer(), CUDA_R_16F, ldb, &betaF, pC->getSpecialBuffer(), CUDA_R_32F, ldc); + status = cublasSgemmEx(*handle, transAblas, transBblas, M, N, K, &alphaF, pA->specialBuffer(), CUDA_R_16F, lda, pB->specialBuffer(), CUDA_R_16F, ldb, &betaF, pC->specialBuffer(), CUDA_R_32F, ldc); } if (status != CUBLAS_STATUS_SUCCESS) @@ -365,13 +365,13 @@ NDArray* MmulHelper::mmulMxV(const NDArray* A, const NDArray* X, sd::NDArray* Y, if(A->rankOf() != 2) throw std::runtime_error("MmulHelper::mmulMxV cuda: rank of A array is not equal 2 !"); - if(!shape::isCommonVector(X->getShapeInfo(), xLenDim)) + if(!shape::isCommonVector(X->shapeInfo(), xLenDim)) throw std::runtime_error("MmulHelper::mmulMxV cuda: X array must be vector !"); const auto M = A->sizeAt(0); const auto N = A->sizeAt(1); - if(Y != nullptr && !shape::isCommonVector(Y->getShapeInfo(), yLenDim)) + if(Y != nullptr && !shape::isCommonVector(Y->shapeInfo(), yLenDim)) throw std::runtime_error("MmulHelper::mmulMxV cuda: Y array must be vector !"); if(X->lengthOf() != N) throw std::runtime_error("MmulHelper::mmulMxV cuda: X vector has wrong length !"); @@ -411,8 +411,8 @@ NDArray* MmulHelper::mmulMxV(const NDArray* A, const NDArray* X, sd::NDArray* Y, const int blocksPerGrid = (M + threadsPerBlock - 1) / threadsPerBlock; NDArray::prepareSpecialUse({Y}, 
{A, X}); - // BUILD_TRIPLE_SELECTOR(aType, xType, yType, usualGemv, (blocksPerGrid, threadsPerBlock, stream, A->getSpecialBuffer(), A->getSpecialShapeInfo(), X->getSpecialBuffer(), X->getSpecialShapeInfo(), Y->getSpecialBuffer(), Y->getSpecialShapeInfo(), incx, incy, 0, alpha, beta), NUMERIC_TYPES, NUMERIC_TYPES, FLOAT_TYPES); - BUILD_SINGLE_SELECTOR_THRICE(xType, usualGemv, (blocksPerGrid, threadsPerBlock, stream, A->getSpecialBuffer(), A->getSpecialShapeInfo(), X->getSpecialBuffer(), X->getSpecialShapeInfo(), Y->getSpecialBuffer(), Y->getSpecialShapeInfo(), incx, incy, 0, alpha, beta), NUMERIC_TYPES) + // BUILD_TRIPLE_SELECTOR(aType, xType, yType, usualGemv, (blocksPerGrid, threadsPerBlock, stream, A->specialBuffer(), A->specialShapeInfo(), X->specialBuffer(), X->specialShapeInfo(), Y->specialBuffer(), Y->specialShapeInfo(), incx, incy, 0, alpha, beta), NUMERIC_TYPES, NUMERIC_TYPES, FLOAT_TYPES); + BUILD_SINGLE_SELECTOR_THRICE(xType, usualGemv, (blocksPerGrid, threadsPerBlock, stream, A->specialBuffer(), A->specialShapeInfo(), X->specialBuffer(), X->specialShapeInfo(), Y->specialBuffer(), Y->specialShapeInfo(), incx, incy, 0, alpha, beta), NUMERIC_TYPES) NDArray::registerSpecialUse({Y}, {A, X}); auto cudaResult = cudaStreamSynchronize(*stream); @@ -442,11 +442,11 @@ NDArray* MmulHelper::mmulMxV(const NDArray* A, const NDArray* X, sd::NDArray* Y, // choose appropriate cuda gemm api depending on data types if(typeDouble) { - status = cublasDgemv(*handle, transAblas, transA ? N : M, transA ? M : N, &alpha, (double*)pA->getSpecialBuffer(), lda, (double*)X->getSpecialBuffer(), incx, &beta, (double*)Y->getSpecialBuffer(), incy); + status = cublasDgemv(*handle, transAblas, transA ? N : M, transA ? M : N, &alpha, (double*)pA->specialBuffer(), lda, (double*)X->specialBuffer(), incx, &beta, (double*)Y->specialBuffer(), incy); } else if(typeFloat) { float alphaF(alpha), betaF(beta); - status = cublasSgemv(*handle, transAblas, transA ? N : M, transA ? 
M : N, &alphaF, (float*)pA->getSpecialBuffer(), lda, (float*)X->getSpecialBuffer(), incx, &betaF, (float*)Y->getSpecialBuffer(), incy); + status = cublasSgemv(*handle, transAblas, transA ? N : M, transA ? M : N, &alphaF, (float*)pA->specialBuffer(), lda, (float*)X->specialBuffer(), incx, &betaF, (float*)Y->specialBuffer(), incy); } if (status != CUBLAS_STATUS_SUCCESS) @@ -471,9 +471,9 @@ NDArray* MmulHelper::dot(const NDArray* X, const NDArray* Y, sd::NDArray* Z, con int xLenDim(0), yLenDim(0); - if(!shape::isCommonVector(X->getShapeInfo(), xLenDim)) + if(!shape::isCommonVector(X->shapeInfo(), xLenDim)) throw std::runtime_error("MmulHelper::dot cuda: X array must be vector !"); - if(!shape::isCommonVector(Y->getShapeInfo(), yLenDim)) + if(!shape::isCommonVector(Y->shapeInfo(), yLenDim)) throw std::runtime_error("MmulHelper::dot cuda: Y array must be vector !"); if(Z != nullptr && !Z->isScalar()) throw std::runtime_error("MmulHelper::dot cuda: Z array must be scalar !"); @@ -506,8 +506,8 @@ NDArray* MmulHelper::dot(const NDArray* X, const NDArray* Y, sd::NDArray* Z, con NDArray::prepareSpecialUse({Z}, {X, Y}); - //BUILD_TRIPLE_SELECTOR(xType, yType, zType, usualDot, (blocksPerGrid, threadsPerBlock, stream, length, alpha, X->getSpecialBuffer(), incx, Y->getSpecialBuffer(), incy, beta, Z->getSpecialBuffer()), NUMERIC_TYPES, NUMERIC_TYPES, FLOAT_TYPES); - BUILD_SINGLE_SELECTOR_THRICE(xType, usualDot, (blocksPerGrid, threadsPerBlock, stream, length, alpha, X->getSpecialBuffer(), incx, Y->getSpecialBuffer(), incy, beta, Z->getSpecialBuffer()), NUMERIC_TYPES) + //BUILD_TRIPLE_SELECTOR(xType, yType, zType, usualDot, (blocksPerGrid, threadsPerBlock, stream, length, alpha, X->specialBuffer(), incx, Y->specialBuffer(), incy, beta, Z->specialBuffer()), NUMERIC_TYPES, NUMERIC_TYPES, FLOAT_TYPES); + BUILD_SINGLE_SELECTOR_THRICE(xType, usualDot, (blocksPerGrid, threadsPerBlock, stream, length, alpha, X->specialBuffer(), incx, Y->specialBuffer(), incy, beta, Z->specialBuffer()), 
NUMERIC_TYPES) auto cudaResult = cudaStreamSynchronize(*stream); if (cudaResult != 0) throw cuda_exception::build("MmulHelper::dot cuda failed !", cudaResult); @@ -667,8 +667,8 @@ NDArray* MmulHelper::mmulNxN(const NDArray* A, const NDArray* B, NDArray* C, con cBatchDims = reinterpret_cast(manager.replicatePointer(ShapeUtils::evalDimsToExclude(cRank, {cMaxis, cNaxis}).data(), (cRank - 2) * sizeof(int))); NDArray::prepareSpecialUse({C}, {A, B}); - // BUILD_TRIPLE_SELECTOR(A->dataType(), b->dataType(), C->dataType(), batchedGemm, (blocksPerGrid, threadsPerBlock, A->getContext()->getCudaStream(), A->getSpecialBuffer(), A->getSpecialShapeInfo(), B->getSpecialBuffer(), B->getSpecialShapeInfo(), C->getSpecialBuffer(), C->getSpecialShapeInfo(), aMaxis, aKaxis, bKaxis, bNaxis, cMaxis, cNaxis, alpha, beta), NUMERIC_TYPES, NUMERIC_TYPES, FLOAT_TYPES); - BUILD_SINGLE_SELECTOR_THRICE(A->dataType(), batchedGemm, (blocksPerGrid, threadsPerBlock, sharedMem, A->getContext()->getCudaStream(), A->getSpecialBuffer(), A->getSpecialShapeInfo(), B->getSpecialBuffer(), B->getSpecialShapeInfo(), C->getSpecialBuffer(), C->getSpecialShapeInfo(), aBatchDims, bBatchDims, cBatchDims, aMaxis, aKaxis, bKaxis, bNaxis, cMaxis, cNaxis, alpha, beta), NUMERIC_TYPES) + // BUILD_TRIPLE_SELECTOR(A->dataType(), b->dataType(), C->dataType(), batchedGemm, (blocksPerGrid, threadsPerBlock, A->getContext()->getCudaStream(), A->specialBuffer(), A->specialShapeInfo(), B->specialBuffer(), B->specialShapeInfo(), C->specialBuffer(), C->specialShapeInfo(), aMaxis, aKaxis, bKaxis, bNaxis, cMaxis, cNaxis, alpha, beta), NUMERIC_TYPES, NUMERIC_TYPES, FLOAT_TYPES); + BUILD_SINGLE_SELECTOR_THRICE(A->dataType(), batchedGemm, (blocksPerGrid, threadsPerBlock, sharedMem, A->getContext()->getCudaStream(), A->specialBuffer(), A->specialShapeInfo(), B->specialBuffer(), B->specialShapeInfo(), C->specialBuffer(), C->specialShapeInfo(), aBatchDims, bBatchDims, cBatchDims, aMaxis, aKaxis, bKaxis, bNaxis, cMaxis, cNaxis, alpha, 
beta), NUMERIC_TYPES) NDArray::registerSpecialUse({C}, {A, B}); manager.synchronize(); @@ -797,13 +797,13 @@ NDArray* MmulHelper::mmulNxNold1(const NDArray* A, const NDArray* B, NDArray* C, // multiplication const std::vector dimsToExclude = ShapeUtils::evalDimsToExclude(C->rankOf(), {-2, -1}); - const Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(C->getShapeInfo(), dimsToExclude); + const Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(C->shapeInfo(), dimsToExclude); std::vector idxRanges(2 * C->rankOf()); // #pragma omp parallel for schedule(guided) firstprivate(idxRanges) for(Nd4jLong i = 0; i < numOfSubArrs; ++i) { - ShapeUtils::evalIdxRangesForSubArr(i, C->getShapeInfo(), dimsToExclude, idxRanges.data()); + ShapeUtils::evalIdxRangesForSubArr(i, C->shapeInfo(), dimsToExclude, idxRanges.data()); NDArray cSubArr = (*C)(idxRanges); if(aRank > bRank) { @@ -944,18 +944,18 @@ NDArray* MmulHelper::mmulNxNold2(const NDArray* A, const NDArray* B, NDArray* C, std::vector aSubArrs(bS), bSubArrs(bS), cSubArrs(bS); if(aRank > 2) - shape::calcSubArrsShapeInfoAndOffsets(pA->getShapeInfo(), bS, dimsToExclude.size(), dimsToExclude.data(), subArrShapeInfo.data(), subArrOffsets.data()); + shape::calcSubArrsShapeInfoAndOffsets(pA->shapeInfo(), bS, dimsToExclude.size(), dimsToExclude.data(), subArrShapeInfo.data(), subArrOffsets.data()); for (int i = 0; i < bS; ++i) - aSubArrs[i] = aRank == 2 ? pA->getSpecialBuffer() : pA->getSpecialBuffer() + subArrOffsets[i] * pA->sizeOfT(); + aSubArrs[i] = aRank == 2 ? pA->specialBuffer() : pA->specialBuffer() + subArrOffsets[i] * pA->sizeOfT(); if(bRank > 2) - shape::calcSubArrsShapeInfoAndOffsets(pB->getShapeInfo(), bS, dimsToExclude.size(), dimsToExclude.data(), subArrShapeInfo.data(), subArrOffsets.data()); + shape::calcSubArrsShapeInfoAndOffsets(pB->shapeInfo(), bS, dimsToExclude.size(), dimsToExclude.data(), subArrShapeInfo.data(), subArrOffsets.data()); for (int i = 0; i < bS; ++i) - bSubArrs[i] = bRank == 2 ? 
pB->getSpecialBuffer() : pB->getSpecialBuffer() + subArrOffsets[i] * pB->sizeOfT(); + bSubArrs[i] = bRank == 2 ? pB->specialBuffer() : pB->specialBuffer() + subArrOffsets[i] * pB->sizeOfT(); - shape::calcSubArrsShapeInfoAndOffsets(pC->getShapeInfo(), bS, dimsToExclude.size(), dimsToExclude.data(), subArrShapeInfo.data(), subArrOffsets.data()); + shape::calcSubArrsShapeInfoAndOffsets(pC->shapeInfo(), bS, dimsToExclude.size(), dimsToExclude.data(), subArrShapeInfo.data(), subArrOffsets.data()); for (int i = 0; i < bS; ++i) - cSubArrs[i] = pC->getSpecialBuffer() + subArrOffsets[i] * pC->sizeOfT(); + cSubArrs[i] = pC->specialBuffer() + subArrOffsets[i] * pC->sizeOfT(); PointersManager manager(A->getContext(), "mmulNxN"); @@ -1011,7 +1011,7 @@ NDArray* MmulHelper::mmulNxNold2(const NDArray* A, const NDArray* B, NDArray* C, for(Nd4jLong i = 0; i < bS; ++i) { - ShapeUtils::evalIdxRangesForSubArr(i, pC->getShapeInfo(), dimsToExclude, idxRanges.data()); + ShapeUtils::evalIdxRangesForSubArr(i, pC->shapeInfo(), dimsToExclude, idxRanges.data()); NDArray cSubArr = (*pC)(idxRanges); if(aRank > bRank) { diff --git a/libnd4j/include/helpers/impl/MmulHelper.cpp b/libnd4j/include/helpers/impl/MmulHelper.cpp index f5b9bc829..8e37fd530 100644 --- a/libnd4j/include/helpers/impl/MmulHelper.cpp +++ b/libnd4j/include/helpers/impl/MmulHelper.cpp @@ -91,7 +91,7 @@ void sd::MmulHelper::tensorDot(const sd::NDArray* a, const sd::NDArray* b, sd::N mmul(aPR, bPR, cPR, 1.0, 0.0); - if(cPR->getBuffer() != cP->getBuffer() || cPR->getSpecialBuffer() != cP->getSpecialBuffer() ) // this means both permute and reshape have been performed on c, cP always points on c->getBuffer() + if(cPR->buffer() != cP->buffer() || cPR->specialBuffer() != cP->specialBuffer() ) // this means both permute and reshape have been performed on c, cP always points on c->buffer() cP->assign(cPR); if(aP != aPR) @@ -150,7 +150,7 @@ void sd::MmulHelper::tensorDot(const NDArray* a, const NDArray* b, NDArray* c, c // check whether 
new buffer allocation was happened for c array if(!whatToDoWithC.empty()) { for(int i = cArrs.size()-1; i > 0; --i) { - if(cArrs[i]->getBuffer() != cArrs[i-1]->getBuffer() || cArrs[i]->getSpecialBuffer() != cArrs[i-1]->getSpecialBuffer()) + if(cArrs[i]->buffer() != cArrs[i-1]->buffer() || cArrs[i]->specialBuffer() != cArrs[i-1]->specialBuffer()) cArrs[i-1]->assign(cArrs[i]); delete cArrs[i]; } @@ -203,8 +203,8 @@ sd::NDArray* MmulHelper::mmul(const sd::NDArray* A, const sd::NDArray* B, sd::ND int lenDim; const int aRank = A->rankOf(); const int bRank = B->rankOf(); - const bool isAVector = shape::isCommonVector(A->getShapeInfo(), lenDim); - const bool isBVector = shape::isCommonVector(B->getShapeInfo(), lenDim); + const bool isAVector = shape::isCommonVector(A->shapeInfo(), lenDim); + const bool isBVector = shape::isCommonVector(B->shapeInfo(), lenDim); // dot product of 2 vectors if(isAVector && isBVector && (aRank != 2 || aRank == 2 && (A->isSameShape(B) || bRank == 1 && A->sizeAt(1) == 1))) // (1x1x1 * 1x1) or (1x4 * 1*4) or (4x1 * 4x1) or (4x1 * 4) @@ -243,7 +243,7 @@ sd::NDArray* MmulHelper::mmul(const sd::NDArray* A, const sd::NDArray* B, sd::ND int xRank = x->rankOf(); int yRank = y->rankOf(); - auto outShape = ShapeUtils::evalShapeForMatmul(x->getShapeInfo(), y->getShapeInfo(), transX, transY); + auto outShape = ShapeUtils::evalShapeForMatmul(x->shapeInfo(), y->shapeInfo(), transX, transY); if(!z->isSameShape(outShape)) { nd4j_printf("NDArrayFactory::matmul static method: input shape of output array is wrong, actual is %s and expected is %s ! 
\n", ShapeUtils::shapeAsString(z).c_str(), ShapeUtils::shapeAsString(outShape).c_str()); throw std::invalid_argument(""); @@ -285,7 +285,7 @@ sd::NDArray* MmulHelper::mmul(const sd::NDArray* A, const sd::NDArray* B, sd::ND for(int i = 0; i < batchRank; ++i) dimsToExclude[i] = i; - const Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(xT->getShapeInfo(), dimsToExclude); + const Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(xT->shapeInfo(), dimsToExclude); //PRAGMA_OMP_PARALLEL_FOR for(Nd4jLong i = 0; i < numOfSubArrs; ++i) { diff --git a/libnd4j/include/helpers/impl/ShapeUtils.cpp b/libnd4j/include/helpers/impl/ShapeUtils.cpp index aa8e917cc..c327004bd 100644 --- a/libnd4j/include/helpers/impl/ShapeUtils.cpp +++ b/libnd4j/include/helpers/impl/ShapeUtils.cpp @@ -118,13 +118,13 @@ std::vector ShapeUtils::evalShapeForTensorDot(const Nd4jLong* aShapeIn ////////////////////////////////////////////////////////////////////////// std::vector ShapeUtils::evalShapeForTensorDot(const NDArray* a, const NDArray* b, const std::vector& axesA, const std::vector& axesB, std::vector& permutAt, std::vector& permutBt, std::vector& shapeAt, std::vector& shapeBt) { - return evalShapeForTensorDot(a->getShapeInfo(), b->getShapeInfo(), axesA, axesB, permutAt, permutBt, shapeAt, shapeBt); + return evalShapeForTensorDot(a->shapeInfo(), b->shapeInfo(), axesA, axesB, permutAt, permutBt, shapeAt, shapeBt); } ////////////////////////////////////////////////////////////////////////// // evaluate output shape for reduce operation when input shape is empty -Nd4jLong* ShapeUtils::evalReduceShapeInfoEmpty(const char order, std::vector& dimsToExclude, const Nd4jLong *shapeInfo, const sd::DataType dataType, const bool keepDims, sd::memory::Workspace* workspace) { + const Nd4jLong* ShapeUtils::evalReduceShapeInfoEmpty(const char order, std::vector& dimsToExclude, const Nd4jLong *shapeInfo, const sd::DataType dataType, const bool keepDims, sd::memory::Workspace* workspace) { if 
(dimsToExclude.size() == 0) { // return copy of input shape Nd4jLong* outShapeInfo = ShapeBuilders::copyShapeInfoAndType(shapeInfo, dataType, true, workspace); @@ -171,22 +171,22 @@ Nd4jLong* ShapeUtils::evalReduceShapeInfoEmpty(const char order, std::vectorbufferForShapeInfo(descriptor).primaryAsT(); } -Nd4jLong* ShapeUtils::evalReduceShapeInfo(const char order, std::vector& dimsToExclude, const NDArray& arr, const bool keepDims, const bool supportOldShapes, sd::memory::Workspace* workspace) { - return evalReduceShapeInfo(order, dimsToExclude, arr, arr.dataType(), keepDims, supportOldShapes, workspace); -} + const Nd4jLong* ShapeUtils::evalReduceShapeInfo(const char order, std::vector& dimsToExclude, const NDArray& arr, const bool keepDims, const bool supportOldShapes, sd::memory::Workspace* workspace) { + return evalReduceShapeInfo(order, dimsToExclude, arr, arr.dataType(), keepDims, supportOldShapes, workspace); + } -Nd4jLong* ShapeUtils::evalReduceShapeInfo(const char order, std::vector& dimsToExclude, const Nd4jLong* shapeInfo, const bool keepDims, const bool supportOldShapes, sd::memory::Workspace* workspace) { - return evalReduceShapeInfo(order, dimsToExclude, shapeInfo, ArrayOptions::dataType(shapeInfo), keepDims, supportOldShapes, workspace); -} + const Nd4jLong* ShapeUtils::evalReduceShapeInfo(const char order, std::vector& dimsToExclude, const Nd4jLong* shapeInfo, const bool keepDims, const bool supportOldShapes, sd::memory::Workspace* workspace) { + return evalReduceShapeInfo(order, dimsToExclude, shapeInfo, ArrayOptions::dataType(shapeInfo), keepDims, supportOldShapes, workspace); + } ////////////////////////////////////////////////////////////////////////// -Nd4jLong* ShapeUtils::evalReduceShapeInfo(const char order, std::vector& dimsToExclude, const NDArray& arr, const sd::DataType dataType, const bool keepDims, const bool supportOldShapes, sd::memory::Workspace* workspace) { - return evalReduceShapeInfo(order, dimsToExclude, arr.getShapeInfo(), 
dataType, keepDims, supportOldShapes, workspace); -} + const Nd4jLong* ShapeUtils::evalReduceShapeInfo(const char order, std::vector& dimsToExclude, const NDArray& arr, const sd::DataType dataType, const bool keepDims, const bool supportOldShapes, sd::memory::Workspace* workspace) { + return evalReduceShapeInfo(order, dimsToExclude, arr.shapeInfo(), dataType, keepDims, supportOldShapes, workspace); + } ////////////////////////////////////////////////////////////////////////// // evaluate shape resulting from reduce operation -Nd4jLong* ShapeUtils::evalReduceShapeInfo(const char order, std::vector& dimsToExclude, const Nd4jLong *shapeInfo, const sd::DataType dataType, const bool keepDims, const bool supportOldShapes, sd::memory::Workspace* workspace) { + const Nd4jLong* ShapeUtils::evalReduceShapeInfo(const char order, std::vector& dimsToExclude, const Nd4jLong *shapeInfo, const sd::DataType dataType, const bool keepDims, const bool supportOldShapes, sd::memory::Workspace* workspace) { if(ArrayOptions::arrayType(shapeInfo) == ArrayType::EMPTY) return ShapeUtils::evalReduceShapeInfoEmpty(order, dimsToExclude, shapeInfo, dataType, keepDims, workspace); @@ -314,39 +314,39 @@ std::vector ShapeUtils::evalRepeatShape(int axis, const std::vectorbufferForShapeInfo(descriptor).primaryAsT(); -} + return ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor).primaryAsT(); + } ////////////////////////////////////////////////////////////////////////// // evaluate shapeInfo of permuted array - Nd4jLong* ShapeUtils::evalPermShapeInfo(const Nd4jLong *dimensions, const int rank, const NDArray& arr, sd::memory::Workspace* workspace) { + const Nd4jLong* ShapeUtils::evalPermShapeInfo(const Nd4jLong *dimensions, const int rank, const NDArray& arr, sd::memory::Workspace* workspace) { std::vector dims(dimensions, dimensions + rank); return evalPermShapeInfo(dims.data(), rank, arr, workspace); @@ -354,7 +354,7 @@ Nd4jLong* ShapeUtils::evalPermShapeInfo(const int* dimensions, 
const int rank, c ////////////////////////////////////////////////////////////////////////// // evaluate shapeInfo of transposed array - Nd4jLong* ShapeUtils::evalTranspShapeInfo(const NDArray& arr, sd::memory::Workspace* workspace, const bool setContigStrides) { + const Nd4jLong* ShapeUtils::evalTranspShapeInfo(const NDArray& arr, sd::memory::Workspace* workspace, const bool setContigStrides) { int rank = arr.rankOf(); std::vector dimensions(rank); @@ -414,10 +414,10 @@ std::vector ShapeUtils::evalDimsToExclude(const int rank, const std::vector // check whether 2 arrays have mutually broadcastable shapes // shape comparison starts from the end bool ShapeUtils::areShapesBroadcastable(const NDArray &arr1, const NDArray &arr2) { - return areShapesBroadcastable(arr1.getShapeInfo(), arr2.getShapeInfo()); + return areShapesBroadcastable(arr1.shapeInfo(), arr2.shapeInfo()); } -bool ShapeUtils::areShapesBroadcastable(Nd4jLong *shapeInfo1, Nd4jLong *shapeInfo2) { +bool ShapeUtils::areShapesBroadcastable(const Nd4jLong *shapeInfo1, const Nd4jLong *shapeInfo2) { int minRank = shape::rank(shapeInfo1) < shape::rank(shapeInfo2) ? shape::rank(shapeInfo1) : shape::rank(shapeInfo2); for (int i = -1; i >= -minRank; --i) @@ -427,177 +427,177 @@ bool ShapeUtils::areShapesBroadcastable(Nd4jLong *shapeInfo1, Nd4jLong *shapeInf return true; } -bool ShapeUtils::areShapesBroadcastable(const std::vector& shape1, const std::vector& shape2) { + bool ShapeUtils::areShapesBroadcastable(const std::vector& shape1, const std::vector& shape2) { - const auto rank1 = shape1.size(); - const auto rank2 = shape2.size(); - const int minRank = rank1 < rank2 ? rank1 : rank2; + const auto rank1 = shape1.size(); + const auto rank2 = shape2.size(); + const int minRank = rank1 < rank2 ? 
rank1 : rank2; - for (int i = 1; i <= minRank; ++i) - if (shape1[rank1-i] != shape2[rank2-i] && shape1[rank1-i] != 1 && shape2[rank2-i] != 1) + for (int i = 1; i <= minRank; ++i) + if (shape1[rank1-i] != shape2[rank2-i] && shape1[rank1-i] != 1 && shape2[rank2-i] != 1) + return false; + + return true; + } + + ////////////////////////////////////////////////////////////////////////// + // check the possibility of broadcast operation, if true then return shapeInfo of resulting array + // if evalMinMax == false the array with larger rank has to be passed as first argument + bool ShapeUtils::evalBroadcastShapeInfo(const NDArray &max, const NDArray &min, const bool evalMinMax, const Nd4jLong*& resultShapeInfo, sd::memory::Workspace* workspace) { + return evalBroadcastShapeInfo(max.shapeInfo(), min.shapeInfo(), evalMinMax, resultShapeInfo, workspace); + } + + bool ShapeUtils::evalBroadcastShapeInfo(const Nd4jLong *max, const Nd4jLong *min, const bool evalMinMax, const Nd4jLong*& resultShapeInfo, sd::memory::Workspace* workspace) { + + // check whether broadcast operation is possible for input arrays + if(!areShapesBroadcastable(max, min)) return false; - return true; -} + auto maxShapeInfo = max; //max.shapeInfo(); + auto minShapeInfo = min; //min.shapeInfo(); -////////////////////////////////////////////////////////////////////////// -// check the possibility of broadcast operation, if true then return shapeInfo of resulting array -// if evalMinMax == false the array with larger rank has to be passed as first argument -bool ShapeUtils::evalBroadcastShapeInfo(const NDArray &max, const NDArray &min, const bool evalMinMax, Nd4jLong*& resultShapeInfo, sd::memory::Workspace* workspace) { - return evalBroadcastShapeInfo(max.getShapeInfo(), min.getShapeInfo(), evalMinMax, resultShapeInfo, workspace); -} + if(evalMinMax && (shape::rank(max) < shape::rank(min))) { + maxShapeInfo = min; + minShapeInfo = max; + } -bool ShapeUtils::evalBroadcastShapeInfo(Nd4jLong *max, Nd4jLong 
*min, const bool evalMinMax, Nd4jLong*& resultShapeInfo, sd::memory::Workspace* workspace) { + const auto maxRank = shape::rank(maxShapeInfo); + const auto minRank = shape::rank(minShapeInfo); - // check whether broadcast operation is possible for input arrays - if(!areShapesBroadcastable(max, min)) - return false; + // evaluate shapeInfo for resulting array + if(resultShapeInfo != nullptr) + throw std::runtime_error("std::runtime_error(ShapeUtils::evalBroadcastShapeInfo method: the input pointer on shapeInfo must be empty (=nullptr) !"); - auto maxShapeInfo = max; //max.getShapeInfo(); - auto minShapeInfo = min; //min.getShapeInfo(); + Nd4jLong *tmpShapeInfo = nullptr; + ALLOCATE(tmpShapeInfo, workspace, shape::shapeInfoLength(maxRank), Nd4jLong); - if(evalMinMax && (shape::rank(max) < shape::rank(min))) { - maxShapeInfo = min; - minShapeInfo = max; + // FIXME: get rid of memcpy here + memcpy(tmpShapeInfo, maxShapeInfo, shape::shapeInfoByteLength(maxRank)); + for (int i = 0; i < minRank; ++i) + if((maxShapeInfo[maxRank-i] != 0 && maxShapeInfo[maxRank-i] < minShapeInfo[minRank-i]) || minShapeInfo[minRank-i] == 0) + tmpShapeInfo[maxRank - i] = minShapeInfo[minRank-i]; + + ShapeUtils::updateStridesAndType(tmpShapeInfo, DataTypeUtils::pickPairwiseResultType(maxShapeInfo, minShapeInfo), shape::order(maxShapeInfo)); + + if (shape::isEmpty(max) || shape::isEmpty(min)) { + ArrayOptions::setPropertyBit(tmpShapeInfo, ARRAY_EMPTY); + memset(shape::stride(tmpShapeInfo), 0, shape::rank(tmpShapeInfo) * sizeof(Nd4jLong)); + } + + ShapeDescriptor descriptor(tmpShapeInfo); + RELEASE(tmpShapeInfo, workspace); + resultShapeInfo = ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor).primaryAsT(); + + return true; } - const auto maxRank = shape::rank(maxShapeInfo); - const auto minRank = shape::rank(minShapeInfo); + ////////////////////////////////////////////////////////////////////////// + // check the possibility of broadcast operation for set of arrays, if true then 
return resulting broadcasted shapeInfo + bool ShapeUtils::evalCommonBroadcastShapeInfo(const std::vector& arrays, Nd4jLong*& resultShapeInfo, memory::Workspace* workspace) { - // evaluate shapeInfo for resulting array - if(resultShapeInfo != nullptr) - throw std::runtime_error("std::runtime_error(ShapeUtils::evalBroadcastShapeInfo method: the input pointer on shapeInfo must be empty (=nullptr) !"); + if(resultShapeInfo != nullptr) + throw std::runtime_error("ShapeUtils::evalCommonBroadcastShapeInfo method: the input pointer on shapeInfo must be empty (=nullptr) !"); - Nd4jLong *tmpShapeInfo = nullptr; - ALLOCATE(tmpShapeInfo, workspace, shape::shapeInfoLength(maxRank), Nd4jLong); + int size = arrays.size(); + int maxRank = arrays[size - 1]->rankOf(); - // FIXME: get rid of memcpy here - memcpy(tmpShapeInfo, maxShapeInfo, shape::shapeInfoByteLength(maxRank)); - for (int i = 0; i < minRank; ++i) - if((maxShapeInfo[maxRank-i] != 0 && maxShapeInfo[maxRank-i] < minShapeInfo[minRank-i]) || minShapeInfo[minRank-i] == 0) - tmpShapeInfo[maxRank - i] = minShapeInfo[minRank-i]; + for(int i = 0; i < size - 1; ++i) { + if(arrays[i]->rankOf() > maxRank) + maxRank = arrays[i]->rankOf(); + for(int j = i + 1; j < size; ++j) + if(!areShapesBroadcastable(*arrays[i], *arrays[j])) + return false; + } - ShapeUtils::updateStridesAndType(tmpShapeInfo, DataTypeUtils::pickPairwiseResultType(maxShapeInfo, minShapeInfo), shape::order(maxShapeInfo)); + Nd4jLong *tmpShapeInfo = nullptr; + ALLOCATE(tmpShapeInfo, workspace, shape::shapeInfoLength(maxRank), Nd4jLong); + memset(tmpShapeInfo, 0, shape::shapeInfoByteLength(maxRank)); + tmpShapeInfo[0] = maxRank; - if (shape::isEmpty(max) || shape::isEmpty(min)) { - ArrayOptions::setPropertyBit(tmpShapeInfo, ARRAY_EMPTY); - memset(shape::stride(tmpShapeInfo), 0, shape::rank(tmpShapeInfo) * sizeof(Nd4jLong)); + for(const auto& item : arrays ) { + for(int i = -1; i >= -item->rankOf(); --i) + if(tmpShapeInfo[i + 1 + maxRank] < item->sizeAt(i)) + 
tmpShapeInfo[i + 1 + maxRank] = item->sizeAt(i); + } + + shape::updateStrides(tmpShapeInfo, arrays[0]->ordering()); + ArrayOptions::setDataType(tmpShapeInfo, arrays[0]->dataType()); + + ShapeDescriptor descriptor(tmpShapeInfo); + RELEASE(tmpShapeInfo, workspace); + resultShapeInfo = const_cast(ConstantShapeHelper::getInstance()->createShapeInfo(descriptor)); + + return true; } - ShapeDescriptor descriptor(tmpShapeInfo); - RELEASE(tmpShapeInfo, workspace); - resultShapeInfo = ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor).primaryAsT(); - return true; -} + ////////////////////////////////////////////////////////////////////////// + // return sorted vector of dimensions common (same) for two arrays, dimensions values corresponds to array with bigger rank + // for example if arr1{2,7}, arr2{2,5,4,7} then vector = {0,3} + std::vector ShapeUtils::getDimsWithSameShape(const NDArray& arr1, const NDArray& arr2) { -////////////////////////////////////////////////////////////////////////// -// check the possibility of broadcast operation for set of arrays, if true then return resulting broadcasted shapeInfo -bool ShapeUtils::evalCommonBroadcastShapeInfo(const std::vector& arrays, Nd4jLong*& resultShapeInfo, memory::Workspace* workspace) { + const NDArray *min, *max; - if(resultShapeInfo != nullptr) - throw std::runtime_error("ShapeUtils::evalCommonBroadcastShapeInfo method: the input pointer on shapeInfo must be empty (=nullptr) !"); + if(arr1.rankOf() >= arr2.rankOf()) { + max = &arr1; + min = &arr2; + } + else { + max = &arr2; + min = &arr1; + } - int size = arrays.size(); - int maxRank = arrays[size - 1]->rankOf(); + const int rankDiff = max->rankOf() - min->rankOf(); - for(int i = 0; i < size - 1; ++i) { - if(arrays[i]->rankOf() > maxRank) - maxRank = arrays[i]->rankOf(); - for(int j = i + 1; j < size; ++j) - if(!areShapesBroadcastable(*arrays[i], *arrays[j])) - return false; + std::vector dims; + + for (int i = 0; i < min->rankOf(); ++i) + if 
(min->sizeAt(i) == max->sizeAt(rankDiff + i)) + dims.emplace_back(rankDiff + i); + + return dims; } - Nd4jLong *tmpShapeInfo = nullptr; - ALLOCATE(tmpShapeInfo, workspace, shape::shapeInfoLength(maxRank), Nd4jLong); - memset(tmpShapeInfo, 0, shape::shapeInfoByteLength(maxRank)); - tmpShapeInfo[0] = maxRank; + ////////////////////////////////////////////////////////////////////////// + // evaluate shapeInfo for resulting array from tile operation + const Nd4jLong* ShapeUtils::evalTileShapeInfo(const NDArray& arr, const std::vector& reps, sd::memory::Workspace* workspace) { + // check whether reps contains at least one zero (then throw exception) or whether all elements in reps are unities (then simply reshape or do nothing) + int repsSize = reps.size(); + Nd4jLong product = 1; + for(const auto& item : reps) + product *= item; + if(product == 0) + throw std::runtime_error("NDArray::tile method: one of the elements in reps array is zero !"); - for(const auto& item : arrays ) { - for(int i = -1; i >= -item->rankOf(); --i) - if(tmpShapeInfo[i + 1 + maxRank] < item->sizeAt(i)) - tmpShapeInfo[i + 1 + maxRank] = item->sizeAt(i); + int rankOld = arr.rankOf(); + int diff = rankOld - repsSize; + + // evaluate new shapeInfo + Nd4jLong* newShapeInfo = nullptr; + if(diff < 0) { + ALLOCATE(newShapeInfo, workspace, shape::shapeInfoLength(repsSize), Nd4jLong); + newShapeInfo[0] = repsSize; // set new rank + for(int i=1; i <= -diff; ++i) + newShapeInfo[i] = 1; // set unities to be new dimensions at left-hand side of newShapeInfo shape place + memcpy(newShapeInfo + 1 - diff, arr.shapeInfo() + 1, rankOld*sizeof(Nd4jLong)); // copy old dimensions to the right-hand side of newShapeInfo shape place + for(int i=1; i <= repsSize; ++i) + newShapeInfo[i] *= reps[i - 1]; // set new shape by multiplying old dimensions by corresponding numbers from reps + } + else { + ALLOCATE(newShapeInfo, workspace, shape::shapeInfoLength(rankOld), Nd4jLong); + memcpy(newShapeInfo, arr.shapeInfo(), 
shape::shapeInfoByteLength(rankOld)); // copy all elements of _shapeInfo to newShapeInfo + for(int i=1; i <= repsSize; ++i) + newShapeInfo[rankOld + 1 - i] *= reps[repsSize - i]; // set new shape by multiplying old dimensions by corresponding numbers from reps + } + shape::updateStrides(newShapeInfo, arr.ordering()); + ArrayOptions::setDataType(newShapeInfo, arr.dataType()); + + ShapeDescriptor descriptor(newShapeInfo); + RELEASE(newShapeInfo, workspace); + return ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor).primaryAsT(); } - shape::updateStrides(tmpShapeInfo, arrays[0]->ordering()); - ArrayOptions::setDataType(tmpShapeInfo, arrays[0]->dataType()); - - ShapeDescriptor descriptor(tmpShapeInfo); - RELEASE(tmpShapeInfo, workspace); - resultShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(descriptor); - - return true; -} - - -////////////////////////////////////////////////////////////////////////// -// return sorted vector of dimensions common (same) for two arrays, dimensions values corresponds to array with bigger rank -// for example if arr1{2,7}, arr2{2,5,4,7} then vector = {0,3} -std::vector ShapeUtils::getDimsWithSameShape(const NDArray& arr1, const NDArray& arr2) { - - const NDArray *min, *max; - - if(arr1.rankOf() >= arr2.rankOf()) { - max = &arr1; - min = &arr2; - } - else { - max = &arr2; - min = &arr1; - } - - const int rankDiff = max->rankOf() - min->rankOf(); - - std::vector dims; - - for (int i = 0; i < min->rankOf(); ++i) - if (min->sizeAt(i) == max->sizeAt(rankDiff + i)) - dims.emplace_back(rankDiff + i); - - return dims; -} - -////////////////////////////////////////////////////////////////////////// -// evaluate shapeInfo for resulting array from tile operation -Nd4jLong* ShapeUtils::evalTileShapeInfo(const NDArray& arr, const std::vector& reps, sd::memory::Workspace* workspace) { - // check whether reps contains at least one zero (then throw exception) or whether all elements in reps are unities (then simply 
reshape or do nothing) - int repsSize = reps.size(); - Nd4jLong product = 1; - for(const auto& item : reps) - product *= item; - if(product == 0) - throw std::runtime_error("NDArray::tile method: one of the elements in reps array is zero !"); - - int rankOld = arr.rankOf(); - int diff = rankOld - repsSize; - - // evaluate new shapeInfo - Nd4jLong* newShapeInfo = nullptr; - if(diff < 0) { - ALLOCATE(newShapeInfo, workspace, shape::shapeInfoLength(repsSize), Nd4jLong); - newShapeInfo[0] = repsSize; // set new rank - for(int i=1; i <= -diff; ++i) - newShapeInfo[i] = 1; // set unities to be new dimensions at left-hand side of newShapeInfo shape place - memcpy(newShapeInfo + 1 - diff, arr.getShapeInfo() + 1, rankOld*sizeof(Nd4jLong)); // copy old dimensions to the right-hand side of newShapeInfo shape place - for(int i=1; i <= repsSize; ++i) - newShapeInfo[i] *= reps[i - 1]; // set new shape by multiplying old dimensions by corresponding numbers from reps - } - else { - ALLOCATE(newShapeInfo, workspace, shape::shapeInfoLength(rankOld), Nd4jLong); - memcpy(newShapeInfo, arr.getShapeInfo(), shape::shapeInfoByteLength(rankOld)); // copy all elements of _shapeInfo to newShapeInfo - for(int i=1; i <= repsSize; ++i) - newShapeInfo[rankOld + 1 - i] *= reps[repsSize - i]; // set new shape by multiplying old dimensions by corresponding numbers from reps - } - shape::updateStrides(newShapeInfo, arr.ordering()); - ArrayOptions::setDataType(newShapeInfo, arr.dataType()); - - ShapeDescriptor descriptor(newShapeInfo); - RELEASE(newShapeInfo, workspace); - return ConstantShapeHelper::getInstance()->bufferForShapeInfo(descriptor).primaryAsT(); -} - - std::vector ShapeUtils::pullShapeFromShapeInfo(Nd4jLong *shapeInfo) { + std::vector ShapeUtils::pullShapeFromShapeInfo(const Nd4jLong *shapeInfo) { std::vector shape(shape::rank(shapeInfo)); int shapeSize = shape.size(); @@ -624,7 +624,7 @@ Nd4jLong* ShapeUtils::evalTileShapeInfo(const NDArray& arr, const std::vectorgetShapeInfo(); 
//Nd4jLong* + auto shapeBuffer = array->shapeInfo(); //Nd4jLong* int rank = (int)*shapeBuffer; result.append("["); for (int e = 0; e < rank; e++) { @@ -724,31 +724,31 @@ std::vector ShapeUtils::shapeAsVector(const Nd4jLong* shapeInfo) { ////////////////////////////////////////////////////////////////////////// // evaluate shapeInfo for diagonal array which is made using input arr elements as diagonal -Nd4jLong* ShapeUtils::evalDiagShapeInfo(const Nd4jLong* shapeInfoConst, sd::memory::Workspace* workspace){ - auto shapeInfo = const_cast(shapeInfoConst); + const Nd4jLong* ShapeUtils::evalDiagShapeInfo(const Nd4jLong* shapeInfoConst, sd::memory::Workspace* workspace){ + auto shapeInfo = const_cast(shapeInfoConst); - const auto rank = shape::rank(shapeInfo); + const auto rank = shape::rank(shapeInfo); - Nd4jLong* outputShapeInfo = nullptr; + Nd4jLong* outputShapeInfo = nullptr; - if(shape::isVector(shapeInfo) || shape::isScalar(shapeInfo)) { - ALLOCATE(outputShapeInfo, workspace, shape::shapeInfoLength(2), Nd4jLong); - outputShapeInfo[0] = 2; - outputShapeInfo[1] = outputShapeInfo[2] = shape::length(shapeInfo); + if(shape::isVector(shapeInfo) || shape::isScalar(shapeInfo)) { + ALLOCATE(outputShapeInfo, workspace, shape::shapeInfoLength(2), Nd4jLong); + outputShapeInfo[0] = 2; + outputShapeInfo[1] = outputShapeInfo[2] = shape::length(shapeInfo); + } + else { + ALLOCATE(outputShapeInfo, workspace, shape::shapeInfoLength(2*rank), Nd4jLong); + outputShapeInfo[0] = 2*rank; + for(int i = 1; i <= rank; ++i) + outputShapeInfo[i] = outputShapeInfo[i + rank] = shapeInfo[i]; + } + + ShapeUtils::updateStridesAndType(outputShapeInfo, shapeInfo, shape::order(shapeInfo)); + + auto result = ConstantShapeHelper::getInstance()->createShapeInfo(outputShapeInfo); + RELEASE(outputShapeInfo, workspace); + return result; } - else { - ALLOCATE(outputShapeInfo, workspace, shape::shapeInfoLength(2*rank), Nd4jLong); - outputShapeInfo[0] = 2*rank; - for(int i = 1; i <= rank; ++i) - 
outputShapeInfo[i] = outputShapeInfo[i + rank] = shapeInfo[i]; - } - - ShapeUtils::updateStridesAndType(outputShapeInfo, shapeInfo, shape::order(shapeInfo)); - - auto result = ConstantShapeHelper::getInstance()->createShapeInfo(outputShapeInfo); - RELEASE(outputShapeInfo, workspace); - return result; -} std::vector ShapeUtils::evalBroadcastBackwardAxis(const Nd4jLong *operandShapeInfo, const Nd4jLong *resultShapeInfo) { // rRank >= oRank always !! @@ -765,83 +765,82 @@ std::vector ShapeUtils::evalBroadcastBackwardAxis(const Nd4jLong *operandSh } //////////////////////////////////////////////////////////////////////////////// -Nd4jLong* ShapeUtils::matrixProductShape(Nd4jLong* theFirstShape, Nd4jLong* theSecondShape, bool shouldTranspondFirst, bool shouldTranspondSecond, sd::DataType dtype, sd::memory::Workspace* workspace) { + const Nd4jLong* ShapeUtils::matrixProductShape(const Nd4jLong* theFirstShape, const Nd4jLong* theSecondShape, bool shouldTranspondFirst, bool shouldTranspondSecond, sd::DataType dtype, sd::memory::Workspace* workspace) { + auto inA = theFirstShape; + auto inB = theSecondShape; + Nd4jLong *shape; + ALLOCATE(shape, workspace, shape::shapeInfoLength(2), Nd4jLong); - auto inA = theFirstShape; - auto inB = theSecondShape; - Nd4jLong *shape; - ALLOCATE(shape, workspace, shape::shapeInfoLength(2), Nd4jLong); + Nd4jLong* tmpA = ShapeBuilders::copyShapeInfo(inA, true, workspace); + Nd4jLong* tmpB = ShapeBuilders::copyShapeInfo(inB, true, workspace); - Nd4jLong* tmpA = ShapeBuilders::copyShapeInfo(inA, true, workspace); - Nd4jLong* tmpB = ShapeBuilders::copyShapeInfo(inB, true, workspace); + if (shouldTranspondFirst) + shape::transposeInplace(tmpA); - if (shouldTranspondFirst) - shape::transposeInplace(tmpA); - - if (shouldTranspondSecond) - shape::transposeInplace(tmpB); + if (shouldTranspondSecond) + shape::transposeInplace(tmpB); - if (shape::rank(tmpA) == 1 && shape::isMatrix(tmpB)) { - // special case here - shape[0] = 1; - shape[1] = tmpB[2]; - 
Nd4jLong *newShape = ShapeBuilders::createShapeInfo(dtype, 'f', 2, shape, workspace); - - RELEASE(shape, workspace); - RELEASE(tmpA, workspace); - RELEASE(tmpB, workspace); - - return newShape; - } else if (shape::isScalar(tmpA) && shape::isScalar(tmpB)) { - // just scalar vs scalar - shape[0] = 1; - shape[1] = 1; - } else if (shape::isMatrix(tmpA) && shape::isVector(tmpB)) { - // gemv case - if (shape::rank(tmpB) == 2) { - shape[0] = tmpA[1]; + if (shape::rank(tmpA) == 1 && shape::isMatrix(tmpB)) { + // special case here + shape[0] = 1; shape[1] = tmpB[2]; - } else { - // we have new 1D shape here - auto newShape = ShapeBuilders::createVectorShapeInfo(dtype, tmpA[1], workspace); + Nd4jLong *newShape = ShapeBuilders::createShapeInfo(dtype, 'f', 2, shape, workspace); RELEASE(shape, workspace); RELEASE(tmpA, workspace); RELEASE(tmpB, workspace); return newShape; + } else if (shape::isScalar(tmpA) && shape::isScalar(tmpB)) { + // just scalar vs scalar + shape[0] = 1; + shape[1] = 1; + } else if (shape::isMatrix(tmpA) && shape::isVector(tmpB)) { + // gemv case + if (shape::rank(tmpB) == 2) { + shape[0] = tmpA[1]; + shape[1] = tmpB[2]; + } else { + // we have new 1D shape here + auto newShape = ShapeBuilders::createVectorShapeInfo(dtype, tmpA[1], workspace); + + RELEASE(shape, workspace); + RELEASE(tmpA, workspace); + RELEASE(tmpB, workspace); + + return newShape; + } + } else if ((shape::isMatrix(tmpA) && shape::isMatrix(tmpB)) || + (shape::isVector(tmpA) && shape::isMatrix(tmpB)) || + (shape::isColumnVector(tmpA) && shape::isVector(tmpB))) { + // gemm case + shape[0] = tmpA[1]; + shape[1] = tmpB[2]; + } else if ((shape::isVector(tmpA) && shape::isScalar(tmpB)) || + (shape::isScalar(tmpA) && shape::isVector(tmpB))) { + // element-wise + shape[0] = 1; + shape[1] = (int) sd::math::nd4j_max(shape::length(tmpA), shape::length(tmpB)); + } else if (shape::isRowVector(tmpA) && shape::isRowVector(tmpB)) { + // dot case + shape[0] = 1; + shape[1] = 1; + } else if 
(shape::isRowVector(tmpA) && shape::isColumnVector(tmpB)) { + // dot case + shape[0] = 1; + shape[1] = 1; } - } else if ((shape::isMatrix(tmpA) && shape::isMatrix(tmpB)) || - (shape::isVector(tmpA) && shape::isMatrix(tmpB)) || - (shape::isColumnVector(tmpA) && shape::isVector(tmpB))) { - // gemm case - shape[0] = tmpA[1]; - shape[1] = tmpB[2]; - } else if ((shape::isVector(tmpA) && shape::isScalar(tmpB)) || - (shape::isScalar(tmpA) && shape::isVector(tmpB))) { - // element-wise - shape[0] = 1; - shape[1] = (int) sd::math::nd4j_max(shape::length(tmpA), shape::length(tmpB)); - } else if (shape::isRowVector(tmpA) && shape::isRowVector(tmpB)) { - // dot case - shape[0] = 1; - shape[1] = 1; - } else if (shape::isRowVector(tmpA) && shape::isColumnVector(tmpB)) { - // dot case - shape[0] = 1; - shape[1] = 1; + + auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(dtype, 'f', 2, shape); + + RELEASE(shape, workspace); + + RELEASE(tmpA, workspace); + RELEASE(tmpB, workspace); + return newShape; } - Nd4jLong *newShape = ShapeBuilders::createShapeInfo(dtype, 'f', 2, shape, workspace); - - RELEASE(shape, workspace); - - RELEASE(tmpA, workspace); - RELEASE(tmpB, workspace); - return newShape; -} - //////////////////////////////////////////////////////////////////////////////// std::vector ShapeUtils::evalPermutFromTo(const std::vector& shapeFrom, const std::vector& shapeTo) { auto rank = shapeFrom.size(); diff --git a/libnd4j/include/helpers/shape.h b/libnd4j/include/helpers/shape.h index 2c18615fc..8cde62ea1 100644 --- a/libnd4j/include/helpers/shape.h +++ b/libnd4j/include/helpers/shape.h @@ -65,7 +65,7 @@ namespace shape { * the information on an ndarray */ struct ND4J_EXPORT ShapeInformation { - _CUDA_HD ShapeInformation(Nd4jLong *shape_ = nullptr, Nd4jLong *stride_ = nullptr, char order_ = 0, int rank_ = 0, int offset_ = 0, int elementWiseStride_ = 0) + _CUDA_HD ShapeInformation(Nd4jLong* shape_ = nullptr, Nd4jLong *stride_ = nullptr, char order_ = 0, int 
rank_ = 0, int offset_ = 0, int elementWiseStride_ = 0) : shape(shape_), stride(stride_), order(order_), rank(rank_), offset(offset_), elementWiseStride(elementWiseStride_) {} @@ -93,19 +93,19 @@ namespace shape { ND4J_EXPORT _CUDA_HD bool shapeEquals(const int shape1Rank, const Nd4jLong *shape1, const int shape2Rank, const Nd4jLong *shape2); - ND4J_EXPORT _CUDA_HD Nd4jLong* detachShape(Nd4jLong *originalShape); + ND4J_EXPORT _CUDA_HD const Nd4jLong* detachShape(const Nd4jLong *originalShape); - ND4J_EXPORT _CUDA_HD Nd4jLong* copyShape(Nd4jLong *originalShape); + ND4J_EXPORT _CUDA_HD Nd4jLong* copyShape(Nd4jLong const* originalShape); ND4J_EXPORT _CUDA_HD bool shapeEquals(const Nd4jLong *shapeInfo1, const Nd4jLong *shapeInfo2); ND4J_EXPORT _CUDA_HD bool shapeEquals(const Nd4jLong *shapeInfo1, const Nd4jLong *shapeInfo2, const Nd4jLong *shapeInfo3); - ND4J_EXPORT _CUDA_HD bool strideEquals(int shape1Rank,Nd4jLong *shape1,int shape2Rank,Nd4jLong *shape2); + ND4J_EXPORT _CUDA_HD bool strideEquals(int const shape1Rank,Nd4jLong const* shape1,int const shape2Rank, Nd4jLong const* shape2); - ND4J_EXPORT _CUDA_HD bool strideEquals(Nd4jLong *shapeInfo1,Nd4jLong *shapeInfo2); + ND4J_EXPORT _CUDA_HD bool strideEquals(Nd4jLong const* shapeInfo1, Nd4jLong const* shapeInfo2); - ND4J_EXPORT _CUDA_HD bool strideEquals(Nd4jLong *stride1,int rank1,Nd4jLong *stride2,int rank2); + ND4J_EXPORT _CUDA_HD bool strideEquals(Nd4jLong const* stride1,int const rank1, Nd4jLong const* stride2, int const rank2); ND4J_EXPORT _CUDA_HD bool equalsSoft(const Nd4jLong *shapeA, const Nd4jLong *shapeB); @@ -128,7 +128,7 @@ namespace shape { ND4J_EXPORT _CUDA_HD int tadIndexForLinear(int linearIndex, int tadLength); - ND4J_EXPORT _CUDA_HD Nd4jLong tadLength(Nd4jLong *shapeInfo, int *dimension, int dimensionLength); + ND4J_EXPORT _CUDA_HD Nd4jLong tadLength(const Nd4jLong *shapeInfo, int *dimension, int dimensionLength); ND4J_EXPORT _CUDA_HD bool canReshape(const int oldRank, Nd4jLong* oldShape, const 
int newRank, Nd4jLong* newShape, bool isFOrder); @@ -142,17 +142,17 @@ namespace shape { * Get the shape info buffer * for the given rank and shape. */ - ND4J_EXPORT _CUDA_HD Nd4jLong *shapeBuffer(int rank, sd::DataType dtype, Nd4jLong *shape); + ND4J_EXPORT _CUDA_HD Nd4jLong *shapeBuffer(int rank, sd::DataType dtype, Nd4jLong const* shape); - ND4J_EXPORT _CUDA_HD Nd4jLong *shapeBuffer(int rank, sd::DataType dtype, Nd4jLong *shape, Nd4jLong *buffer); + ND4J_EXPORT _CUDA_HD Nd4jLong *shapeBuffer(int rank, sd::DataType dtype, Nd4jLong const* shape, Nd4jLong *buffer); /** * Get the shape info buffer * for the given rank and shape. */ - ND4J_EXPORT _CUDA_HD Nd4jLong *shapeBufferFortran(int rank, sd::DataType dtype, Nd4jLong *shape); + ND4J_EXPORT _CUDA_HD Nd4jLong *shapeBufferFortran(int rank, sd::DataType dtype, Nd4jLong const* shape); - ND4J_EXPORT _CUDA_HD Nd4jLong *shapeBufferFortran(int rank, sd::DataType dtype, Nd4jLong *shape, Nd4jLong *output); + ND4J_EXPORT _CUDA_HD Nd4jLong *shapeBufferFortran(int rank, sd::DataType dtype, Nd4jLong const* shape, Nd4jLong *output); #ifdef __CUDACC__ @@ -168,9 +168,9 @@ namespace shape { * @param startNum the start number for the strides * @return the strides for a matrix of n dimensions */ - ND4J_EXPORT _CUDA_HD Nd4jLong * calcStridesFortran(Nd4jLong *shape, int rank); + ND4J_EXPORT _CUDA_HD Nd4jLong * calcStridesFortran(Nd4jLong const* shape, int rank); - ND4J_EXPORT _CUDA_HD Nd4jLong * calcStridesFortran(Nd4jLong *shape, int rank, Nd4jLong* ret); + ND4J_EXPORT _CUDA_HD Nd4jLong * calcStridesFortran(Nd4jLong const* shape, int rank, Nd4jLong* ret); /** * Computes the standard packed array strides for a given shape. 
@@ -180,9 +180,9 @@ namespace shape { * @return the strides for a matrix of n dimensions */ - ND4J_EXPORT _CUDA_HD Nd4jLong* calcStrides(Nd4jLong *shape, int rank); + ND4J_EXPORT _CUDA_HD Nd4jLong* calcStrides(Nd4jLong const *shape, int rank); - ND4J_EXPORT _CUDA_HD Nd4jLong* calcStrides(Nd4jLong *shape, int rank, Nd4jLong* ret); + ND4J_EXPORT _CUDA_HD Nd4jLong* calcStrides(Nd4jLong const *shape, int rank, Nd4jLong* ret); ND4J_EXPORT _CUDA_HD void updateStrides(Nd4jLong *shape, const char order); ND4J_EXPORT _CUDA_HD void updateStrides(const int rank, const Nd4jLong *shapeOnly, Nd4jLong *stridesOnly, const char order); @@ -199,9 +199,9 @@ namespace shape { * @param startNum the start number for the strides * @return the strides for a matrix of n dimensions */ - ND4J_EXPORT _CUDA_HD Nd4jLong* calcStridesFortran(Nd4jLong *shape, int rank, int startNum); + ND4J_EXPORT _CUDA_HD Nd4jLong* calcStridesFortran(Nd4jLong const *shape, int rank, int startNum); - ND4J_EXPORT _CUDA_HD Nd4jLong* calcStridesFortran(Nd4jLong *shape, int rank, int startNum, Nd4jLong* ret); + ND4J_EXPORT _CUDA_HD Nd4jLong* calcStridesFortran(Nd4jLong const *shape, int rank, int startNum, Nd4jLong* ret); /** * Computes the standard packed array strides for a given shape. 
@@ -210,9 +210,9 @@ namespace shape { * @param startNum the start number for the strides * @return the strides for a matrix of n dimensions */ - ND4J_EXPORT _CUDA_HD Nd4jLong* calcStrides(Nd4jLong *shape, int rank, int startNum); + ND4J_EXPORT _CUDA_HD Nd4jLong* calcStrides(Nd4jLong const* shape, int rank, int startNum); - ND4J_EXPORT _CUDA_HD Nd4jLong* calcStrides(Nd4jLong *shape, int rank, int startNum, Nd4jLong* ret); + ND4J_EXPORT _CUDA_HD Nd4jLong* calcStrides(Nd4jLong const *shape, int rank, int startNum, Nd4jLong* ret); /** * @param toCopy the shape to copy @@ -244,7 +244,7 @@ namespace shape { * @return 0 if there is no element wise stride the * element wise stride of reshape(1,length) otherwise */ - ND4J_EXPORT _CUDA_HD int computeElementWiseStride(int rank, Nd4jLong *shape, Nd4jLong *stride, int isFOrder); + ND4J_EXPORT _CUDA_HD int computeElementWiseStride(int rank, Nd4jLong const* shape, Nd4jLong const* stride, int isFOrder); /** * Compute the element wise stride @@ -257,11 +257,11 @@ namespace shape { * @return 0 if there is no element wise stride the * element wise stride of reshape(1,length) otherwise */ - ND4J_EXPORT _CUDA_HD int computeElementWiseStride(int rank, Nd4jLong *shape, Nd4jLong *stride, int isFOrder, Nd4jLong *dimension, int dimensionLength); + ND4J_EXPORT _CUDA_HD int computeElementWiseStride(int rank, Nd4jLong const* shape, Nd4jLong const* stride, int isFOrder, Nd4jLong const* dimension, int dimensionLength); - ND4J_EXPORT _CUDA_HD Nd4jLong *shapeInfoOnlyShapeAndStride(Nd4jLong *shapeInfo, Nd4jLong *dimension, int dimensionLength,bool reverseCopyStride); + ND4J_EXPORT _CUDA_HD Nd4jLong *shapeInfoOnlyShapeAndStride(Nd4jLong const* shapeInfo, Nd4jLong *dimension, int dimensionLength,bool reverseCopyStride); - ND4J_EXPORT _CUDA_HD Nd4jLong *shapeInfoOnlyShapeAndStride(Nd4jLong *shapeInfo, Nd4jLong *dimension, int dimensionLength,bool reverseCopyStride, Nd4jLong *buffer); + ND4J_EXPORT _CUDA_HD Nd4jLong *shapeInfoOnlyShapeAndStride(const 
Nd4jLong *shapeInfo, Nd4jLong *dimension, int dimensionLength,bool reverseCopyStride, Nd4jLong *buffer); /** * * @param length @@ -281,7 +281,7 @@ namespace shape { */ ND4J_EXPORT _CUDA_HD void doPermuteSwap(int length, Nd4jLong **shape, int* rearrange); - ND4J_EXPORT _CUDA_HD Nd4jLong *permuteShapeBuffer(Nd4jLong *shapeBuffer, int* rearrange); + ND4J_EXPORT _CUDA_HD Nd4jLong *permuteShapeBuffer(Nd4jLong const* shapeBuffer, int* rearrange); ND4J_EXPORT _CUDA_HD void permuteShapeBufferInPlace(Nd4jLong *shapeBuffer, int* rearrange, Nd4jLong *out); @@ -304,7 +304,7 @@ namespace shape { ND4J_EXPORT _CUDA_HD Nd4jLong* createPermuteIndexes(int originalRank, int *dimension,int dimensionLength); - ND4J_EXPORT _CUDA_HD Nd4jLong* computeResultShape(Nd4jLong *originalShapeBuffer, int *dimension,int dimensionLength); + ND4J_EXPORT _CUDA_HD Nd4jLong* computeResultShape(const Nd4jLong *originalShapeBuffer, int *dimension,int dimensionLength); /** * This method does inplace transpose of given shapeBuffer @@ -350,7 +350,7 @@ namespace shape { * @param shape the shape of the array * @param rank the rank of cthe shape */ - ND4J_EXPORT _CUDA_HD int isVector(Nd4jLong *shape, int rank); + ND4J_EXPORT _CUDA_HD int isVector(Nd4jLong const* shape, int rank); /** @@ -363,13 +363,13 @@ namespace shape { ND4J_EXPORT _CUDA_HD int isVector(const Nd4jLong *shapeInfo); - ND4J_EXPORT _CUDA_HD bool isLikeVector(Nd4jLong *shapeInfo, int& posOfNonUnityDim); + ND4J_EXPORT _CUDA_HD bool isLikeVector(Nd4jLong const* shapeInfo, int& posOfNonUnityDim); ND4J_EXPORT _CUDA_HD bool isCommonVector(const Nd4jLong *shapeInfo, int& posOfNonUnityDim); ND4J_EXPORT _CUDA_HD bool isRowVector(const Nd4jLong *shapeInfo); - ND4J_EXPORT _CUDA_HD bool isColumnVector(Nd4jLong *shapeInfo); + ND4J_EXPORT _CUDA_HD bool isColumnVector(Nd4jLong const* shapeInfo); /** * shape - input inShape is shape only, not shapeInfo @@ -401,10 +401,10 @@ namespace shape { */ template - ND4J_EXPORT _CUDA_HD T* copyOf(Nd4jLong length, T 
*toCopy); + ND4J_EXPORT _CUDA_HD T* copyOf(Nd4jLong length, T const* toCopy); template - ND4J_EXPORT _CUDA_HD T* copyOf(Nd4jLong length, T *toCopy, T *ret); + ND4J_EXPORT _CUDA_HD T* copyOf(Nd4jLong length, T const* toCopy, T *ret); /** * Return a copy of a buffer. @@ -413,13 +413,13 @@ namespace shape { */ template - ND4J_EXPORT _CUDA_HD void copyTo(Nd4jLong length, T *from, T *to); + ND4J_EXPORT _CUDA_HD void copyTo(Nd4jLong length, T const* from, T *to); /** * Return a copy of a buffer. * This buffer allocates memory * that must be freed elsewhere. */ - ND4J_EXPORT _CUDA_HD void copyTo(int length, Nd4jLong *from, Nd4jLong *to, Nd4jLong *indexes); + ND4J_EXPORT _CUDA_HD void copyTo(int length, Nd4jLong const* from, Nd4jLong *to, Nd4jLong *indexes); /** * Permute the given strides @@ -566,7 +566,7 @@ namespace shape { * item */ template - ND4J_EXPORT _CUDA_HD void removeIndex(T1 *data, T2 *indexes, Nd4jLong dataLength, Nd4jLong indexesLength, T1 *out); + ND4J_EXPORT _CUDA_HD void removeIndex(T1 const* data, T2 const* indexes, Nd4jLong dataLength, Nd4jLong indexesLength, T1 *out); /** * Return a copy of this array with the @@ -582,7 +582,7 @@ namespace shape { */ template - ND4J_EXPORT _CUDA_HD T1* removeIndex(T1 *data, T2 *indexes, Nd4jLong dataLength, Nd4jLong indexesLength); + ND4J_EXPORT _CUDA_HD T1* removeIndex(T1 const* data, T2 const* indexes, Nd4jLong dataLength, Nd4jLong indexesLength); /** * Iterate over a given set of indexes @@ -595,7 +595,7 @@ namespace shape { * indexes should be the indexes to exclude * indexes length should be the length of indexes */ - ND4J_EXPORT _CUDA_HD Nd4jLong* everyIndexBut(Nd4jLong *indexes,int indexesLength,int begin,int end); + ND4J_EXPORT _CUDA_HD Nd4jLong* everyIndexBut(Nd4jLong const* indexes,int indexesLength,int begin,int end); /** * Computes the offset for accessing @@ -641,7 +641,7 @@ namespace shape { * Keep the given indexes * in the data */ - ND4J_EXPORT _CUDA_HD Nd4jLong *keep(volatile Nd4jLong *data, int* 
index, int indexLength, int dataLength); + ND4J_EXPORT _CUDA_HD Nd4jLong *keep(volatile Nd4jLong *data, int const* index, int indexLength, int dataLength); /** * Generate reverse copy of the data @@ -651,13 +651,13 @@ namespace shape { */ template - ND4J_EXPORT _CUDA_HD T* reverseCopy(T *data, Nd4jLong length); + ND4J_EXPORT _CUDA_HD T* reverseCopy(T const* data, Nd4jLong length); template - ND4J_EXPORT _CUDA_HD void reverseCopyTo(T *from, T *to, Nd4jLong length); + ND4J_EXPORT _CUDA_HD void reverseCopyTo(T const* from, T *to, Nd4jLong length); template - ND4J_EXPORT _CUDA_HD void reverseCopyTo(T *from, T *to, Nd4jLong *indexes, Nd4jLong length); + ND4J_EXPORT _CUDA_HD void reverseCopyTo(T const* from, T *to, Nd4jLong *indexes, Nd4jLong length); template ND4J_EXPORT _CUDA_H void convertT(T1 *from, T2 *to, Nd4jLong length); @@ -670,7 +670,7 @@ namespace shape { * @return */ template - ND4J_EXPORT _CUDA_HD T* concat(T* arr1, Nd4jLong arr1Length, T* arr2, Nd4jLong arr2Length); + ND4J_EXPORT _CUDA_HD T* concat(T const* arr1, Nd4jLong const arr1Length, T const* arr2, Nd4jLong const arr2Length); /** * @@ -681,7 +681,7 @@ namespace shape { * @return */ template - ND4J_EXPORT _CUDA_HD T* concat(int numArrays, int numTotalElements, Nd4jLong **arr, Nd4jLong *lengths); + ND4J_EXPORT _CUDA_HD T* concat(int const numArrays, int const numTotalElements, Nd4jLong const**arr, Nd4jLong const* lengths); /** * Get the length per slice of the @@ -695,7 +695,7 @@ namespace shape { * @return the length per slice of the given shape * along the given dimension */ - ND4J_EXPORT _CUDA_HD Nd4jLong lengthPerSlice(int rank, Nd4jLong *shape, int *dimension, int dimensionLength); + ND4J_EXPORT _CUDA_HD Nd4jLong lengthPerSlice(int rank, Nd4jLong const* shape, int const* dimension, int dimensionLength); /** * calculates the offset for a tensor @@ -706,10 +706,10 @@ namespace shape { */ ND4J_EXPORT _CUDA_HD Nd4jLong sliceOffsetForTensor(int rank, int index, - Nd4jLong *shape, - Nd4jLong 
*tensorShape, + Nd4jLong const* shape, + Nd4jLong const* tensorShape, int tensorShapeLength, - int *dimension, + int const *dimension, int dimensionLength); /** @@ -1095,7 +1095,7 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { * Length of a tad given * the shape information */ - INLINEDEF _CUDA_HD Nd4jLong tadLength(Nd4jLong *shapeInfo, int *dimension, int dimensionLength) { + INLINEDEF _CUDA_HD Nd4jLong tadLength(const Nd4jLong *shapeInfo, int *dimension, int dimensionLength) { if(dimensionLength == 1) { return shape::shapeOf(shapeInfo)[dimension[0]]; } @@ -1166,7 +1166,7 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { } - INLINEDEF _CUDA_HD bool strideEquals(int shape1Rank,Nd4jLong *shape1,int shape2Rank,Nd4jLong *shape2) { + INLINEDEF _CUDA_HD bool strideEquals(int const shape1Rank, Nd4jLong const* shape1,int const shape2Rank,Nd4jLong const* shape2) { if(shape1Rank != shape2Rank) return false; //rank not equals @@ -1178,12 +1178,12 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { return true; } - INLINEDEF _CUDA_HD bool strideEquals(Nd4jLong *shapeInfo1,Nd4jLong *shapeInfo2) { + INLINEDEF _CUDA_HD bool strideEquals(Nd4jLong const* shapeInfo1,Nd4jLong const* shapeInfo2) { return shape::strideEquals(shape::rank(shapeInfo1),shape::stride(shapeInfo1),shape::rank(shapeInfo2),shape::stride(shapeInfo2)); } - INLINEDEF _CUDA_HD bool strideEquals(Nd4jLong *stride1,int rank1 , Nd4jLong *stride2, int rank2) { + INLINEDEF _CUDA_HD bool strideEquals(Nd4jLong const* stride1,int const rank1 , Nd4jLong const* stride2, int const rank2) { if(rank1 != rank2) return false; @@ -1195,7 +1195,7 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { return true; } - INLINEDEF _CUDA_HD Nd4jLong *computeResultShape(Nd4jLong *originalShapeBuffer, int* dimension,int dimensionLength) { + INLINEDEF _CUDA_HD Nd4jLong *computeResultShape(Nd4jLong const* originalShapeBuffer, int * dimension,int 
dimensionLength) { Nd4jLong *retShape; int retShapeLength; if(dimensionLength == 1 && dimension[0] == 2147483647) { @@ -1236,7 +1236,7 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { } - INLINEDEF _CUDA_HD Nd4jLong *shapeInfoOnlyShapeAndStride(Nd4jLong *shapeInfo, Nd4jLong *dimension, int dimensionLength,bool reverseCopyStride, Nd4jLong *buffer) { + INLINEDEF _CUDA_HD Nd4jLong *shapeInfoOnlyShapeAndStride(const Nd4jLong *shapeInfo, Nd4jLong *dimension, int dimensionLength,bool reverseCopyStride, Nd4jLong *buffer) { Nd4jLong *theShape = shape::shapeOf(shapeInfo); Nd4jLong *theStride = shape::stride(shapeInfo); int rank = dimensionLength == 1 ? 2 : dimensionLength; @@ -1279,7 +1279,7 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { } else { - Nd4jLong *newIndexes = dimension; + Nd4jLong *newIndexes = dimension; if(reverseCopyStride) shape::reverseCopyTo(theStride, retStride, newIndexes, len); else @@ -1293,7 +1293,7 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { return ret; } - INLINEDEF _CUDA_HD Nd4jLong *shapeInfoOnlyShapeAndStride(Nd4jLong *shapeInfo, Nd4jLong *dimension, int dimensionLength,bool reverseCopyStride) { + INLINEDEF _CUDA_HD Nd4jLong *shapeInfoOnlyShapeAndStride(const Nd4jLong *shapeInfo, Nd4jLong *dimension, int dimensionLength,bool reverseCopyStride) { int rank = dimensionLength == 1 ? 
2 : dimensionLength; traceNew(4); @@ -1330,7 +1330,7 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { * @param startNum the start number for the strides * @return the strides for a matrix of n dimensions */ - INLINEDEF _CUDA_HD Nd4jLong * calcStridesFortran(Nd4jLong *shape, int rank, int startNum) { + INLINEDEF _CUDA_HD Nd4jLong * calcStridesFortran(Nd4jLong const* shape, int rank, int startNum) { if (isVector(shape, rank)) { traceNew(5); @@ -1356,7 +1356,7 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { return stride; } - INLINEDEF _CUDA_HD Nd4jLong * calcStridesFortran(Nd4jLong *shape, int rank, int startNum, Nd4jLong *ret) { + INLINEDEF _CUDA_HD Nd4jLong * calcStridesFortran(Nd4jLong const* shape, int rank, int startNum, Nd4jLong *ret) { if (isVector(shape, rank)) { for (int i = 0; i < rank; i++) ret[i] = 1; @@ -1382,7 +1382,7 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { * @param startNum the start number for the strides * @return the strides for a matrix of n dimensions */ - INLINEDEF _CUDA_HD Nd4jLong * calcStrides(Nd4jLong *shape, int rank, int startNum) { + INLINEDEF _CUDA_HD Nd4jLong * calcStrides(Nd4jLong const *shape, int rank, int startNum) { traceNew(7); @@ -1410,7 +1410,7 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { return stride; } - INLINEDEF _CUDA_HD Nd4jLong * calcStrides(Nd4jLong *shape, int rank, int startNum, Nd4jLong* ret) { + INLINEDEF _CUDA_HD Nd4jLong * calcStrides(Nd4jLong const* shape, int rank, int startNum, Nd4jLong* ret) { if (rank == 1) { ret[0] = 1; return ret; @@ -1439,11 +1439,11 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { * @param startNum the start number for the strides * @return the strides for a matrix of n dimensions */ - INLINEDEF _CUDA_HD Nd4jLong * calcStridesFortran(Nd4jLong *shape, int rank) { + INLINEDEF _CUDA_HD Nd4jLong * calcStridesFortran(Nd4jLong const* shape, int rank) { return 
calcStridesFortran(shape, rank, 1); } - INLINEDEF _CUDA_HD Nd4jLong * calcStridesFortran(Nd4jLong *shape, int rank, Nd4jLong* ret) { + INLINEDEF _CUDA_HD Nd4jLong * calcStridesFortran(Nd4jLong const* shape, int rank, Nd4jLong* ret) { return calcStridesFortran(shape, rank, 1, ret); } @@ -1454,11 +1454,11 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { * @param startNum the start number for the strides * @return the strides for a matrix of n dimensions */ - INLINEDEF _CUDA_HD Nd4jLong* calcStrides(Nd4jLong *shape, int rank) { + INLINEDEF _CUDA_HD Nd4jLong* calcStrides(Nd4jLong const *shape, int rank) { return calcStrides(shape, rank, 1); } - INLINEDEF _CUDA_HD Nd4jLong* calcStrides(Nd4jLong *shape, int rank, Nd4jLong* ret) { + INLINEDEF _CUDA_HD Nd4jLong* calcStrides(Nd4jLong const *shape, int rank, Nd4jLong* ret) { return calcStrides(shape, rank, 1, ret); } @@ -1541,7 +1541,7 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { return copy; } - INLINEDEF _CUDA_HD int computeElementWiseStride(int rank, Nd4jLong *shape, Nd4jLong *stride, int isFOrder) { + INLINEDEF _CUDA_HD int computeElementWiseStride(int rank, Nd4jLong const* shape, Nd4jLong const* stride, int isFOrder) { if (rank == 0) return 1; @@ -1690,8 +1690,8 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { } - INLINEDEF _CUDA_HD int computeElementWiseStride(int rank, Nd4jLong *shape, Nd4jLong *stride, int isFOrder, - Nd4jLong *dimension, int dimensionLength) { + INLINEDEF _CUDA_HD int computeElementWiseStride(int rank, Nd4jLong const* shape, Nd4jLong const* stride, int isFOrder, + Nd4jLong const* dimension, int dimensionLength) { if(dimensionLength == 1) { return stride[dimension[0]]; } @@ -1703,13 +1703,13 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { * Get the shape info buffer * for the given rank and shape. 
*/ - INLINEDEF _CUDA_HD Nd4jLong *shapeBuffer(int rank, sd::DataType dtype, Nd4jLong *shape) { + INLINEDEF _CUDA_HD Nd4jLong *shapeBuffer(int rank, sd::DataType dtype, Nd4jLong const* shape) { Nd4jLong *stride = shape::calcStrides(shape, rank); traceNew(11); auto shapeInfo = new shape::ShapeInformation(); - shapeInfo->shape = shape; + shapeInfo->shape = const_cast(shape); shapeInfo->stride = stride; shapeInfo->offset = 0; shapeInfo->rank = rank; @@ -1728,13 +1728,13 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { * * This method is used only for SoftMax */ - INLINEDEF _CUDA_HD Nd4jLong *shapeBuffer(int rank, sd::DataType dtype, Nd4jLong *shape, Nd4jLong *buffer) { + INLINEDEF _CUDA_HD Nd4jLong *shapeBuffer(int rank, sd::DataType dtype, Nd4jLong const* shape, Nd4jLong *buffer) { Nd4jLong stride[MAX_RANK]; shape::calcStrides(shape,rank, stride); shape::ShapeInformation shapeInfo; - shapeInfo.shape = shape; + shapeInfo.shape = const_cast(shape); shapeInfo.stride = stride; shapeInfo.offset = 0; shapeInfo.rank = rank; @@ -1751,13 +1751,13 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { * Get the shape info buffer * for the given rank and shape. 
*/ - INLINEDEF _CUDA_HD Nd4jLong *shapeBufferFortran(int rank, sd::DataType dtype, Nd4jLong *shape) { + INLINEDEF _CUDA_HD Nd4jLong *shapeBufferFortran(int rank, sd::DataType dtype, Nd4jLong const* shape) { auto stride = shape::calcStridesFortran(shape,rank); traceNew(12); auto shapeInfo = new shape::ShapeInformation(); - shapeInfo->shape = shape; + shapeInfo->shape = const_cast(shape); shapeInfo->stride = stride; shapeInfo->offset = 0; shapeInfo->rank = rank; @@ -1772,13 +1772,13 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) { return shapeInfoBuffer; } - INLINEDEF _CUDA_HD Nd4jLong *shapeBufferFortran(int rank, sd::DataType dtype, Nd4jLong *shape, Nd4jLong *output) { + INLINEDEF _CUDA_HD Nd4jLong *shapeBufferFortran(int rank, sd::DataType dtype, Nd4jLong const *shape, Nd4jLong *output) { Nd4jLong stride[MAX_RANK]; shape::calcStridesFortran(shape,rank, stride); shape::ShapeInformation shapeInfo; - shapeInfo.shape = shape; + shapeInfo.shape = const_cast(shape); shapeInfo.stride = stride; shapeInfo.offset = 0; shapeInfo.rank = rank; @@ -2049,7 +2049,7 @@ INLINEDEF _CUDA_HD Nd4jLong indexOffset(Nd4jLong index, const Nd4jLong* lShapeIn shape::doPermuteShapeInfo(out, rearrange); } - INLINEDEF _CUDA_HD Nd4jLong *permuteShapeBuffer(Nd4jLong *shapeBuffer, int* rearrange) { + INLINEDEF _CUDA_HD Nd4jLong *permuteShapeBuffer(Nd4jLong const* shapeBuffer, int* rearrange) { auto len = shape::shapeInfoLength(shape::rank(shapeBuffer)); Nd4jLong *copy = shape::copyOf(len, shapeBuffer); shape::doPermuteShapeInfo(copy,rearrange); @@ -2238,7 +2238,7 @@ INLINEDEF _CUDA_HD Nd4jLong indexOffset(Nd4jLong index, const Nd4jLong* lShapeIn * @param shape the shape of the array * @param rank the rank of the shape */ - INLINEDEF _CUDA_HD int isVector(Nd4jLong *shape, int rank) { + INLINEDEF _CUDA_HD int isVector(Nd4jLong const* shape, int rank) { if (rank == 0) return 0; @@ -2254,7 +2254,7 @@ INLINEDEF _CUDA_HD Nd4jLong indexOffset(Nd4jLong index, const Nd4jLong* 
lShapeIn return 0; } - INLINEDEF _CUDA_HD bool isLikeVector(Nd4jLong *shapeInfo, int& posOfNonUnityDim) { + INLINEDEF _CUDA_HD bool isLikeVector(Nd4jLong const* shapeInfo, int& posOfNonUnityDim) { int numOfNonUnity = 0; for(int i = 1; i <= shapeInfo[0]; ++i) { @@ -2284,7 +2284,7 @@ INLINEDEF _CUDA_HD Nd4jLong indexOffset(Nd4jLong index, const Nd4jLong* lShapeIn return numOfNonUnity == 1; } - INLINEDEF _CUDA_H Nd4jLong* detachShape(Nd4jLong *originalShape) { + INLINEDEF _CUDA_H Nd4jLong const* detachShape(Nd4jLong const* originalShape) { Nd4jLong *newShape = new Nd4jLong[shape::shapeInfoLength(originalShape)]; memcpy(newShape, originalShape, shape::shapeInfoByteLength(originalShape)); @@ -2292,7 +2292,7 @@ INLINEDEF _CUDA_HD Nd4jLong indexOffset(Nd4jLong index, const Nd4jLong* lShapeIn } - INLINEDEF _CUDA_H Nd4jLong* copyShape(Nd4jLong *originalShape) { + INLINEDEF _CUDA_H Nd4jLong* copyShape(Nd4jLong const* originalShape) { Nd4jLong *newShape = new Nd4jLong[shape::shapeInfoLength(originalShape)]; memcpy(newShape, originalShape, shape::shapeInfoByteLength(originalShape)); @@ -2309,7 +2309,7 @@ INLINEDEF _CUDA_HD Nd4jLong indexOffset(Nd4jLong index, const Nd4jLong* lShapeIn return isVector && shapeFirstOne; } - INLINEDEF _CUDA_HD bool isColumnVector(Nd4jLong *shapeInfo) { + INLINEDEF _CUDA_HD bool isColumnVector(const Nd4jLong *shapeInfo) { bool isVector = shape::isVector(shapeInfo) == 1; bool shapeFirstOne = shape::shapeOf(shapeInfo)[0] == 1; return isVector && !shapeFirstOne; @@ -2381,7 +2381,7 @@ INLINEDEF _CUDA_HD int numOfNonUnitDims(const int rank, const Nd4jLong* inShape) * that must be freed elsewhere. 
*/ template - INLINEDEF _CUDA_HD T *copyOf(Nd4jLong length, T *toCopy) { + INLINEDEF _CUDA_HD T *copyOf(Nd4jLong length, T const* toCopy) { traceNew(18); T *ret = new T[length]; @@ -2389,7 +2389,7 @@ INLINEDEF _CUDA_HD int numOfNonUnitDims(const int rank, const Nd4jLong* inShape) } template - INLINEDEF _CUDA_HD T* copyOf(Nd4jLong length, T *toCopy, T *ret) { + INLINEDEF _CUDA_HD T* copyOf(Nd4jLong length, T const* toCopy, T *ret) { memcpy(ret, toCopy, sizeof(T)*length); return ret; } @@ -2400,7 +2400,7 @@ INLINEDEF _CUDA_HD int numOfNonUnitDims(const int rank, const Nd4jLong* inShape) * that must be freed elsewhere. */ template - INLINEDEF _CUDA_HD void copyTo(Nd4jLong length, T *from, T *to) { + INLINEDEF _CUDA_HD void copyTo(Nd4jLong length, T const* from, T *to) { memcpy(to, from, sizeof(T)*length); } @@ -2409,7 +2409,7 @@ INLINEDEF _CUDA_HD int numOfNonUnitDims(const int rank, const Nd4jLong* inShape) * This buffer allocates memory * that must be freed elsewhere. */ - INLINEDEF _CUDA_HD void copyTo(int length, Nd4jLong *from, Nd4jLong *to, Nd4jLong *indexes) { + INLINEDEF _CUDA_HD void copyTo(int length, Nd4jLong const* from, Nd4jLong *to, Nd4jLong *indexes) { for(int i = 0; i < length; i++) { to[i] = from[indexes[i]]; } @@ -2817,7 +2817,7 @@ INLINEDEF _CUDA_HD int numOfNonUnitDims(const int rank, const Nd4jLong* inShape) * item */ template - INLINEDEF _CUDA_HD void removeIndex(T1* data, T2 *indexes, Nd4jLong dataLength, Nd4jLong indexesLength, T1 *ret) { + INLINEDEF _CUDA_HD void removeIndex(T1 const* data, T2 const* indexes, Nd4jLong dataLength, Nd4jLong indexesLength, T1 *ret) { int count = 0; int absLength = dataLength - indexesLength; @@ -2850,7 +2850,7 @@ INLINEDEF _CUDA_HD int numOfNonUnitDims(const int rank, const Nd4jLong* inShape) * item */ template - INLINEDEF _CUDA_HD T1* removeIndex(T1 *data, T2 *indexes, Nd4jLong dataLength, Nd4jLong indexesLength) { + INLINEDEF _CUDA_HD T1* removeIndex(T1 const* data, T2 const* indexes, Nd4jLong dataLength, 
Nd4jLong indexesLength) { auto lengthOfArr = dataLength - indexesLength; if(lengthOfArr < 0) { printf("Remove index call created a <= 0 length array. This was likely not intended."); @@ -2862,7 +2862,7 @@ INLINEDEF _CUDA_HD int numOfNonUnitDims(const int rank, const Nd4jLong* inShape) return ret; } - INLINEDEF _CUDA_HD Nd4jLong* everyIndexBut(Nd4jLong *indexes,int indexesLength,int begin,int end) { + INLINEDEF _CUDA_HD Nd4jLong* everyIndexBut(const Nd4jLong *indexes,int indexesLength,int begin,int end) { int len = end - indexesLength; traceNew(20); @@ -3086,7 +3086,7 @@ INLINEDEF _CUDA_HD bool haveSameShapeAndStrides(const Nd4jLong *shapeInfo1, cons * @param dataLength * @return */ - INLINEDEF _CUDA_HD Nd4jLong *keep(volatile Nd4jLong *data, int* index, int indexLength, int dataLength) { + INLINEDEF _CUDA_HD Nd4jLong *keep(volatile Nd4jLong *data, int const* index, int indexLength, int dataLength) { traceNew(23); @@ -3113,7 +3113,7 @@ INLINEDEF _CUDA_HD bool haveSameShapeAndStrides(const Nd4jLong *shapeInfo1, cons */ template - INLINEDEF _CUDA_HD T* reverseCopy(T *data, Nd4jLong length) { + INLINEDEF _CUDA_HD T* reverseCopy(T const* data, Nd4jLong length) { if (length < 1) return nullptr; @@ -3129,7 +3129,7 @@ INLINEDEF _CUDA_HD bool haveSameShapeAndStrides(const Nd4jLong *shapeInfo1, cons } template - INLINEDEF _CUDA_HD void reverseCopyTo(T *from, T *to, Nd4jLong length) { + INLINEDEF _CUDA_HD void reverseCopyTo(T const* from, T *to, Nd4jLong length) { if (length < 1) return; for (Nd4jLong i = 0; i <= length / 2; i++) { @@ -3140,7 +3140,7 @@ INLINEDEF _CUDA_HD bool haveSameShapeAndStrides(const Nd4jLong *shapeInfo1, cons } template - INLINEDEF _CUDA_HD void reverseCopyTo(T *from, T *to, Nd4jLong *indexes, Nd4jLong length) { + INLINEDEF _CUDA_HD void reverseCopyTo(T const* from, T *to, Nd4jLong *indexes, Nd4jLong length) { if (length < 1) return; @@ -3161,7 +3161,7 @@ INLINEDEF _CUDA_HD bool haveSameShapeAndStrides(const Nd4jLong *shapeInfo1, cons * @return */ 
template - INLINEDEF _CUDA_HD T* concat(T* arr1, Nd4jLong arr1Length, T* arr2, Nd4jLong arr2Length) { + INLINEDEF _CUDA_HD T* concat(T const* arr1, Nd4jLong const arr1Length, T const* arr2, Nd4jLong const arr2Length) { traceNew(25); @@ -3180,7 +3180,7 @@ INLINEDEF _CUDA_HD bool haveSameShapeAndStrides(const Nd4jLong *shapeInfo1, cons * @return */ template - INLINEDEF _CUDA_HD T *concat(Nd4jLong numArrays, Nd4jLong numTotalElements, T **arr, Nd4jLong *lengths) { + INLINEDEF _CUDA_HD T *concat(Nd4jLong const numArrays, Nd4jLong const numTotalElements, T const **arr, Nd4jLong const *lengths) { T* ret = new T[numTotalElements]; Nd4jLong count = 0; @@ -3206,7 +3206,7 @@ INLINEDEF _CUDA_HD bool haveSameShapeAndStrides(const Nd4jLong *shapeInfo1, cons * @return the length per slice of the given shape * along the given dimension */ - INLINEDEF _CUDA_HD Nd4jLong lengthPerSlice(int rank, Nd4jLong *shape, int* dimension, int dimensionLength) { + INLINEDEF _CUDA_HD Nd4jLong lengthPerSlice(int rank, Nd4jLong const* shape, int const* dimension, int dimensionLength) { if(shape::isVector(shape,rank)) { //return total length for row vectors if(dimensionLength == 1 && shape[0] == 1) { @@ -3230,7 +3230,7 @@ INLINEDEF _CUDA_HD bool haveSameShapeAndStrides(const Nd4jLong *shapeInfo1, cons * @param tensorShape * @return */ - INLINEDEF _CUDA_HD Nd4jLong sliceOffsetForTensor(int rank, int index, Nd4jLong *shape, Nd4jLong *tensorShape, int tensorShapeLength, int* dimension, int dimensionLength) { + INLINEDEF _CUDA_HD Nd4jLong sliceOffsetForTensor(int rank, int index, Nd4jLong const* shape, Nd4jLong const* tensorShape, int tensorShapeLength, int const* dimension, int dimensionLength) { auto tensorLength = prodLong(tensorShape, tensorShapeLength); auto lengthPerSlice2 = lengthPerSlice(rank, shape, dimension, dimensionLength); if (lengthPerSlice2 <= 0) { diff --git a/libnd4j/include/legacy/NativeOpExecutioner.h b/libnd4j/include/legacy/NativeOpExecutioner.h index 4d55a3357..84ab886c4 100644 
--- a/libnd4j/include/legacy/NativeOpExecutioner.h +++ b/libnd4j/include/legacy/NativeOpExecutioner.h @@ -47,11 +47,11 @@ public: */ static void execIndexReduceScalar(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo); + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo); /** * @@ -68,13 +68,13 @@ public: */ static void execReduce3Scalar(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, void *extraParamsVals, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo); + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo); /** @@ -90,13 +90,13 @@ public: */ static void execReduce3(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, void *extraParamsVals, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo); + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo); /** * @@ -113,29 +113,29 @@ public: */ static void execReduce3(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong 
*dXShapeInfo, void *extraParamsVals, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *xTadOnlyShapeInfo, Nd4jLong *xTadOffsets, - Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets); + const Nd4jLong *xTadOnlyShapeInfo, const Nd4jLong *xTadOffsets, + const Nd4jLong *yTadOnlyShapeInfo, const Nd4jLong *yTadOffsets); static void execReduce3All(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, void *extraParamsVals, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *xTadShapeInfo, Nd4jLong *xOffsets, - Nd4jLong *yTadShapeInfo, Nd4jLong *yOffsets); + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xOffsets, + const Nd4jLong *yTadShapeInfo, const Nd4jLong *yOffsets); /** * @@ -150,13 +150,13 @@ public: */ static void execIndexReduce(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + const Nd4jLong 
*tadShapeInfo, const Nd4jLong *tadOffsets); /** * @@ -170,73 +170,76 @@ public: * @param n */ static void execScalar(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalar, Nd4jLong *hSscalarShapeInfo, - void *dScalar, Nd4jLong *dSscalarShapeInfo, - void *extraParams, bool allowParallelism = true); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + const void *hScalar, const Nd4jLong *hSscalarShapeInfo, + const void *dScalar, const Nd4jLong *dSscalarShapeInfo, + void *extraParams, + bool allowParallelism = true); static void execScalarBool(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalar, Nd4jLong *hSscalarShapeInfo, - void *dScalar, Nd4jLong *dSscalarShapeInfo, - void *extraParams, bool allowParallelism = true); + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + const void *hScalar, const Nd4jLong *hSscalarShapeInfo, + const void *dScalar, const Nd4jLong *dSscalarShapeInfo, + void *extraParams, + bool allowParallelism = true); static void execScalarInt(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalar, Nd4jLong *hSscalarShapeInfo, - void *dScalar, Nd4jLong *dSscalarShapeInfo, - void *extraParams, bool allowParallelism = true); + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong 
*dZShapeInfo, + const void *hScalar, const Nd4jLong *hSscalarShapeInfo, + const void *dScalar, const Nd4jLong *dSscalarShapeInfo, + void *extraParams, + bool allowParallelism = true); static void execScalar(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalars, Nd4jLong *hScalarShapeInfo, - void *dScalars, Nd4jLong *dScalarShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, + void const* hScalars, Nd4jLong const* hScalarShapeInfo, + void const* dScalars, Nd4jLong const* dScalarShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ); + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ); static void execScalarBool(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalars, Nd4jLong *hScalarShapeInfo, - void *dScalars, Nd4jLong *dScalarShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + const void *hScalars, const Nd4jLong *hScalarShapeInfo, + const void *dScalars, const Nd4jLong *dScalarShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ); + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ); static void execScalarInt(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong 
*hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalars, Nd4jLong *hScalarShapeInfo, - void *dScalars, Nd4jLong *dScalarShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + const void *hScalars, const Nd4jLong *hScalarShapeInfo, + const void *dScalars, const Nd4jLong *dScalarShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ); /** @@ -252,105 +255,107 @@ static void execScalarInt(sd::LaunchContext *lc, * @param dimensionLength */ static void execBroadcast(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadOnlyShapeInfoZ,Nd4jLong *tadOffsetsZ); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ,const Nd4jLong *tadOffsetsZ); static void execBroadcast(sd::LaunchContext* lc, - const int opNum, - const void *hX, const Nd4jLong *hXShapeInfo, - const void *dX, const Nd4jLong *dXShapeInfo, - 
const void *hY, const Nd4jLong *hYShapeInfo, - const void *dY, const Nd4jLong *dYShapeInfo, - void *hZ, const Nd4jLong *hZShapeInfo, - void *dZ, const Nd4jLong *dZShapeInfo); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo); static void execInverseBroadcast(sd::LaunchContext *lc, int opNum, - void *x, Nd4jLong *xShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *y, Nd4jLong *yShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *result, Nd4jLong *resultShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + const void *x, const Nd4jLong *xShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); static void execBroadcastBool(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams, - int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadOnlyShapeInfoZ,Nd4jLong *tadOffsetsZ); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong 
*dZShapeInfo, + void *extraParams, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ,const Nd4jLong *tadOffsetsZ); - static void execBroadcastBool(sd::LaunchContext* lc, const int opNum, - const void *hX, const Nd4jLong *hXShapeInfo, - const void *dX, const Nd4jLong *dXShapeInfo, - const void *hY, const Nd4jLong *hYShapeInfo, - const void *dY, const Nd4jLong *dYShapeInfo, + static void execBroadcastBool(sd::LaunchContext* lc, + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, void *hZ, const Nd4jLong *hZShapeInfo, void *dZ, const Nd4jLong *dZShapeInfo, void *extraParams); static void execInverseBroadcastBool(sd::LaunchContext *lc, - int opNum, - void *x, Nd4jLong *xShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *y, Nd4jLong *yShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *result, Nd4jLong *resultShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams, - int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + int opNum, + const void *x, const Nd4jLong *xShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); static void execBroadcastInt(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void 
*dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadOnlyShapeInfoZ,Nd4jLong *tadOffsetsZ); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ,const Nd4jLong *tadOffsetsZ); - static void execBroadcastInt(sd::LaunchContext* lc, const int opNum, - const void *hX, const Nd4jLong *hXShapeInfo, - const void *dX, const Nd4jLong *dXShapeInfo, - const void *hY, const Nd4jLong *hYShapeInfo, - const void *dY, const Nd4jLong *dYShapeInfo, - void *hZ, const Nd4jLong *hZShapeInfo, - void *dZ, const Nd4jLong *dZShapeInfo); + static void execBroadcastInt(sd::LaunchContext* lc, + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo); static void execInverseBroadcastInt(sd::LaunchContext *lc, - int opNum, - void *x, Nd4jLong *xShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *y, Nd4jLong *yShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *result, Nd4jLong *resultShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + int opNum, + const void *x, const Nd4jLong *xShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + void *dZ, 
const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); /** * @@ -365,34 +370,34 @@ static void execScalarInt(sd::LaunchContext *lc, * @param n */ static void execPairwiseTransform(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams); static void execPairwiseBoolTransform(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams); static void execPairwiseIntTransform(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong 
*dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams); /** * @@ -405,49 +410,50 @@ static void execScalarInt(sd::LaunchContext *lc, * @param n */ static void execTransformFloat(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); static void execTransformAny(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool allowParallelism = true); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + bool allowParallelism = true); static void execTransformStrict(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); static void execTransformSame(sd::LaunchContext *lc, - int opNum, - void *hX, 
Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); static void execTransformBool(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); /** * * @param opNum @@ -458,44 +464,44 @@ static void execTransformBool(sd::LaunchContext *lc, * @param resultShapeInfo */ static void execReduceFloat(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); static void execReduceSame(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int 
*dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); static void execReduceBool(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); static void execReduceLong(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); /** * @@ -506,49 +512,49 @@ static void execTransformBool(sd::LaunchContext *lc, * @return */ static void execReduceFloatScalar(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo); + int opNum, + const void *hX, const 
Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo); static void execReduceBoolScalar(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo); static void execReduceSameScalar(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo); static void execReduceLongScalar(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo); static void execReduce3TAD(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParamsVals, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *yTadShapeInfo, Nd4jLong *yTadOffsets); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + 
const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParamsVals, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *yTadShapeInfo, const Nd4jLong *yTadOffsets); /** * @@ -562,15 +568,15 @@ static void execTransformBool(sd::LaunchContext *lc, * @param dimensionLength */ static void execSummaryStats(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - bool biasCorrected); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + bool biasCorrected); /** * @@ -582,13 +588,13 @@ static void execTransformBool(sd::LaunchContext *lc, * @param resultShapeInfo */ static void execSummaryStats(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - bool biasCorrected); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + bool biasCorrected); /** * @@ -600,68 +606,51 @@ static void execTransformBool(sd::LaunchContext *lc, * @param resultShapeInfo */ static void execSummaryStatsScalar(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void 
*dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - bool biasCorrected); + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + bool biasCorrected); static void execRandom(sd::LaunchContext *lc, - int opNum, - Nd4jPointer state, - void *hZ, Nd4jLong *hZShapeBuffer, - void *dZ, Nd4jLong *dZShapeBuffer, - void *extraArguments); + int opNum, + Nd4jPointer state, + void *hZ, const Nd4jLong *hZShapeBuffer, + void *dZ, const Nd4jLong *dZShapeBuffer, + void *extraArguments); static void execRandom(sd::LaunchContext *lc, - int opNum, - Nd4jPointer state, - void *hX, Nd4jLong *hXShapeBuffer, - void *dX, Nd4jLong *dXShapeBuffer, - void *hZ, Nd4jLong *hZShapeBuffer, - void *dZ, Nd4jLong *dZShapeBuffer, - void *extraArguments); + int opNum, + Nd4jPointer state, + const void *hX, const Nd4jLong *hXShapeBuffer, + const void *dX, const Nd4jLong *dXShapeBuffer, + void *hZ, const Nd4jLong *hZShapeBuffer, + void *dZ, const Nd4jLong *dZShapeBuffer, + void *extraArguments); static void execRandom(sd::LaunchContext *lc, - int opNum, - Nd4jPointer state, - void *hX, Nd4jLong *hXShapeBuffer, - void *dX, Nd4jLong *dXShapeBuffer, - void *hY, Nd4jLong *hYShapeBuffer, - void *dY, Nd4jLong *dYShapeBuffer, - void *hZ, Nd4jLong *hZShapeBuffer, - void *dZ, Nd4jLong *dZShapeBuffer, - void *extraArguments); + int opNum, + Nd4jPointer state, + const void *hX, const Nd4jLong *hXShapeBuffer, + const void *dX, const Nd4jLong *dXShapeBuffer, + const void *hY, const Nd4jLong *hYShapeBuffer, + const void *dY, const Nd4jLong *dYShapeBuffer, + void *hZ, const Nd4jLong *hZShapeBuffer, + void *dZ, const Nd4jLong *dZShapeBuffer, + void *extraArguments); - template - static FORCEINLINE void execAggregate(sd::LaunchContext *lc, - int opNum, - void **varguments, - int numArguments, - 
Nd4jLong **shapeArguments, - int numShapeArguments, - int *indexArguments, - int numIndexArguments, - int **intArrays, - int numIntArrays, - void *vrealArguments, - int numRealArguments) { - - } - - - inline static void execSort(void *x, Nd4jLong *xShapeInfo, bool descending) { + inline static void execSort(void *x, const Nd4jLong *xShapeInfo, bool descending) { auto xType = sd::ArrayOptions::dataType(xShapeInfo); BUILD_SINGLE_SELECTOR(xType, sd::SpecialMethods, ::sortGeneric(x, xShapeInfo, descending), LIBND4J_TYPES); } - static void execSort(void *x, Nd4jLong *xShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool descending) { + static void execSort(void *x, const Nd4jLong *xShapeInfo, int *dimension, int dimensionLength, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, bool descending) { auto xType = sd::ArrayOptions::dataType(xShapeInfo); BUILD_SINGLE_SELECTOR(xType, sd::SpecialMethods, ::sortTadGeneric(x, xShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffsets, descending), LIBND4J_TYPES); @@ -672,13 +661,13 @@ static void execTransformBool(sd::LaunchContext *lc, } - inline static Nd4jLong encodeBitmap(void *dx, Nd4jLong *xShapeInfo, Nd4jLong N, int *dz, float threshold) { + inline static Nd4jLong encodeBitmap(void *dx, const Nd4jLong *xShapeInfo, Nd4jLong N, int *dz, float threshold) { auto xType = sd::ArrayOptions::dataType(xShapeInfo); BUILD_SINGLE_SELECTOR(xType, return sd::SpecialMethods, ::encodeBitmapGeneric(dx, xShapeInfo, N, dz, threshold), FLOAT_TYPES); } - inline static void decodeBitmap(void *dx, Nd4jLong N, void *dz, Nd4jLong *zShapeInfo) { + inline static void decodeBitmap(const void *dx, Nd4jLong N, void *dz, const Nd4jLong *zShapeInfo) { auto zType = sd::ArrayOptions::dataType(zShapeInfo); BUILD_SINGLE_SELECTOR(zType, sd::SpecialMethods, ::decodeBitmapGeneric(dx, N, dz, zShapeInfo), FLOAT_TYPES); diff --git a/libnd4j/include/legacy/NativeOps.h b/libnd4j/include/legacy/NativeOps.h 
index 665909719..17affd1c3 100755 --- a/libnd4j/include/legacy/NativeOps.h +++ b/libnd4j/include/legacy/NativeOps.h @@ -122,9 +122,9 @@ ND4J_EXPORT void setTADThreshold(int num); */ ND4J_EXPORT void execIndexReduceScalar(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo); + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo); /** * @@ -139,10 +139,10 @@ ND4J_EXPORT void execIndexReduceScalar(Nd4jPointer *extraPointers, */ ND4J_EXPORT void execIndexReduce(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape); + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape); /** * @@ -159,20 +159,20 @@ ND4J_EXPORT void execIndexReduce(Nd4jPointer *extraPointers, ND4J_EXPORT void execBroadcast( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape); + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeInfo, Nd4jLong const* dYShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + 
OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape); ND4J_EXPORT void execBroadcastBool( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeInfo, Nd4jLong const* dYShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraParams, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape); + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape); /** * @@ -189,17 +189,17 @@ ND4J_EXPORT void execBroadcastBool( ND4J_EXPORT void execPairwiseTransform( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeInfo, Nd4jLong const* dYShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraParams); ND4J_EXPORT void execPairwiseTransformBool( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeInfo, Nd4jLong const* dYShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* 
dZShapeInfo, void *extraParams); /** @@ -213,28 +213,28 @@ ND4J_EXPORT void execPairwiseTransformBool( */ ND4J_EXPORT void execReduceFloat(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo); + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo); ND4J_EXPORT void execReduceSame(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo); + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo); ND4J_EXPORT void execReduceBool(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo); + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo); ND4J_EXPORT void execReduceLong(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo); + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo); /** * @@ -247,34 +247,34 @@ ND4J_EXPORT void execReduceLong(Nd4jPointer *extraPointers, */ ND4J_EXPORT void execReduceFloat2(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, 
Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape); + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape); ND4J_EXPORT void execReduceSame2(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape); + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape); ND4J_EXPORT void execReduceBool2(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape); + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape); ND4J_EXPORT void execReduceLong2(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape); + OpaqueDataBuffer *dbZ, 
Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape); /** * @@ -289,10 +289,10 @@ ND4J_EXPORT void execReduceLong2(Nd4jPointer *extraPointers, */ ND4J_EXPORT void execReduce3(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParamsVals, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo); + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeInfo, Nd4jLong const* dYShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo); /** * @@ -305,10 +305,10 @@ ND4J_EXPORT void execReduce3(Nd4jPointer *extraPointers, */ ND4J_EXPORT void execReduce3Scalar(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParamsVals, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo); + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeInfo, Nd4jLong const* dYShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo); /** * * @param opNum @@ -324,24 +324,24 @@ ND4J_EXPORT void execReduce3Scalar(Nd4jPointer *extraPointers, */ ND4J_EXPORT void execReduce3Tad(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParamsVals, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong 
*hDimensionShape, Nd4jLong *dDimensionShape, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets); + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeInfo, Nd4jLong const* dYShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape, + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* yTadOnlyShapeInfo, Nd4jLong const* yTadOffsets); ND4J_EXPORT void execReduce3All(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParamsVals, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape, - Nd4jLong *xTadShapeInfo, Nd4jLong *xOffsets, - Nd4jLong *yTadShapeInfo, Nd4jLong *yOffsets); + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeInfo, Nd4jLong const* dYShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape, + Nd4jLong const* xTadShapeInfo, Nd4jLong const* xOffsets, + Nd4jLong const* yTadShapeInfo, Nd4jLong const* yOffsets); /** * @@ -356,16 +356,16 @@ ND4J_EXPORT void execReduce3All(Nd4jPointer *extraPointers, */ ND4J_EXPORT void execScalar(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbScalar, Nd4jLong *hSscalarShapeInfo, Nd4jLong *dSscalarShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, 
Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbScalar, Nd4jLong const* hSscalarShapeInfo, Nd4jLong const* dSscalarShapeInfo, void *extraParams); ND4J_EXPORT void execScalarBool(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbScalar, Nd4jLong *hSscalarShapeInfo, Nd4jLong *dSscalarShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbScalar, Nd4jLong const* hSscalarShapeInfo, Nd4jLong const* dSscalarShapeInfo, void *extraParams); /** @@ -377,9 +377,9 @@ ND4J_EXPORT void execScalarBool(Nd4jPointer *extraPointers, */ ND4J_EXPORT void execSummaryStatsScalar(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, bool biasCorrected); /** * @@ -392,9 +392,9 @@ ND4J_EXPORT void execSummaryStatsScalar(Nd4jPointer *extraPointers, */ ND4J_EXPORT void execSummaryStats(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, bool biasCorrected); /** * @@ -409,12 +409,12 @@ ND4J_EXPORT void execSummaryStats(Nd4jPointer *extraPointers, */ ND4J_EXPORT void execSummaryStatsTad(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong 
*hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape, bool biasCorrected, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets); /** * @@ -428,32 +428,32 @@ ND4J_EXPORT void execSummaryStatsTad(Nd4jPointer *extraPointers, */ ND4J_EXPORT void execTransformFloat(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraParams); ND4J_EXPORT void execTransformSame(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraParams); ND4J_EXPORT void execTransformBool(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraParams); ND4J_EXPORT void execTransformAny(Nd4jPointer *extraPointers, int opNum, - 
OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraParams); ND4J_EXPORT void execTransformStrict(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraParams); /** @@ -471,23 +471,23 @@ ND4J_EXPORT void execTransformStrict(Nd4jPointer *extraPointers, */ ND4J_EXPORT void execScalarTad(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbScalars, Nd4jLong *hScalarShapeInfo, Nd4jLong *dScalarShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbScalars, Nd4jLong const* hScalarShapeInfo, Nd4jLong const* dScalarShapeInfo, void *extraParams, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ); + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape, + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ); ND4J_EXPORT void execScalarBoolTad(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong 
*hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbScalars, Nd4jLong *hScalarShapeInfo, Nd4jLong *dScalarShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbScalars, Nd4jLong const* hScalarShapeInfo, Nd4jLong const* dScalarShapeInfo, void *extraParams, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ); + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape, + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ); ND4J_EXPORT void specialConcat ( Nd4jPointer *extraPointers, @@ -496,7 +496,7 @@ ND4J_EXPORT void specialConcat ( Nd4jPointer *data, Nd4jPointer *inputShapeInfo, void *result, - Nd4jLong *resultShapeInfo, + Nd4jLong const* resultShapeInfo, Nd4jPointer *tadPointers, Nd4jPointer *offsetPointers); @@ -792,14 +792,14 @@ typedef sd::TadPack OpaqueTadPack; * @param targetBuffer * @param offsetsBuffer */ -ND4J_EXPORT OpaqueTadPack* tadOnlyShapeInfo(Nd4jLong *xShapeInfo, +ND4J_EXPORT OpaqueTadPack* tadOnlyShapeInfo(Nd4jLong const*xShapeInfo, int *dimension, int dimensionLength); -ND4J_EXPORT Nd4jLong* getPrimaryShapeInfo(OpaqueTadPack* pack); -ND4J_EXPORT Nd4jLong* getPrimaryOffsets(OpaqueTadPack* pack); -ND4J_EXPORT Nd4jLong* getSpecialShapeInfo(OpaqueTadPack* pack); -ND4J_EXPORT Nd4jLong* getSpecialOffsets(OpaqueTadPack* pack); +ND4J_EXPORT Nd4jLong const* getPrimaryShapeInfo(OpaqueTadPack* pack); +ND4J_EXPORT Nd4jLong const* getPrimaryOffsets(OpaqueTadPack* pack); +ND4J_EXPORT Nd4jLong const* getSpecialShapeInfo(OpaqueTadPack* pack); +ND4J_EXPORT Nd4jLong const* getSpecialOffsets(OpaqueTadPack* pack); ND4J_EXPORT Nd4jLong getNumberOfTads(OpaqueTadPack* pack); 
ND4J_EXPORT int getShapeInfoLength(OpaqueTadPack* pack); @@ -824,14 +824,14 @@ ND4J_EXPORT void deleteTadPack(OpaqueTadPack* ptr); * @param zTadOffsets */ ND4J_EXPORT void pullRows(Nd4jPointer *extraPointers, - OpaqueDataBuffer *dbX, Nd4jLong *xShapeInfo, Nd4jLong *dxShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *zShapeInfo, Nd4jLong *dzShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* xShapeInfo, Nd4jLong const* dxShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* zShapeInfo, Nd4jLong const* dzShapeInfo, Nd4jLong n, Nd4jLong *indexes, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffsets); + Nd4jLong const* tadShapeInfo, + Nd4jLong const* tadOffsets, + Nd4jLong const* zTadShapeInfo, + Nd4jLong const* zTadOffsets); /** * @@ -843,20 +843,20 @@ ND4J_EXPORT void pullRows(Nd4jPointer *extraPointers, * @param propagate */ ND4J_EXPORT void average(Nd4jPointer *extras, - Nd4jPointer *x, Nd4jLong *xShapeInfo, - Nd4jPointer *dx, Nd4jLong *dxShapeInfo, - void *z, Nd4jLong *zShapeInfo, - void *dz, Nd4jLong *dzShapeInfo, + Nd4jPointer *x, Nd4jLong const* xShapeInfo, + Nd4jPointer *dx, Nd4jLong const* dxShapeInfo, + void *z, Nd4jLong const* zShapeInfo, + void *dz, Nd4jLong const* dzShapeInfo, int n, Nd4jLong length, bool propagate); ND4J_EXPORT void accumulate(Nd4jPointer *extras, - Nd4jPointer *x, Nd4jLong *xShapeInfo, - Nd4jPointer *dx, Nd4jLong *dxShapeInfo, - void *z, Nd4jLong *zShapeInfo, - void *dz, Nd4jLong *dzShapeInfo, + Nd4jPointer *x, Nd4jLong const* xShapeInfo, + Nd4jPointer *dx, Nd4jLong const* dxShapeInfo, + void *z, Nd4jLong const* zShapeInfo, + void *dz, Nd4jLong const* dzShapeInfo, int n, Nd4jLong length); @@ -1004,7 +1004,7 @@ ND4J_EXPORT void execAggregateBatch(Nd4jPointer *extraPointers, ND4J_EXPORT void execRandom(Nd4jPointer *extraPointers, int opNum, Nd4jPointer state, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeBuffer, Nd4jLong *dZShapeBuffer, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeBuffer, Nd4jLong 
const* dZShapeBuffer, void *extraArguments); /** @@ -1023,9 +1023,9 @@ ND4J_EXPORT void execRandom(Nd4jPointer *extraPointers, ND4J_EXPORT void execRandom3(Nd4jPointer *extraPointers, int opNum, Nd4jPointer state, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeBuffer, Nd4jLong *dXShapeBuffer, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeBuffer, Nd4jLong *dYShapeBuffer, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeBuffer, Nd4jLong *dZShapeBuffer, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeBuffer, Nd4jLong const* dXShapeBuffer, + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeBuffer, Nd4jLong const* dYShapeBuffer, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeBuffer, Nd4jLong const* dZShapeBuffer, void *extraArguments); /** @@ -1042,8 +1042,8 @@ ND4J_EXPORT void execRandom3(Nd4jPointer *extraPointers, ND4J_EXPORT void execRandom2(Nd4jPointer *extraPointers, int opNum, Nd4jPointer state, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeBuffer, Nd4jLong *dXShapeBuffer, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeBuffer, Nd4jLong *dZShapeBuffer, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeBuffer, Nd4jLong const* dXShapeBuffer, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeBuffer, Nd4jLong const* dZShapeBuffer, void *extraArguments); @@ -1098,11 +1098,11 @@ ND4J_EXPORT void destroyRandom(Nd4jPointer ptrRandom); */ template -static Nd4jPointer _numpyHeaderForNd4j(Nd4jPointer data,Nd4jPointer shapeBuffer,Nd4jLong wordSize,Nd4jLong *headerSize) { - Nd4jLong *shapeBufferCast = reinterpret_cast(shapeBuffer); +static Nd4jPointer _numpyHeaderForNd4j(Nd4jPointer data,const Nd4jPointer shapeBuffer,Nd4jLong wordSize,Nd4jLong* headerSize) { + Nd4jLong const* shapeBufferCast = reinterpret_cast(shapeBuffer); int rank = shape::rank(shapeBufferCast); - Nd4jLong *shape = shape::shapeOf(shapeBufferCast); - unsigned int *npShape = new unsigned int[rank]; + const Nd4jLong* shape = shape::shapeOf(shapeBufferCast); + unsigned int* npShape = new unsigned int[rank]; for(int i = 0; i < rank; i++) { 
npShape[i] = shape[i]; } @@ -1125,7 +1125,7 @@ static Nd4jPointer _numpyHeaderForNd4j(Nd4jPointer data,Nd4jPointer shapeBuffer, extern "C" { -static Nd4jPointer numpyHeaderForNd4j(Nd4jPointer data,Nd4jPointer shapeBuffer,Nd4jLong wordSize,Nd4jLong *headerSize) { +static Nd4jPointer numpyHeaderForNd4j(Nd4jPointer data,Nd4jPointer shapeBuffer,Nd4jLong wordSize,Nd4jLong* headerSize) { auto shapeBufferCast = reinterpret_cast(shapeBuffer); auto type = sd::ArrayOptions::dataType(shapeBufferCast); BUILD_SINGLE_SELECTOR(type, return _numpyHeaderForNd4j, (data, shapeBuffer, wordSize, headerSize), LIBND4J_TYPES); @@ -1427,65 +1427,53 @@ ND4J_EXPORT Nd4jPointer pointerForAddress(Nd4jLong address); * @return */ ND4J_EXPORT void tear(Nd4jPointer *extraPointers, - OpaqueDataBuffer *dbX, Nd4jLong *xShapeInfo, Nd4jLong *dxShapeInfo, - Nd4jPointer *targets, Nd4jLong *zShapeInfo, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets); - -ND4J_EXPORT Nd4jLong encodeBitmap(Nd4jPointer *extraPointers, void *dx, Nd4jLong *xShapeInfo, Nd4jLong N, int *dz, float threshold); -ND4J_EXPORT void decodeBitmap(Nd4jPointer *extraPointers, void *dx, Nd4jLong N, void *dz, Nd4jLong *zShapeInfo); - - -ND4J_EXPORT void encodeThresholdP1(Nd4jPointer *extraPointers, void *dx, Nd4jLong *xShapeInfo, Nd4jLong N, int *dz, float threshold); -ND4J_EXPORT void encodeThresholdP2Int(Nd4jPointer *extraPointers, int *dx, Nd4jLong N, int *dz); -ND4J_EXPORT void encodeThresholdP3(Nd4jPointer *extraPointers, void *dx, Nd4jLong *xShapeInfo, int *offsets, Nd4jLong N, int *dz); - - -ND4J_EXPORT void decodeThreshold(Nd4jPointer *extraPointers, void *dx, Nd4jLong N, void *dz, Nd4jLong *zShapeInfo); - + OpaqueDataBuffer *dbX, Nd4jLong const* xShapeInfo, Nd4jLong const* dxShapeInfo, + Nd4jPointer *targets, Nd4jLong const* zShapeInfo, + Nd4jLong const* tadShapeInfo, + Nd4jLong const* tadOffsets); ND4J_EXPORT void sort(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dx, Nd4jLong *dxShapeInfo, + void *x, 
Nd4jLong const* xShapeInfo, + void *dx, Nd4jLong const* dxShapeInfo, bool descending); ND4J_EXPORT void sortByKey(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dx, Nd4jLong *dxShapeInfo, - void *y, Nd4jLong *yShapeInfo, - void *dy, Nd4jLong *dyShapeInfo, + void *x, Nd4jLong const* xShapeInfo, + void *dx, Nd4jLong const* dxShapeInfo, + void *y, Nd4jLong const* yShapeInfo, + void *dy, Nd4jLong const* dyShapeInfo, bool descending); ND4J_EXPORT void sortByValue(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dx, Nd4jLong *dxShapeInfo, - void *y, Nd4jLong *yShapeInfo, - void *dy, Nd4jLong *dyShapeInfo, + void *x, Nd4jLong const* xShapeInfo, + void *dx, Nd4jLong const* dxShapeInfo, + void *y, Nd4jLong const* yShapeInfo, + void *dy, Nd4jLong const* dyShapeInfo, bool descending); ND4J_EXPORT void sortTad(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dx, Nd4jLong *dxShapeInfo, + void *x, Nd4jLong const* xShapeInfo, + void *dx, Nd4jLong const* dxShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets, + Nd4jLong const* tadShapeInfo, + Nd4jLong const* tadOffsets, bool descending); ND4J_EXPORT void sortTadByKey(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dx, Nd4jLong *dxShapeInfo, - void *y, Nd4jLong *yShapeInfo, - void *dy, Nd4jLong *dyShapeInfo, + void *x, Nd4jLong const* xShapeInfo, + void *dx, Nd4jLong const* dxShapeInfo, + void *y, Nd4jLong const* yShapeInfo, + void *dy, Nd4jLong const* dyShapeInfo, int *dimension, int dimensionLength, bool descending); ND4J_EXPORT void sortTadByValue(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dx, Nd4jLong *dxShapeInfo, - void *y, Nd4jLong *yShapeInfo, - void *dy, Nd4jLong *dyShapeInfo, + void *x, Nd4jLong const* xShapeInfo, + void *dx, Nd4jLong const* dxShapeInfo, + void *y, Nd4jLong const* yShapeInfo, + void *dy, Nd4jLong const* dyShapeInfo, int *dimension, int dimensionLength, 
bool descending); @@ -1521,7 +1509,7 @@ ND4J_EXPORT OpaqueShapeList* calculateOutputShapes(Nd4jPointer* extraPointers, N ND4J_EXPORT OpaqueShapeList* calculateOutputShapes2(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputShapes, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool *bArgs, int numBArgs, int *dArgs, int numDArgs); ND4J_EXPORT Nd4jLong getShapeListSize(OpaqueShapeList* list); -ND4J_EXPORT Nd4jLong* getShape(OpaqueShapeList* list, Nd4jLong i); +ND4J_EXPORT Nd4jLong const* getShape(OpaqueShapeList* list, Nd4jLong i); ND4J_EXPORT void deleteShapeList(Nd4jPointer shapeList); @@ -1538,7 +1526,7 @@ ND4J_EXPORT OpaqueVariable* getVariable(OpaqueVariablesSet* set, Nd4jLong i); ND4J_EXPORT int getVariableId(OpaqueVariable* variable); ND4J_EXPORT int getVariableIndex(OpaqueVariable* variable); ND4J_EXPORT const char* getVariableName(OpaqueVariable* variable); -ND4J_EXPORT Nd4jLong* getVariableShape(OpaqueVariable* variable); +ND4J_EXPORT Nd4jLong const* getVariableShape(OpaqueVariable* variable); ND4J_EXPORT void* getVariableBuffer(OpaqueVariable* variable); ND4J_EXPORT int unregisterGraph(Nd4jPointer *extraPointers, Nd4jLong graphId); @@ -1557,7 +1545,7 @@ ND4J_EXPORT void deleteGraphState(Nd4jPointer state); ND4J_EXPORT void deleteResultWrapper(Nd4jPointer ptr); -ND4J_EXPORT int estimateThreshold(Nd4jPointer *extraPointers, Nd4jPointer x, Nd4jLong *xShapeInfo, int N, float threshold); +ND4J_EXPORT int estimateThreshold(Nd4jPointer *extraPointers, Nd4jPointer x, Nd4jLong const* xShapeInfo, int N, float threshold); // this method executes op that requires scope to be present: if/while/cond/whatever ND4J_EXPORT Nd4jStatus execCustomOpWithScope(Nd4jPointer *extraPointers, Nd4jPointer state, Nd4jLong opHash, Nd4jLong *scopes, int numScopes, Nd4jPointer *inputBuffers, Nd4jPointer *inputShapes, int numInputs, Nd4jPointer *outputBuffers, Nd4jPointer *outputShapes, int numOutputs); @@ -1569,11 
+1557,11 @@ ND4J_EXPORT char* getUtf8StringBuffer(Nd4jPointer *extraPointers, Nd4jPointer pt ND4J_EXPORT void deleteUtf8String(Nd4jPointer *extraPointers, Nd4jPointer ptr); ND4J_EXPORT void scatterUpdate(Nd4jPointer *extraPointers, int opCode, int numOfSubArrs, - void* hX, Nd4jLong* hXShapeInfo, Nd4jLong* hXOffsets, - void* dX, Nd4jLong* dXShapeInfo, Nd4jLong* dXOffsets, - void* hY, Nd4jLong* hYShapeInfo, Nd4jLong* hYOffsets, - void* dY, Nd4jLong* dYShapeInfo, Nd4jLong* dYOffsets, - void* hIindexes, Nd4jLong* hIndicesShapeInfo, void* dIindexes, Nd4jLong* dIndicesShapeInfo); + void* hX, Nd4jLong const* hXShapeInfo, Nd4jLong const* hXOffsets, + void* dX, Nd4jLong const* dXShapeInfo, Nd4jLong const* dXOffsets, + void* hY, Nd4jLong const* hYShapeInfo, Nd4jLong const* hYOffsets, + void* dY, Nd4jLong const* dYShapeInfo, Nd4jLong const* dYOffsets, + void* hIindexes, Nd4jLong const* hIndicesShapeInfo, void* dIindexes, Nd4jLong const* dIndicesShapeInfo); ND4J_EXPORT void inspectArray(Nd4jPointer *extraPointers, Nd4jPointer buffer, Nd4jLong *shapeInfo, Nd4jPointer specialBuffer, Nd4jLong *specialShapeInfo, Nd4jPointer debugInfo); @@ -1582,7 +1570,7 @@ typedef sd::ConstantDataBuffer OpaqueConstantDataBuffer; ND4J_EXPORT OpaqueConstantDataBuffer* shapeBuffer(int rank, Nd4jLong *shape, Nd4jLong *strides, sd::DataType dtype, char order, Nd4jLong ews, bool empty); -ND4J_EXPORT OpaqueConstantDataBuffer* constantBufferLong(sd::DataType dtype, Nd4jLong *data, int length); +ND4J_EXPORT OpaqueConstantDataBuffer* constantBufferLong(sd::DataType dtype, Nd4jLong const* data, int length); ND4J_EXPORT OpaqueConstantDataBuffer* constantBufferDouble(sd::DataType dtype, double *data, int length); ND4J_EXPORT OpaqueConstantDataBuffer* constantBuffer(sd::DataType dtype, sd::ConstantDescriptor *descriptor); diff --git a/libnd4j/include/legacy/cpu/NativeOpExecutioner.cpp b/libnd4j/include/legacy/cpu/NativeOpExecutioner.cpp index b3f15e345..ad75922e4 100644 --- 
a/libnd4j/include/legacy/cpu/NativeOpExecutioner.cpp +++ b/libnd4j/include/legacy/cpu/NativeOpExecutioner.cpp @@ -77,11 +77,11 @@ * @param hZShapeInfo */ void NativeOpExecutioner::execIndexReduceScalar(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo) { + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo) { @@ -106,22 +106,21 @@ void NativeOpExecutioner::execIndexReduceScalar(sd::LaunchContext *lc, int opNu */ void NativeOpExecutioner::execIndexReduce(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto zType = sd::ArrayOptions::dataType(hZShapeInfo); - Nd4jLong* hz = reinterpret_cast(hZ); + auto hz = reinterpret_cast(hZ); BUILD_DOUBLE_SELECTOR(xType, zType, functions::indexreduce::IndexReduce, ::exec(opNum, hX, hXShapeInfo, extraParams, hz, hZShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffsets), LIBND4J_TYPES, INDEXING_TYPES); -// BUILD_SINGLE_SELECTOR(xType, functions::indexreduce::IndexReduce, ::exec(opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffsets), LIBND4J_TYPES); } 
//////////////////////////////////////////////////////////////////////// @@ -139,16 +138,16 @@ void NativeOpExecutioner::execIndexReduce(sd::LaunchContext *lc, */ void NativeOpExecutioner::execBroadcast(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadOnlyShapeInfoZ,Nd4jLong *tadOffsetsZ) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ,const Nd4jLong *tadOffsetsZ) { @@ -230,15 +229,15 @@ void NativeOpExecutioner::execBroadcast(sd::LaunchContext* lc, const int opNum, void NativeOpExecutioner::execInverseBroadcast(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadOnlyShapeInfoZ,Nd4jLong *tadOffsetsZ) { + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ,const Nd4jLong *tadOffsetsZ) { auto xType = 
sd::ArrayOptions::dataType(hXShapeInfo); auto yType = sd::ArrayOptions::dataType(hYShapeInfo); auto zType = sd::ArrayOptions::dataType(hZShapeInfo); @@ -269,17 +268,17 @@ void NativeOpExecutioner::execInverseBroadcast(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execBroadcastBool(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams, - int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadOnlyShapeInfoZ,Nd4jLong *tadOffsetsZ) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ,const Nd4jLong *tadOffsetsZ) { if (shape::isEmpty(hXShapeInfo) || shape::isEmpty(hYShapeInfo)) @@ -320,17 +319,17 @@ void NativeOpExecutioner::execBroadcastBool(sd::LaunchContext* lc, const int opN void NativeOpExecutioner::execInverseBroadcastBool(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams, - int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadOnlyShapeInfoZ,Nd4jLong *tadOffsetsZ) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void 
*hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ,const Nd4jLong *tadOffsetsZ) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto yType = sd::ArrayOptions::dataType(hYShapeInfo); @@ -358,16 +357,16 @@ void NativeOpExecutioner::execInverseBroadcastBool(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execBroadcastInt(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadOnlyShapeInfoZ,Nd4jLong *tadOffsetsZ) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ,const Nd4jLong *tadOffsetsZ) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -422,16 +421,16 @@ void NativeOpExecutioner::execBroadcastInt(sd::LaunchContext *lc, const int opN } void NativeOpExecutioner::execInverseBroadcastInt(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int 
dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadOnlyShapeInfoZ,Nd4jLong *tadOffsetsZ) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ,const Nd4jLong *tadOffsetsZ) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto yType = sd::ArrayOptions::dataType(hYShapeInfo); @@ -471,14 +470,14 @@ void NativeOpExecutioner::execInverseBroadcastInt(sd::LaunchContext *lc, * @param n */ void NativeOpExecutioner::execPairwiseTransform(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto yType = sd::ArrayOptions::dataType(hYShapeInfo); @@ -504,14 +503,14 @@ void NativeOpExecutioner::execPairwiseTransform(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execPairwiseBoolTransform(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void 
*extraParams) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -538,14 +537,14 @@ void NativeOpExecutioner::execPairwiseBoolTransform(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execPairwiseIntTransform(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto yType = sd::ArrayOptions::dataType(hYShapeInfo); @@ -580,14 +579,14 @@ void NativeOpExecutioner::execPairwiseIntTransform(sd::LaunchContext *lc, * @param hZShapeInfo */ void NativeOpExecutioner::execReduceFloat(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong 
*tadShapeInfo, const Nd4jLong *tadOffsets) { @@ -609,14 +608,14 @@ void NativeOpExecutioner::execReduceFloat(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduceSame(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -637,14 +636,14 @@ void NativeOpExecutioner::execReduceSame(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduceBool(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -665,14 +664,14 @@ void NativeOpExecutioner::execReduceBool(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduceLong(sd::LaunchContext *lc, - int opNum, - void *hX, 
Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -701,12 +700,12 @@ void NativeOpExecutioner::execReduceLong(sd::LaunchContext *lc, * @return */ void NativeOpExecutioner::execReduceFloatScalar(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -717,12 +716,12 @@ void NativeOpExecutioner::execReduceFloatScalar(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduceSameScalar(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -732,14 +731,12 @@ void NativeOpExecutioner::execReduceSameScalar(sd::LaunchContext *lc, 
//////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduceBoolScalar(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo) { - - + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto zType = sd::ArrayOptions::dataType(hZShapeInfo); @@ -749,13 +746,12 @@ void NativeOpExecutioner::execReduceBoolScalar(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduceLongScalar(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo) { - + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto zType = sd::ArrayOptions::dataType(hZShapeInfo); @@ -779,14 +775,15 @@ void NativeOpExecutioner::execReduceLongScalar(sd::LaunchContext *lc, * @param dimensionLength */ void NativeOpExecutioner::execReduce3Scalar(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParamsVals, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParamsVals, + const void *hY, const Nd4jLong *hYShapeInfo, + const 
void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo) { + auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto zType = sd::ArrayOptions::dataType(hZShapeInfo); @@ -807,15 +804,14 @@ void NativeOpExecutioner::execReduce3Scalar(sd::LaunchContext *lc, * @param hZShapeInfo */ void NativeOpExecutioner::execReduce3(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParamsVals, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo) { - + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParamsVals, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto zType = sd::ArrayOptions::dataType(hZShapeInfo); @@ -826,17 +822,17 @@ void NativeOpExecutioner::execReduce3(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduce3(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParamsVals, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *xTadOnlyShapeInfo, Nd4jLong *xTadOffsets, - Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParamsVals, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, 
+ int *dimension, int dimensionLength, + const Nd4jLong *xTadOnlyShapeInfo, const Nd4jLong *xTadOffsets, + const Nd4jLong *yTadOnlyShapeInfo, const Nd4jLong *yTadOffsets) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -867,18 +863,17 @@ void NativeOpExecutioner::execReduce3(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduce3All(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParamsVals, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *xTadShapeInfo, Nd4jLong *xOffsets, - Nd4jLong *yTadShapeInfo, Nd4jLong *yOffsets) { - + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParamsVals, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xOffsets, + const Nd4jLong *yTadShapeInfo, const Nd4jLong *yOffsets) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto zType = sd::ArrayOptions::dataType(hZShapeInfo); @@ -895,19 +890,17 @@ void NativeOpExecutioner::execReduce3All(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduce3TAD(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParamsVals, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *yTadShapeInfo, 
Nd4jLong *yTadOffsets) { - - + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParamsVals, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *yTadShapeInfo, const Nd4jLong *yTadOffsets) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto zType = sd::ArrayOptions::dataType(hZShapeInfo); @@ -948,14 +941,15 @@ void NativeOpExecutioner::execReduce3TAD(sd::LaunchContext *lc, * @param n */ void NativeOpExecutioner::execScalar(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalar, Nd4jLong *hScalarShapeInfo, - void *dScalar, Nd4jLong *dScalarShapeInfo, - void *extraParams, bool allowParallelism) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + const void *hScalar, const Nd4jLong *hScalarShapeInfo, + const void *dScalar, const Nd4jLong *dScalarShapeInfo, + void *extraParams, + bool allowParallelism) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto yType = sd::ArrayOptions::dataType(hScalarShapeInfo); @@ -983,16 +977,16 @@ void NativeOpExecutioner::execScalar(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execScalar(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const*hXShapeInfo, + void const* dX, Nd4jLong const*dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - 
void *hScalars, Nd4jLong *hScalarShapeInfo, - void *dScalars, Nd4jLong *dScalarShapeInfo, + void *hZ, Nd4jLong const*hZShapeInfo, + void *dZ, Nd4jLong const*dZShapeInfo, + void const* hScalars, Nd4jLong const*hScalarShapeInfo, + void const* dScalars, Nd4jLong const*dScalarShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { + Nd4jLong const*tadShapeInfo, Nd4jLong const*tadOffsets, + Nd4jLong const*tadShapeInfoZ, Nd4jLong const*tadOffsetsZ) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto yType = sd::ArrayOptions::dataType(hScalarShapeInfo); @@ -1019,14 +1013,15 @@ void NativeOpExecutioner::execScalar(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execScalarBool(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalar, Nd4jLong *hSscalarShapeInfo, - void *dScalar, Nd4jLong *dSscalarShapeInfo, - void *extraParams, bool allowParallelism) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + const void *hScalar, const Nd4jLong *hSscalarShapeInfo, + const void *dScalar, const Nd4jLong *dSscalarShapeInfo, + void *extraParams, + bool allowParallelism) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto yType = sd::ArrayOptions::dataType(hSscalarShapeInfo); @@ -1052,17 +1047,17 @@ void NativeOpExecutioner::execScalarBool(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execScalarBool(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - 
void *dZ, Nd4jLong *dZShapeInfo, - void *hScalars, Nd4jLong *hScalarShapeInfo, - void *dScalars, Nd4jLong *dScalarShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + const void *hScalars, const Nd4jLong *hScalarShapeInfo, + const void *dScalars, const Nd4jLong *dScalarShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto yType = sd::ArrayOptions::dataType(hScalarShapeInfo); @@ -1087,14 +1082,15 @@ void NativeOpExecutioner::execScalarBool(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execScalarInt(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalar, Nd4jLong *hSscalarShapeInfo, - void *dScalar, Nd4jLong *dSscalarShapeInfo, - void *extraParams, bool allowParallelism) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + const void *hScalar, const Nd4jLong *hSscalarShapeInfo, + const void *dScalar, const Nd4jLong *dSscalarShapeInfo, + void *extraParams, + bool allowParallelism) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto yType = sd::ArrayOptions::dataType(hSscalarShapeInfo); @@ -1120,17 +1116,17 @@ void NativeOpExecutioner::execScalarInt(sd::LaunchContext *lc, 
//////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execScalarInt(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalars, Nd4jLong *hScalarShapeInfo, - void *dScalars, Nd4jLong *dScalarShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + const void *hScalars, const Nd4jLong *hScalarShapeInfo, + const void *dScalars, const Nd4jLong *dScalarShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto yType = sd::ArrayOptions::dataType(hScalarShapeInfo); @@ -1164,13 +1160,13 @@ void NativeOpExecutioner::execScalarInt(sd::LaunchContext *lc, * @param hZShapeInfo */ void NativeOpExecutioner::execSummaryStats(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - bool biasCorrected) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + bool biasCorrected) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -1190,13 +1186,13 @@ void NativeOpExecutioner::execSummaryStats(sd::LaunchContext *lc, * @param hZShapeInfo */ void NativeOpExecutioner::execSummaryStatsScalar(sd::LaunchContext *lc, - int 
opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - bool biasCorrected) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + bool biasCorrected) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -1218,15 +1214,15 @@ void NativeOpExecutioner::execSummaryStatsScalar(sd::LaunchContext *lc, * @param dimensionLength */ void NativeOpExecutioner::execSummaryStats(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - bool biasCorrected) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + bool biasCorrected) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto zType = sd::ArrayOptions::dataType(hZShapeInfo); @@ -1246,13 +1242,13 @@ void NativeOpExecutioner::execSummaryStats(sd::LaunchContext *lc, * @param n */ void NativeOpExecutioner::execTransformFloat(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void 
*extraParams, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto zType = sd::ArrayOptions::dataType(hZShapeInfo); @@ -1268,13 +1264,13 @@ void NativeOpExecutioner::execTransformFloat(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execTransformBool(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto zType = sd::ArrayOptions::dataType(hZShapeInfo); @@ -1290,13 +1286,14 @@ void NativeOpExecutioner::execTransformBool(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execTransformAny(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool allowParallelism) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + bool allowParallelism) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto zType = sd::ArrayOptions::dataType(hZShapeInfo); @@ -1319,13 +1316,13 @@ void NativeOpExecutioner::execTransformAny(sd::LaunchContext *lc, 
//////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execTransformSame(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto zType = sd::ArrayOptions::dataType(hZShapeInfo); @@ -1341,13 +1338,13 @@ void NativeOpExecutioner::execTransformSame(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execTransformStrict(sd::LaunchContext *lc, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + int opNum, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraParams, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); auto zType = sd::ArrayOptions::dataType(hZShapeInfo); @@ -1363,11 +1360,11 @@ void NativeOpExecutioner::execTransformStrict(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execRandom(sd::LaunchContext *lc, - int opNum, - Nd4jPointer state, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraArguments) { + int opNum, + Nd4jPointer state, + void *hZ, const Nd4jLong 
*hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraArguments) { auto zType = sd::ArrayOptions::dataType(hZShapeInfo); @@ -1380,14 +1377,13 @@ void NativeOpExecutioner::execRandom(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execRandom(sd::LaunchContext *lc, - int opNum, - Nd4jPointer state, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraArguments) { - + int opNum, + Nd4jPointer state, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraArguments) { auto zType = sd::ArrayOptions::dataType(hZShapeInfo); @@ -1399,16 +1395,15 @@ void NativeOpExecutioner::execRandom(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execRandom(sd::LaunchContext *lc, - int opNum, - Nd4jPointer state, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraArguments) { - + int opNum, + Nd4jPointer state, + const void *hX, const Nd4jLong *hXShapeInfo, + const void *dX, const Nd4jLong *dXShapeInfo, + const void *hY, const Nd4jLong *hYShapeInfo, + const void *dY, const Nd4jLong *dYShapeInfo, + void *hZ, const Nd4jLong *hZShapeInfo, + void *dZ, const Nd4jLong *dZShapeInfo, + void *extraArguments) { auto xType = sd::ArrayOptions::dataType(hZShapeInfo); diff --git a/libnd4j/include/legacy/cpu/NativeOps.cpp b/libnd4j/include/legacy/cpu/NativeOps.cpp index 2b394f840..799351ccc 100644 --- a/libnd4j/include/legacy/cpu/NativeOps.cpp +++ b/libnd4j/include/legacy/cpu/NativeOps.cpp @@ -102,9 +102,9 @@ void setTADThreshold(int num) { */ 
void execIndexReduceScalar(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo) { + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo) { try { NativeOpExecutioner::execIndexReduceScalar(nullptr, opNum, dbX->primary(), hXShapeInfo, dbX->special(), dXShapeInfo, extraParams, dbZ->primary(), hZShapeInfo, dbZ->special(), dZShapeInfo); } catch (std::exception &e) { @@ -125,10 +125,10 @@ void execIndexReduceScalar(Nd4jPointer *extraPointers, * @param dimensionLength */ void execIndexReduce(Nd4jPointer *extraPointers,int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape) { + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbDimension, const Nd4jLong *hDimensionShape, const Nd4jLong *dDimensionShape) { try { auto dimension = reinterpret_cast(dbDimension->primary()); int dimensionLength = static_cast(shape::length(hDimensionShape)); @@ -176,18 +176,16 @@ void execIndexReduce(Nd4jPointer *extraPointers,int opNum, */ void execBroadcast(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape) { + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbY, const 
Nd4jLong *hYShapeInfo, const Nd4jLong *dYShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbDimension, const Nd4jLong *hDimensionShape, const Nd4jLong *dDimensionShape) { try { auto dimension = reinterpret_cast(dbDimension->primary()); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + auto dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPackX = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, - dimensionLength); - auto tadPackZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(hZShapeInfo, dimension, - dimensionLength); + auto tadPackX = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); + auto tadPackZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(hZShapeInfo, dimension, dimensionLength); auto hTADShapeInfo = tadPackX.primaryShapeInfo(); auto hTADOffsets = tadPackX.primaryOffsets(); @@ -216,19 +214,17 @@ void execBroadcast(Nd4jPointer *extraPointers, void execBroadcastBool(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbY, const Nd4jLong *hYShapeInfo, const Nd4jLong *dYShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, void *extraParams, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape) { + OpaqueDataBuffer *dbDimension, const Nd4jLong *hDimensionShape, const Nd4jLong *dDimensionShape) { try { auto dimension = reinterpret_cast(dbDimension->primary()); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + auto dimensionLength = 
static_cast(shape::length(hDimensionShape)); - auto tadPackX = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, - dimensionLength); - auto tadPackZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(hZShapeInfo, dimension, - dimensionLength); + auto tadPackX = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); + auto tadPackZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(hZShapeInfo, dimension, dimensionLength); auto hTADShapeInfo = tadPackX.primaryShapeInfo(); auto hTADOffsets = tadPackX.primaryOffsets(); @@ -272,9 +268,9 @@ void execBroadcastBool(Nd4jPointer *extraPointers, void execPairwiseTransform( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbY, const Nd4jLong *hYShapeInfo, const Nd4jLong *dYShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, void *extraParams) { try { NativeOpExecutioner::execPairwiseTransform(nullptr, @@ -301,9 +297,9 @@ void execPairwiseTransform( void execPairwiseTransformBool( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbY, const Nd4jLong *hYShapeInfo, const Nd4jLong *dYShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, void *extraParams) { try { @@ -340,9 +336,9 @@ void execPairwiseTransformBool( void execReduceFloat( Nd4jPointer *extraPointers, int 
opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo) { + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo) { try { NativeOpExecutioner::execReduceFloatScalar(nullptr, @@ -365,9 +361,9 @@ void execReduceFloat( void execReduceSame( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo) { + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo) { try { NativeOpExecutioner::execReduceSameScalar(nullptr, @@ -390,9 +386,9 @@ void execReduceSame( void execReduceBool( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo) { + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo) { try { NativeOpExecutioner::execReduceBoolScalar(nullptr, opNum, @@ -414,9 +410,9 @@ void execReduceBool( void execReduceLong( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo) { + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo) { try { NativeOpExecutioner::execReduceLongScalar(nullptr, opNum, @@ -446,16 +442,15 @@ void execReduceLong( */ void execReduceFloat2(Nd4jPointer *extraPointers, int opNum, - 
OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape) { + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbDimension, const Nd4jLong *hDimensionShape, const Nd4jLong *dDimensionShape) { try { auto dimension = reinterpret_cast(dbDimension->primary()); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + auto dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPackX = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, - dimensionLength); + auto tadPackX = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); auto hTADShapeInfo = tadPackX.primaryShapeInfo(); auto hTADOffsets = tadPackX.primaryOffsets(); @@ -482,13 +477,13 @@ void execReduceFloat2(Nd4jPointer *extraPointers, void execReduceBool2(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape) { + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbDimension, const Nd4jLong *hDimensionShape, const Nd4jLong *dDimensionShape) { try { auto dimension = reinterpret_cast(dbDimension->primary()); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + auto dimensionLength = static_cast(shape::length(hDimensionShape)); auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, 
dimensionLength); @@ -518,10 +513,10 @@ void execReduceBool2(Nd4jPointer *extraPointers, void execReduceSame2(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape) { + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbDimension, const Nd4jLong *hDimensionShape, const Nd4jLong *dDimensionShape) { try { auto dimension = reinterpret_cast(dbDimension->primary()); int dimensionLength = static_cast(shape::length(hDimensionShape)); @@ -554,16 +549,15 @@ void execReduceSame2(Nd4jPointer *extraPointers, void execReduceLong2(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape) { + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbDimension, const Nd4jLong *hDimensionShape, const Nd4jLong *dDimensionShape) { try { auto dimension = reinterpret_cast(dbDimension->primary()); int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, - dimensionLength); + auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); auto hTADShapeInfo = tadPack.primaryShapeInfo(); auto hTADOffsets = tadPack.primaryOffsets(); @@ -601,10 +595,10 @@ void execReduceLong2(Nd4jPointer *extraPointers, */ void 
execReduce3(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo) { + OpaqueDataBuffer *dbY, const Nd4jLong *hYShapeInfo, const Nd4jLong *dYShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo) { try { NativeOpExecutioner::execReduce3(nullptr, opNum, dbX->primary(), hXShapeInfo, dbX->special(), dXShapeInfo, extraParams, dbY->primary(), hYShapeInfo, dbY->special(), dYShapeInfo, dbZ->primary(), hZShapeInfo, dbZ->special(), dZShapeInfo); @@ -624,10 +618,10 @@ void execReduce3(Nd4jPointer *extraPointers, * @param hYShapeInfo */ void execReduce3Scalar(Nd4jPointer *extraPointers,int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo) { + OpaqueDataBuffer *dbY, const Nd4jLong *hYShapeInfo, const Nd4jLong *dYShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo) { try { NativeOpExecutioner::execReduce3Scalar(nullptr, opNum, dbX->primary(), hXShapeInfo, dbX->special(), dXShapeInfo, extraParams, dbY->primary(), hYShapeInfo, dbY->special(), dYShapeInfo, dbZ->primary(), hZShapeInfo, dbZ->special(), dZShapeInfo); @@ -651,16 +645,16 @@ void execReduce3Scalar(Nd4jPointer *extraPointers,int opNum, */ void execReduce3Tad(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, void *extraParams, - 
OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets) { + OpaqueDataBuffer *dbY, const Nd4jLong *hYShapeInfo, const Nd4jLong *dYShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbDimension, const Nd4jLong *hDimensionShape, const Nd4jLong *dDimensionShape, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *yTadOnlyShapeInfo, const Nd4jLong *yTadOffsets) { try { auto dimension = reinterpret_cast(dbDimension->primary()); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + auto dimensionLength = static_cast(shape::length(hDimensionShape)); if (extraPointers == nullptr || extraPointers[2] == 0) { NativeOpExecutioner::execReduce3(LaunchContext::defaultContext(), opNum, dbX->primary(), hXShapeInfo, dbX->special(), dXShapeInfo, @@ -704,9 +698,9 @@ bool isBlasVersionMatches(int major, int minor, int build) { void execScalar( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbScalar, Nd4jLong *hScalarShapeInfo, Nd4jLong *dScalarShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbScalar, const Nd4jLong *hScalarShapeInfo, const Nd4jLong *dScalarShapeInfo, void *extraParams) { try { NativeOpExecutioner::execScalar(nullptr, @@ -733,9 +727,9 @@ void execScalar( void execScalarBool( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - 
OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbScalar, Nd4jLong *hScalarShapeInfo, Nd4jLong *dScalarShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbScalar, const Nd4jLong *hScalarShapeInfo, const Nd4jLong *dScalarShapeInfo, void *extraParams) { try { NativeOpExecutioner::execScalarBool(nullptr, @@ -768,9 +762,9 @@ void execScalarBool( */ void execSummaryStatsScalar(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, bool biasCorrected) { try { NativeOpExecutioner::execSummaryStatsScalar(nullptr, @@ -801,9 +795,9 @@ void execSummaryStatsScalar(Nd4jPointer *extraPointers, */ void execSummaryStats(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, bool biasCorrected) { try { NativeOpExecutioner::execSummaryStats(nullptr, @@ -836,12 +830,12 @@ void execSummaryStats(Nd4jPointer *extraPointers, */ void execSummaryStatsTad(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, 
Nd4jLong *dDimensionShape, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbDimension, const Nd4jLong *hDimensionShape, const Nd4jLong *dDimensionShape, bool biasCorrected, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { try { auto dimension = reinterpret_cast(dbDimension->primary()); int dimensionLength = static_cast(shape::length(hDimensionShape)); @@ -882,8 +876,8 @@ void execSummaryStatsTad(Nd4jPointer *extraPointers, void execTransformFloat( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, void *extraParams) { try { NativeOpExecutioner::execTransformFloat(nullptr, @@ -908,8 +902,8 @@ void execTransformFloat( void execTransformSame( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, void *extraParams) { try { NativeOpExecutioner::execTransformSame(nullptr, @@ -934,8 +928,8 @@ void execTransformSame( void execTransformBool( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, void *extraParams) { try { NativeOpExecutioner::execTransformBool(nullptr, @@ -960,8 +954,8 @@ void 
execTransformBool( void execTransformAny( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, void *extraParams) { try { NativeOpExecutioner::execTransformAny(nullptr, @@ -986,8 +980,8 @@ void execTransformAny( void execTransformStrict( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, void *extraParams) { try { NativeOpExecutioner::execTransformStrict(nullptr, @@ -1011,19 +1005,17 @@ void execTransformStrict( void execReduce3All(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, void *extraParamsVals, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape, - Nd4jLong *xTadShapeInfo, - Nd4jLong *xOffsets, - Nd4jLong *yTadShapeInfo, - Nd4jLong *yOffsets) { + OpaqueDataBuffer *dbY, const Nd4jLong *hYShapeInfo, const Nd4jLong *dYShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbDimension, const Nd4jLong *hDimensionShape, const Nd4jLong *dDimensionShape, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xOffsets, + const Nd4jLong *yTadShapeInfo, const Nd4jLong *yOffsets) { try { auto dimension = 
reinterpret_cast(dbDimension->primary()); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + auto dimensionLength = static_cast(shape::length(hDimensionShape)); NativeOpExecutioner::execReduce3All(nullptr, opNum, dbX->primary(), hXShapeInfo, dbX->special(), dXShapeInfo, extraParamsVals, dbY->primary(), @@ -1046,7 +1038,7 @@ void specialConcat( Nd4jPointer *data, Nd4jPointer *inputShapeInfo, void *hZ, - Nd4jLong *hZShapeInfo, + Nd4jLong const* hZShapeInfo, Nd4jPointer *tadPointers, Nd4jPointer *offsetPointers) { try { @@ -1227,7 +1219,7 @@ void setGridLimit(int gridSize) { // no-op } -sd::TadPack* tadOnlyShapeInfo(Nd4jLong *hXShapeInfo, int *dimension, int dimensionLength) { +sd::TadPack* tadOnlyShapeInfo(Nd4jLong const* hXShapeInfo, int *dimension, int dimensionLength) { auto pack = new TadPack(); try { *pack = sd::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); @@ -1239,21 +1231,26 @@ sd::TadPack* tadOnlyShapeInfo(Nd4jLong *hXShapeInfo, int *dimension, int dimensi return pack; } -Nd4jLong* getPrimaryShapeInfo(sd::TadPack* pack) { - return pack->primaryShapeInfo(); +Nd4jLong const* getPrimaryShapeInfo(sd::TadPack* pack) { + return const_cast(pack->primaryShapeInfo()); } -Nd4jLong* getPrimaryOffsets(sd::TadPack* pack) { - return pack->primaryOffsets(); + +Nd4jLong const* getPrimaryOffsets(sd::TadPack* pack) { + return const_cast(pack->primaryOffsets()); } -Nd4jLong* getSpecialShapeInfo(sd::TadPack* pack) { - return pack->specialShapeInfo(); + +Nd4jLong const* getSpecialShapeInfo(sd::TadPack* pack) { + return const_cast(pack->specialShapeInfo()); } -Nd4jLong* getSpecialOffsets(sd::TadPack* pack) { - return pack->specialOffsets(); + +Nd4jLong const* getSpecialOffsets(sd::TadPack* pack) { + return const_cast(pack->specialOffsets()); } + Nd4jLong getNumberOfTads(sd::TadPack* pack) { return pack->numberOfTads(); } + int getShapeInfoLength(sd::TadPack* pack) { return pack->shapeInfoLength(); } @@ -1270,15 
+1267,15 @@ Nd4jPointer getConstantSpace() { template void pullRowsGeneric(void *vx, - Nd4jLong *hXShapeInfo, + Nd4jLong const* hXShapeInfo, void *vz, - Nd4jLong *hZShapeInfo, + Nd4jLong const* hZShapeInfo, const int n, - Nd4jLong *indexes, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffsets) { + Nd4jLong const* indexes, + Nd4jLong const* tadShapeInfo, + Nd4jLong const* tadOffsets, + Nd4jLong const* zTadShapeInfo, + Nd4jLong const* zTadOffsets) { auto hX = reinterpret_cast(vx); auto hZ = reinterpret_cast(vz); @@ -1322,14 +1319,14 @@ void pullRowsGeneric(void *vx, } void pullRows(Nd4jPointer *extraPointers, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, Nd4jLong n, - Nd4jLong *indexes, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffsets) { + Nd4jLong* indexes, + Nd4jLong const* tadShapeInfo, + Nd4jLong const* tadOffsets, + Nd4jLong const* zTadShapeInfo, + Nd4jLong const* zTadOffsets) { try { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -1342,11 +1339,11 @@ void pullRows(Nd4jPointer *extraPointers, template void tearGeneric(void *vx, - Nd4jLong *hXShapeInfo, + Nd4jLong const* hXShapeInfo, Nd4jPointer *targets, - Nd4jLong *hZShapeInfo, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets) { + Nd4jLong const* hZShapeInfo, + Nd4jLong const* tadShapeInfo, + Nd4jLong const* tadOffsets) { auto hX = reinterpret_cast(vx); @@ -1381,11 +1378,11 @@ void tearGeneric(void *vx, } void tear(Nd4jPointer *extraPointers, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, Nd4jPointer *targets, - Nd4jLong *hZShapeInfo, - 
Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets) { + Nd4jLong const* hZShapeInfo, + Nd4jLong const* tadShapeInfo, + Nd4jLong const* tadOffsets) { try { auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -1398,10 +1395,10 @@ void tear(Nd4jPointer *extraPointers, void average(Nd4jPointer *extras, - Nd4jPointer *hX, Nd4jLong *hXShapeInfo, - Nd4jPointer *dX, Nd4jLong *dXShapeInfo, - void *z, Nd4jLong *hZShapeInfo, - void *dz, Nd4jLong *dZShapeInfo, + Nd4jPointer *hX, const Nd4jLong *hXShapeInfo, + Nd4jPointer *dX, const Nd4jLong *dXShapeInfo, + void *z, const Nd4jLong *hZShapeInfo, + void *dz, const Nd4jLong *dZShapeInfo, int n, Nd4jLong length, bool propagate) { @@ -1416,10 +1413,10 @@ void average(Nd4jPointer *extras, } void accumulate(Nd4jPointer *extras, - Nd4jPointer *hX, Nd4jLong *hXShapeInfo, - Nd4jPointer *dX, Nd4jLong *dXShapeInfo, - void *hz, Nd4jLong *hZShapeInfo, - void *dz, Nd4jLong *dZShapeInfo, + Nd4jPointer *hX, Nd4jLong const* hXShapeInfo, + Nd4jPointer *dX, Nd4jLong const* dXShapeInfo, + void *hz, Nd4jLong const* hZShapeInfo, + void *dz, Nd4jLong const* dZShapeInfo, int n, Nd4jLong length) { try { @@ -1438,7 +1435,7 @@ void enableP2P(bool enable) { -void encodeThresholdP1(Nd4jPointer *extraPointers, void *hX, Nd4jLong *hXShapeInfo, Nd4jLong N, int *dz, float threshold) { +void encodeThresholdP1(Nd4jPointer *extraPointers, void *hX, Nd4jLong const* hXShapeInfo, Nd4jLong N, int *dz, float threshold) { // TODO: to be implemented } @@ -1448,13 +1445,13 @@ void encodeThresholdP2Int(Nd4jPointer *extraPointers, int *hX, Nd4jLong N, int * } -void encodeThresholdP3(Nd4jPointer *extraPointers, void *hX, Nd4jLong *hXShapeInfo, int *offsets, Nd4jLong N, int *dz){ +void encodeThresholdP3(Nd4jPointer *extraPointers, void *hX, Nd4jLong const* hXShapeInfo, int *offsets, Nd4jLong N, int *dz){ // offsets won't be used here // TODO: to be implemented } -void decodeThreshold(Nd4jPointer *extraPointers, void *hX, Nd4jLong N, void *dz, Nd4jLong *hZShapeInfo){ +void 
decodeThreshold(Nd4jPointer *extraPointers, void *hX, Nd4jLong N, void *dz, const Nd4jLong *hZShapeInfo){ // TODO: to be implemented } @@ -1467,12 +1464,12 @@ void checkP2P() { // no-op } -void decodeBitmap(Nd4jPointer *extraPointers, void *hX, Nd4jLong N, void *dz, Nd4jLong *hZShapeInfo) { +void decodeBitmap(Nd4jPointer *extraPointers, void *hX, Nd4jLong N, void *dz, Nd4jLong const* hZShapeInfo) { NativeOpExecutioner::decodeBitmap(hX, N, dz, hZShapeInfo); } template -void shuffleGeneric(void **hX, Nd4jLong **hXShapeInfo, void **dz, Nd4jLong **hZShapeInfo, int N, int *shuffleMap, Nd4jLong **tadOnlyShapeInfo, Nd4jLong **tadOffsets) { +void shuffleGeneric(void **hX, Nd4jLong * const*hXShapeInfo, void **dz, Nd4jLong * const* hZShapeInfo, int N, int *shuffleMap, Nd4jLong * const* tadOnlyShapeInfo, Nd4jLong * const* tadOffsets) { auto dX = reinterpret_cast(hX); auto dZ = reinterpret_cast(dz); @@ -1543,10 +1540,10 @@ void shuffle(Nd4jPointer *extras, Nd4jPointer *tadShapeInfo, Nd4jPointer *tadOffsets) { try { - auto xShape = reinterpret_cast(hXShapeInfo); - auto zShape = reinterpret_cast(hZShapeInfo); - auto tadOnlyShapeInfo = reinterpret_cast(tadShapeInfo); - auto tadOffset = reinterpret_cast(tadOffsets); + auto xShape = reinterpret_cast(hXShapeInfo); + auto zShape = reinterpret_cast(hZShapeInfo); + auto tadOnlyShapeInfo = reinterpret_cast(tadShapeInfo); + auto tadOffset = reinterpret_cast(tadOffsets); auto xType = sd::ArrayOptions::dataType(xShape[0]); @@ -1574,13 +1571,13 @@ int getDevice() { void execScalarTad(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbScalars, Nd4jLong *hScalarShapeInfo, Nd4jLong *dScalarShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const*dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const*dZShapeInfo, + OpaqueDataBuffer *dbScalars, 
Nd4jLong const* hScalarShapeInfo, Nd4jLong const* dScalarShapeInfo, void *extraParams, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape, + Nd4jLong const*tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const*tadShapeInfoZ, Nd4jLong const* tadOffsetsZ) { try { auto dimension = reinterpret_cast(dbDimension->primary()); int dimensionLength = static_cast(shape::length(hDimensionShape)); @@ -1614,13 +1611,13 @@ void execScalarTad(Nd4jPointer *extraPointers, void execScalarBoolTad(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbScalars, Nd4jLong *hScalarShapeInfo, Nd4jLong *dScalarShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbScalars, const Nd4jLong *hScalarShapeInfo, const Nd4jLong *dScalarShapeInfo, void *extraParams, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { + OpaqueDataBuffer *dbDimension, const Nd4jLong *hDimensionShape, const Nd4jLong *dDimensionShape, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ) { try { auto dimension = reinterpret_cast(dbDimension->primary()); int dimensionLength = static_cast(shape::length(hDimensionShape)); @@ -1722,7 +1719,7 @@ void execAggregateBatch(Nd4jPointer *extraPointers, void execRandom(Nd4jPointer *extraPointers, int opNum, Nd4jPointer state, - OpaqueDataBuffer *dbZ, Nd4jLong 
*hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, void *extraArguments) { try { NativeOpExecutioner::execRandom(nullptr, opNum, state, dbZ->primary(), hZShapeInfo, dbZ->special(), dZShapeInfo, extraArguments); @@ -1735,9 +1732,9 @@ void execRandom(Nd4jPointer *extraPointers, void execRandom3(Nd4jPointer *extraPointers, int opNum, Nd4jPointer state, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbY, const Nd4jLong *hYShapeInfo, const Nd4jLong *dYShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, void *extraArguments) { try { NativeOpExecutioner::execRandom(nullptr, opNum, state, dbX->primary(), hXShapeInfo, dbX->special(), dXShapeInfo, dbY->primary(), hYShapeInfo, dbY->special(), dYShapeInfo, dbZ->primary(), hZShapeInfo, dbZ->special(), dZShapeInfo, extraArguments); @@ -1750,8 +1747,8 @@ void execRandom3(Nd4jPointer *extraPointers, void execRandom2(Nd4jPointer *extraPointers, int opNum, Nd4jPointer state, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, const Nd4jLong *hXShapeInfo, const Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbZ, const Nd4jLong *hZShapeInfo, const Nd4jLong *dZShapeInfo, void *extraArguments) { try { NativeOpExecutioner::execRandom(nullptr, opNum, state, dbX->primary(), hXShapeInfo, dbX->special(), dXShapeInfo, dbZ->primary(), hZShapeInfo, dbZ->special(), dZShapeInfo, extraArguments); @@ -1819,8 +1816,8 @@ Nd4jPointer pointerForAddress(Nd4jLong address) { } void sort(Nd4jPointer *extraPointers, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong 
*dXShapeInfo, + void *hX, const Nd4jLong *hXShapeInfo, + void *dX, const Nd4jLong *dXShapeInfo, bool descending) { try { NativeOpExecutioner::execSort(hX, hXShapeInfo, descending); @@ -1831,12 +1828,11 @@ void sort(Nd4jPointer *extraPointers, } void sortTad(Nd4jPointer *extraPointers, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets, + void *hX, const Nd4jLong *hXShapeInfo, + void *dX, const Nd4jLong *dXShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, + const Nd4jLong *tadOffsets, bool descending) { try { NativeOpExecutioner::execSort(hX, hXShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffsets, descending); @@ -1859,7 +1855,7 @@ void sortCooIndices(Nd4jPointer *extraPointers, } } -Nd4jLong encodeBitmap(Nd4jPointer *extraPointers, void *hX, Nd4jLong *hXShapeInfo, Nd4jLong N, int *dz, float threshold) { +Nd4jLong encodeBitmap(Nd4jPointer *extraPointers, void *hX, Nd4jLong const* hXShapeInfo, Nd4jLong N, int *dz, float threshold) { return NativeOpExecutioner::encodeBitmap(hX, hXShapeInfo, N, dz, threshold); } @@ -1948,7 +1944,7 @@ FORCEINLINE int estimateThresholdGeneric(Nd4jPointer *extraPointers, Nd4jPointer } -int estimateThreshold(Nd4jPointer *extraPointers, Nd4jPointer hX, Nd4jLong *hXShapeInfo, int N, float threshold) { +int estimateThreshold(Nd4jPointer *extraPointers, Nd4jPointer hX, Nd4jLong const* hXShapeInfo, int N, float threshold) { try { auto xType = ArrayOptions::dataType(hXShapeInfo); BUILD_SINGLE_SELECTOR(xType, return estimateThresholdGeneric, (extraPointers, hX, N, threshold), FLOAT_TYPES); @@ -1963,8 +1959,8 @@ Nd4jLong getShapeListSize(sd::ShapeList* list) { return list->size(); } -Nd4jLong* getShape(sd::ShapeList* list, Nd4jLong i) { - return list->at(i); +Nd4jLong const* getShape(sd::ShapeList* list, Nd4jLong i) { + return const_cast(list->at(i)); } void deleteShapeList(Nd4jPointer shapeList) { 
@@ -2258,8 +2254,8 @@ const char* getVariableName(sd::graph::Variable* variable) { return variable->getName()->c_str(); } -Nd4jLong* getVariableShape(sd::graph::Variable* variable) { - return variable->getNDArray()->shapeInfo(); +Nd4jLong const* getVariableShape(sd::graph::Variable* variable) { + return const_cast(variable->getNDArray()->shapeInfo()); } void* getVariableBuffer(sd::graph::Variable* variable) { @@ -2601,12 +2597,13 @@ void deleteUtf8String(Nd4jPointer *extraPointers, Nd4jPointer ptr) { } template -static void _scatterUpdate(Nd4jPointer *extraPointers, int opCode, int numOfSubArrs, - void* hX, Nd4jLong* hXShapeInfo, Nd4jLong* hXOffsets, - void* dX, Nd4jLong* dXShapeInfo, Nd4jLong* dXOffsets, - void* hY, Nd4jLong* hYShapeInfo, Nd4jLong* hYOffsets, - void* dY, Nd4jLong* dYShapeInfo, Nd4jLong* dYOffsets, - void* vIindexes, Nd4jLong* hIndicesShapeInfo, void* dIindexes, Nd4jLong* dIndicesShapeInfo) { +static void _scatterUpdate( + Nd4jPointer *extraPointers, int opCode, int numOfSubArrs, + void* hX, const Nd4jLong* hXShapeInfo, const Nd4jLong* hXOffsets, + void* dX, const Nd4jLong* dXShapeInfo, const Nd4jLong* dXOffsets, + void* hY, const Nd4jLong* hYShapeInfo, const Nd4jLong* hYOffsets, + void* dY, const Nd4jLong* dYShapeInfo, const Nd4jLong* dYOffsets, + void* vIindexes, const Nd4jLong* hIndicesShapeInfo, void* dIindexes, const Nd4jLong* dIndicesShapeInfo) { auto hIindexes = reinterpret_cast(vIindexes); auto func = PRAGMA_THREADS_DO { @@ -2658,11 +2655,11 @@ static void _scatterUpdate(Nd4jPointer *extraPointers, int opCode, int numOfSub //////////////////////////////////////////////////////////////////////// void scatterUpdate(Nd4jPointer *extraPointers, int opCode, int numOfSubArrs, - void* hX, Nd4jLong* hXShapeInfo, Nd4jLong* hXOffsets, - void* dX, Nd4jLong* dXShapeInfo, Nd4jLong* dXOffsets, - void* hY, Nd4jLong* hYShapeInfo, Nd4jLong* hYOffsets, - void* dY, Nd4jLong* dYShapeInfo, Nd4jLong* dYOffsets, - void* hIindexes, Nd4jLong* hIndicesShapeInfo, 
void* dIindexes, Nd4jLong* dIndicesShapeInfo) { + void* hX, const Nd4jLong* hXShapeInfo, const Nd4jLong* hXOffsets, + void* dX, const Nd4jLong* dXShapeInfo, const Nd4jLong* dXOffsets, + void* hY, const Nd4jLong* hYShapeInfo, const Nd4jLong* hYOffsets, + void* dY, const Nd4jLong* dYShapeInfo, const Nd4jLong* dYOffsets, + void* hIindexes, const Nd4jLong* hIndicesShapeInfo, void* dIindexes, const Nd4jLong* dIndicesShapeInfo) { auto iType = ArrayOptions::dataType(hIndicesShapeInfo); try { @@ -2718,7 +2715,7 @@ void deleteTadPack(sd::TadPack* ptr) { delete ptr; } -sd::ConstantDataBuffer* constantBufferLong(sd::DataType dtype, Nd4jLong *data, int length) { +sd::ConstantDataBuffer* constantBufferLong(sd::DataType dtype, const Nd4jLong *data, int length) { return nullptr; } @@ -2879,7 +2876,7 @@ Nd4jPointer shapeBufferForNumpy(Nd4jPointer npyArray) { } else { shapeBuffer = sd::ShapeBuilders::createShapeInfo(dtype, arr.fortranOrder ? 'f' : 'c', shape); } - return reinterpret_cast(sd::ConstantShapeHelper::getInstance()->createFromExisting(shapeBuffer, true)); + return const_cast(sd::ConstantShapeHelper::getInstance()->createFromExisting(shapeBuffer, true)); } catch (std::exception &e) { sd::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); sd::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); @@ -2888,10 +2885,10 @@ Nd4jPointer shapeBufferForNumpy(Nd4jPointer npyArray) { } void sortByKey(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dx, Nd4jLong *dxShapeInfo, - void *y, Nd4jLong *yShapeInfo, - void *dy, Nd4jLong *dyShapeInfo, + void *x, const Nd4jLong *xShapeInfo, + void *dx, const Nd4jLong *dxShapeInfo, + void *y, const Nd4jLong *yShapeInfo, + void *dy, const Nd4jLong *dyShapeInfo, bool descending) { try { auto xType = ArrayOptions::dataType(xShapeInfo); @@ -2905,10 +2902,10 @@ void sortByKey(Nd4jPointer *extraPointers, } void sortByValue(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void 
*dx, Nd4jLong *dxShapeInfo, - void *y, Nd4jLong *yShapeInfo, - void *dy, Nd4jLong *dyShapeInfo, + void *x, const Nd4jLong *xShapeInfo, + void *dx, const Nd4jLong *dxShapeInfo, + void *y, const Nd4jLong *yShapeInfo, + void *dy, const Nd4jLong *dyShapeInfo, bool descending) { try { auto xType = ArrayOptions::dataType(xShapeInfo); @@ -2922,12 +2919,11 @@ void sortByValue(Nd4jPointer *extraPointers, } void sortTadByKey(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dx, Nd4jLong *dxShapeInfo, - void *y, Nd4jLong *yShapeInfo, - void *dy, Nd4jLong *dyShapeInfo, - int *dimension, - int dimensionLength, + void *x, const Nd4jLong *xShapeInfo, + void *dx, const Nd4jLong *dxShapeInfo, + void *y, const Nd4jLong *yShapeInfo, + void *dy, const Nd4jLong *dyShapeInfo, + int *dimension, int dimensionLength, bool descending) { try { auto xType = ArrayOptions::dataType(xShapeInfo); @@ -2941,12 +2937,11 @@ void sortTadByKey(Nd4jPointer *extraPointers, } void sortTadByValue(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dx, Nd4jLong *dxShapeInfo, - void *y, Nd4jLong *yShapeInfo, - void *dy, Nd4jLong *dyShapeInfo, - int *dimension, - int dimensionLength, + void *x, const Nd4jLong *xShapeInfo, + void *dx, const Nd4jLong *dxShapeInfo, + void *y, const Nd4jLong *yShapeInfo, + void *dy, const Nd4jLong *dyShapeInfo, + int *dimension, int dimensionLength, bool descending) { try { auto xType = ArrayOptions::dataType(xShapeInfo); @@ -3227,8 +3222,8 @@ void dbClose(OpaqueDataBuffer *dataBuffer) { dataBuffer->getDataBuffer()->close(); } -BUILD_SINGLE_TEMPLATE(template void pullRowsGeneric, (void *, Nd4jLong*, void*, Nd4jLong*, const int, Nd4jLong*, Nd4jLong*, Nd4jLong*, Nd4jLong*, Nd4jLong*), LIBND4J_TYPES); -BUILD_SINGLE_TEMPLATE(template void tearGeneric, (void *, Nd4jLong*, Nd4jPointer*, Nd4jLong*, Nd4jLong*, Nd4jLong*), LIBND4J_TYPES); -BUILD_SINGLE_TEMPLATE(template void shuffleGeneric, (void**, Nd4jLong**, void**, Nd4jLong**, int, int*, Nd4jLong**, 
Nd4jLong**), LIBND4J_TYPES); +BUILD_SINGLE_TEMPLATE(template void pullRowsGeneric, (void *, Nd4jLong const*, void*, Nd4jLong const*, const int, Nd4jLong const*, Nd4jLong const*, Nd4jLong const*, Nd4jLong const*, Nd4jLong const*), LIBND4J_TYPES); +BUILD_SINGLE_TEMPLATE(template void tearGeneric, (void *, Nd4jLong const* , Nd4jPointer*, Nd4jLong const*, Nd4jLong const*, Nd4jLong const*), LIBND4J_TYPES); +BUILD_SINGLE_TEMPLATE(template void shuffleGeneric, (void**, Nd4jLong* const*, void**, Nd4jLong* const*, int, int*, Nd4jLong* const*, Nd4jLong* const*), LIBND4J_TYPES); diff --git a/libnd4j/include/legacy/cuda/NativeOpExecutioner.cu b/libnd4j/include/legacy/cuda/NativeOpExecutioner.cu index 00a9ea03f..f01daffd7 100644 --- a/libnd4j/include/legacy/cuda/NativeOpExecutioner.cu +++ b/libnd4j/include/legacy/cuda/NativeOpExecutioner.cu @@ -87,12 +87,12 @@ extern "C" __global__ void prepareShapeBuffer(int *dimension, int *maxDimension, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execPairwiseTransform(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void const* hY, Nd4jLong const* hYShapeInfo, + void const* dY, Nd4jLong const* dYShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, void *extraParams) { auto stream = lc->getCudaStream(); @@ -128,12 +128,12 @@ void NativeOpExecutioner::execPairwiseTransform(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execPairwiseBoolTransform( sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void 
*dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void const* hY, Nd4jLong const* hYShapeInfo, + void const* dY, Nd4jLong const* dYShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, void *extraParams) { auto stream = lc->getCudaStream(); @@ -164,12 +164,12 @@ void NativeOpExecutioner::execPairwiseBoolTransform( sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execPairwiseIntTransform( sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void const* hY, Nd4jLong const* hYShapeInfo, + void const* dY, Nd4jLong const* dYShapeInfo, + void * hZ, Nd4jLong const* hZShapeInfo, + void * dZ, Nd4jLong const* dZShapeInfo, void *extraParams) { auto stream = lc->getCudaStream(); @@ -200,11 +200,11 @@ void NativeOpExecutioner::execPairwiseIntTransform( sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execSummaryStatsScalar(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, bool biasCorrected) { auto stream = lc->getCudaStream(); @@ -226,16 +226,16 @@ void NativeOpExecutioner::execSummaryStatsScalar(sd::LaunchContext *lc, 
//////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execBroadcastBool(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void const* hY, Nd4jLong const* hYShapeInfo, + void const* dY, Nd4jLong const* dYShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, void *extraParams, int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadOnlyShapeInfoZ,Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { auto stream = lc->getCudaStream(); @@ -300,16 +300,16 @@ void NativeOpExecutioner::execBroadcastBool(sd::LaunchContext* lc, const int opN void NativeOpExecutioner::execInverseBroadcastBool(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void const* hY, Nd4jLong const* hYShapeInfo, + void const* dY, Nd4jLong const* dYShapeInfo, + void* hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, void *extraParams, int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadOnlyShapeInfoZ,Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { auto stream = lc->getCudaStream(); auto xType = 
sd::ArrayOptions::dataType(hXShapeInfo); @@ -338,15 +338,15 @@ void NativeOpExecutioner::execInverseBroadcastBool(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execBroadcastInt(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void const* hY, Nd4jLong const* hYShapeInfo, + void const* dY, Nd4jLong const* dYShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadOnlyShapeInfoZ,Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadOnlyShapeInfoZ,Nd4jLong const* tadOffsetsZ) { auto stream = lc->getCudaStream(); @@ -413,15 +413,15 @@ void NativeOpExecutioner::execBroadcastInt(sd::LaunchContext* lc, const int opNu void NativeOpExecutioner::execInverseBroadcastInt(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void const* hY, Nd4jLong const* hYShapeInfo, + void const* dY, Nd4jLong const* dYShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadOnlyShapeInfoZ,Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadOnlyShapeInfoZ,Nd4jLong 
const* tadOffsetsZ) { auto stream = lc->getCudaStream(); auto xType = sd::ArrayOptions::dataType(hXShapeInfo); @@ -465,15 +465,15 @@ void NativeOpExecutioner::execInverseBroadcastInt(sd::LaunchContext *lc, */ void NativeOpExecutioner::execBroadcast(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void const* hY, Nd4jLong const* hYShapeInfo, + void const* dY, Nd4jLong const* dYShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadOnlyShapeInfoZ,Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadOnlyShapeInfoZ,Nd4jLong const* tadOffsetsZ) { auto stream = lc->getCudaStream(); @@ -536,15 +536,15 @@ void NativeOpExecutioner::execBroadcast(sd::LaunchContext *lc, const int opNum, void NativeOpExecutioner::execInverseBroadcast(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void const* hY, Nd4jLong const* hYShapeInfo, + void const* dY, Nd4jLong const* dYShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadOnlyShapeInfoZ,Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadOnlyShapeInfoZ,Nd4jLong const* 
tadOffsetsZ) { auto stream = lc->getCudaStream(); @@ -572,13 +572,13 @@ void NativeOpExecutioner::execInverseBroadcast(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduceSame(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets) { auto stream = lc->getCudaStream(); auto reductionPointer = lc->getReductionPointer(); @@ -607,13 +607,13 @@ void NativeOpExecutioner::execReduceSame(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduceLong(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, int *dimension,int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets) { auto stream = lc->getCudaStream(); auto reductionPointer = lc->getReductionPointer(); @@ -643,13 +643,13 @@ void NativeOpExecutioner::execReduceLong(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduceBool(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong 
const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets) { auto stream = lc->getCudaStream(); auto reductionPointer = lc->getReductionPointer(); @@ -689,13 +689,13 @@ void NativeOpExecutioner::execReduceBool(sd::LaunchContext *lc, */ void NativeOpExecutioner::execIndexReduce(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets) { auto stream = lc->getCudaStream(); auto reductionPointer = lc->getReductionPointer(); @@ -734,13 +734,13 @@ void NativeOpExecutioner::execIndexReduce(sd::LaunchContext *lc, */ void NativeOpExecutioner::execReduceFloat(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, int *dimension,int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets) { auto stream = lc->getCudaStream(); auto reductionPointer = lc->getReductionPointer(); @@ -774,11 +774,11 @@ void 
NativeOpExecutioner::execReduceFloat(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execIndexReduceScalar(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo){ + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo){ if (sd::Environment::getInstance()->isDebug()) printf("F1 opNum:[%i]\n", opNum); @@ -825,11 +825,11 @@ void NativeOpExecutioner::execIndexReduceScalar(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduceFloatScalar(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo) { + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo) { auto stream = lc->getCudaStream(); auto reductionPointer = lc->getReductionPointer(); @@ -854,11 +854,11 @@ void NativeOpExecutioner::execReduceFloatScalar(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduceBoolScalar(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo) { + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo) { auto stream = lc->getCudaStream(); auto reductionPointer = lc->getReductionPointer(); @@ -885,11 +885,11 @@ 
void NativeOpExecutioner::execReduceBoolScalar(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduceSameScalar(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo) { + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo) { auto stream = lc->getCudaStream(); auto reductionPointer = lc->getReductionPointer(); @@ -916,11 +916,11 @@ void NativeOpExecutioner::execReduceSameScalar(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduceLongScalar(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo) { + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo) { auto stream = lc->getCudaStream(); auto reductionPointer = lc->getReductionPointer(); @@ -947,12 +947,12 @@ void NativeOpExecutioner::execReduceLongScalar(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execTransformSame(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, void *extraParams, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* 
tadOffsets) { auto stream = lc->getCudaStream(); @@ -981,12 +981,12 @@ void NativeOpExecutioner::execTransformSame(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execTransformBool(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, void *extraParams, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets) { auto stream = lc->getCudaStream(); @@ -1015,12 +1015,12 @@ void NativeOpExecutioner::execTransformBool(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execTransformAny(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, void *extraParams, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool allowParallelism) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, bool allowParallelism) { auto stream = lc->getCudaStream(); @@ -1050,12 +1050,12 @@ void NativeOpExecutioner::execTransformAny(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execTransformStrict(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong 
const* dXShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, void *extraParams, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets) { auto stream = lc->getCudaStream(); @@ -1084,12 +1084,12 @@ void NativeOpExecutioner::execTransformStrict(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execTransformFloat(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, void *extraParams, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets) { auto stream = lc->getCudaStream(); auto reductionPointer = lc->getReductionPointer(); @@ -1118,11 +1118,11 @@ void NativeOpExecutioner::execTransformFloat(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execSummaryStats(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, bool biasCorrected) { auto stream = lc->getCudaStream(); @@ -1147,13 +1147,13 @@ void NativeOpExecutioner::execSummaryStats(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execSummaryStats(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void 
const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, bool biasCorrected) { auto stream = lc->getCudaStream(); auto reductionPointer = lc->getReductionPointer(); @@ -1178,13 +1178,13 @@ void NativeOpExecutioner::execSummaryStats(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduce3(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo) { + void const* hY, Nd4jLong const* hYShapeInfo, + void const* dY, Nd4jLong const* dYShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo) { auto stream = lc->getCudaStream(); auto reductionPointer = lc->getReductionPointer(); @@ -1215,16 +1215,16 @@ void NativeOpExecutioner::execReduce3(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduce3(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hY, Nd4jLong const* hYShapeInfo, + void const* dY, Nd4jLong const* dYShapeInfo, + void *hZ, 
Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, int *dimension, int dimensionLength, - Nd4jLong* tadOnlyShapeInfo, Nd4jLong* tadOffsets, - Nd4jLong* yTadOnlyShapeInfo, Nd4jLong* yTadOffsets) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* yTadOnlyShapeInfo, Nd4jLong const* yTadOffsets) { if(shape::isScalar(hZShapeInfo)) { NativeOpExecutioner::execReduce3(lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo); @@ -1268,13 +1268,13 @@ void NativeOpExecutioner::execReduce3(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduce3Scalar(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo) { + void const* hY, Nd4jLong const* hYShapeInfo, + void const* dY, Nd4jLong const* dYShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo) { auto stream = lc->getCudaStream(); @@ -1308,12 +1308,12 @@ void NativeOpExecutioner::execReduce3Scalar(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execScalarBool(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalar, Nd4jLong *hScalarShapeInfo, - void *dScalar, Nd4jLong *dScalarShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, + void const* hScalar, Nd4jLong const* 
hScalarShapeInfo, + void const* dScalar, Nd4jLong const* dScalarShapeInfo, void *extraParams, bool allowParallelism) { auto stream = lc->getCudaStream(); @@ -1344,16 +1344,16 @@ void NativeOpExecutioner::execScalarBool(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execScalarBool(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalars, Nd4jLong *hScalarShapeInfo, - void *dScalars, Nd4jLong *dScalarShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, + void const* hScalars, Nd4jLong const* hScalarShapeInfo, + void const* dScalars, Nd4jLong const* dScalarShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ) { auto stream = lc->getCudaStream(); @@ -1383,12 +1383,12 @@ void NativeOpExecutioner::execScalarBool(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execScalarInt(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalar, Nd4jLong *hScalarShapeInfo, - void *dScalar, Nd4jLong *dScalarShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, + void const* hScalar, Nd4jLong const* hScalarShapeInfo, + void const* dScalar, Nd4jLong const* dScalarShapeInfo, void 
*extraParams, bool allowParallelism) { auto stream = lc->getCudaStream(); @@ -1419,16 +1419,16 @@ void NativeOpExecutioner::execScalarInt(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execScalarInt(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalars, Nd4jLong *hScalarShapeInfo, - void *dScalars, Nd4jLong *dScalarShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, + void const* hScalars, Nd4jLong const* hScalarShapeInfo, + void const* dScalars, Nd4jLong const* dScalarShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ) { auto stream = lc->getCudaStream(); @@ -1458,12 +1458,12 @@ void NativeOpExecutioner::execScalarInt(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execScalar(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalar, Nd4jLong *hScalarShapeInfo, - void *dScalar, Nd4jLong *dScalarShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void* hZ, Nd4jLong const* hZShapeInfo, + void* dZ, Nd4jLong const* dZShapeInfo, + void const* hScalar, Nd4jLong const* hScalarShapeInfo, + void const* dScalar, Nd4jLong const* dScalarShapeInfo, void *extraParams, bool allowParallelism) { auto stream = lc->getCudaStream(); @@ -1493,16 +1493,16 
@@ void NativeOpExecutioner::execScalar(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execScalar(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalars, Nd4jLong *hScalarShapeInfo, - void *dScalars, Nd4jLong *dScalarShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, + void const* hScalars, Nd4jLong const* hScalarShapeInfo, + void const* dScalars, Nd4jLong const* dScalarShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ) { auto stream = lc->getCudaStream(); @@ -1531,8 +1531,8 @@ void NativeOpExecutioner::execScalar(sd::LaunchContext *lc, void NativeOpExecutioner::execRandom(sd::LaunchContext *lc, int opNum, Nd4jPointer stateHost, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, void *extraArguments) { auto stream = lc->getCudaStream(); @@ -1564,10 +1564,10 @@ void NativeOpExecutioner::execRandom(sd::LaunchContext *lc, void NativeOpExecutioner::execRandom(sd::LaunchContext *lc, int opNum, Nd4jPointer stateHost, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, void *extraArguments) { auto stream = lc->getCudaStream(); @@ -1599,12 
+1599,12 @@ void NativeOpExecutioner::execRandom(sd::LaunchContext *lc, void NativeOpExecutioner::execRandom(sd::LaunchContext *lc, int opNum, Nd4jPointer stateHost, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, + void const* hY, Nd4jLong const* hYShapeInfo, + void const* dY, Nd4jLong const* dYShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, void *extraArguments) { auto stream = lc->getCudaStream(); @@ -1634,16 +1634,16 @@ void NativeOpExecutioner::execRandom(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void NativeOpExecutioner::execReduce3All(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParamsVals, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hY, Nd4jLong const* hYShapeInfo, + void const* dY, Nd4jLong const* dYShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *xTadShapeInfo, Nd4jLong *xOffsets, - Nd4jLong *yTadShapeInfo, Nd4jLong *yOffsets) { + Nd4jLong const* xTadShapeInfo, Nd4jLong const* xOffsets, + Nd4jLong const* yTadShapeInfo, Nd4jLong const* yOffsets) { auto stream = lc->getCudaStream(); auto allocationPointer = lc->getAllocationPointer(); @@ -1676,16 +1676,16 @@ void NativeOpExecutioner::execReduce3All(sd::LaunchContext *lc, //////////////////////////////////////////////////////////////////////// void 
NativeOpExecutioner::execReduce3TAD(sd::LaunchContext *lc, int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void const* hX, Nd4jLong const* hXShapeInfo, + void const* dX, Nd4jLong const* dXShapeInfo, void *extraParams, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, + void const* hY, Nd4jLong const* hYShapeInfo, + void const* dY, Nd4jLong const* dYShapeInfo, + void *hZ, Nd4jLong const* hZShapeInfo, + void *dZ, Nd4jLong const* dZShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *yTadShapeInfo, Nd4jLong *yTadOffsets) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* yTadShapeInfo, Nd4jLong const* yTadOffsets) { if(shape::isScalar(hZShapeInfo)) { NativeOpExecutioner::execReduce3(lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo); diff --git a/libnd4j/include/legacy/cuda/NativeOps.cu b/libnd4j/include/legacy/cuda/NativeOps.cu index 1d6a90126..8be9b3bfd 100755 --- a/libnd4j/include/legacy/cuda/NativeOps.cu +++ b/libnd4j/include/legacy/cuda/NativeOps.cu @@ -123,8 +123,8 @@ int getDeviceSharedThreshold(int deviceId) { sd::buffer::Buffer * createScalarBuffer(cudaStream_t stream) { - Nd4jLong *scalarShapeInfo = shape::createScalarShapeInfo(); - sd::buffer::Buffer *buff = sd::buffer::createBuffer(scalarShapeInfo,shape::shapeInfoLength(2), stream); + auto scalarShapeInfo = shape::createScalarShapeInfo(); + auto buff = sd::buffer::createBuffer(scalarShapeInfo,shape::shapeInfoLength(2), stream); sd::buffer::copyDataToGpu(&buff, stream); return buff; } @@ -229,9 +229,9 @@ public: void execPairwiseTransform( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - 
OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeInfo, Nd4jLong const* dYShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraParams) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX, dbY}); @@ -251,9 +251,9 @@ void execPairwiseTransform( Nd4jPointer *extraPointers, //////////////////////////////////////////////////////////////////////// void execPairwiseTransformBool(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeInfo, Nd4jLong const* dYShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraParams) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX, dbY}); @@ -275,9 +275,9 @@ void execPairwiseTransformBool(Nd4jPointer *extraPointers, //////////////////////////////////////////////////////////////////////// void execSummaryStatsScalar(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, bool biasCorrected) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); @@ -299,11 +299,11 @@ void execSummaryStatsScalar(Nd4jPointer *extraPointers, //////////////////////////////////////////////////////////////////////// void execBroadcastBool(Nd4jPointer *extraPointers, int 
opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeInfo, Nd4jLong const* dYShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraParams, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape) { + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX, dbY}); InteropDataBuffer::preparePrimaryUse({}, {dbDimension}); @@ -348,10 +348,10 @@ void execBroadcastBool(Nd4jPointer *extraPointers, void execBroadcast( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape) { + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeInfo, Nd4jLong const* dYShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX, dbY}); InteropDataBuffer::preparePrimaryUse({}, {dbDimension}); @@ -399,9 +399,9 @@ void execBroadcast( //////////////////////////////////////////////////////////////////////// void execReduceFloat(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, 
Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo) { + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); @@ -421,9 +421,9 @@ void execReduceFloat(Nd4jPointer *extraPointers, //////////////////////////////////////////////////////////////////////// void execReduceSame(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo) { + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); @@ -443,10 +443,10 @@ void execReduceSame(Nd4jPointer *extraPointers, //////////////////////////////////////////////////////////////////////// void execReduceSame2(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const*hXShapeInfo, Nd4jLong const*dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape) { + OpaqueDataBuffer *dbZ, Nd4jLong const*hZShapeInfo, Nd4jLong const*dZShapeInfo, + OpaqueDataBuffer *dbDimension, Nd4jLong const*hDimensionShape, Nd4jLong const*dDimensionShape) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); InteropDataBuffer::preparePrimaryUse({}, {dbDimension}); @@ -476,10 +476,10 @@ void execReduceSame2(Nd4jPointer *extraPointers, //////////////////////////////////////////////////////////////////////// void execReduceLong2(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong 
const*hXShapeInfo, Nd4jLong const*dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape) { + OpaqueDataBuffer *dbZ, Nd4jLong const*hZShapeInfo, Nd4jLong const*dZShapeInfo, + OpaqueDataBuffer *dbDimension, Nd4jLong const*hDimensionShape, Nd4jLong const*dDimensionShape) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); InteropDataBuffer::preparePrimaryUse({}, {dbDimension}); @@ -509,9 +509,9 @@ void execReduceLong2(Nd4jPointer *extraPointers, //////////////////////////////////////////////////////////////////////// void execReduceLong(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const*hXShapeInfo, Nd4jLong const*dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo) { + OpaqueDataBuffer *dbZ, Nd4jLong const*hZShapeInfo, Nd4jLong const*dZShapeInfo) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); @@ -551,10 +551,10 @@ void execReduceLong(Nd4jPointer *extraPointers, //////////////////////////////////////////////////////////////////////// void execReduceBool2(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const*hXShapeInfo, Nd4jLong const*dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape) { + OpaqueDataBuffer *dbZ, Nd4jLong const*hZShapeInfo, Nd4jLong const*dZShapeInfo, + OpaqueDataBuffer *dbDimension, Nd4jLong const*hDimensionShape, Nd4jLong const*dDimensionShape) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); InteropDataBuffer::preparePrimaryUse({}, {dbDimension}); @@ -584,9 +584,9 @@ void execReduceBool2(Nd4jPointer 
*extraPointers, //////////////////////////////////////////////////////////////////////// void execReduceBool(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo) { + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); @@ -637,10 +637,10 @@ void execReduceBool(Nd4jPointer *extraPointers, //////////////////////////////////////////////////////////////////////// void execIndexReduce(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape) { + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); InteropDataBuffer::preparePrimaryUse({}, {dbDimension}); @@ -679,10 +679,10 @@ void execIndexReduce(Nd4jPointer *extraPointers, //////////////////////////////////////////////////////////////////////// void execReduceFloat2(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape) { + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + 
OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); InteropDataBuffer::preparePrimaryUse({}, {dbDimension}); @@ -720,9 +720,9 @@ void execReduceFloat2(Nd4jPointer *extraPointers, void execIndexReduceScalar( Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo){ + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo){ try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); @@ -741,8 +741,8 @@ void execIndexReduceScalar( //////////////////////////////////////////////////////////////////////// void execTransformSame(Nd4jPointer *extraPointers,int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraParams) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); @@ -766,8 +766,8 @@ void execTransformSame(Nd4jPointer *extraPointers,int opNum, //////////////////////////////////////////////////////////////////////// void execTransformBool(Nd4jPointer *extraPointers,int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraParams) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); @@ -791,8 +791,8 @@ void execTransformBool(Nd4jPointer *extraPointers,int opNum, 
//////////////////////////////////////////////////////////////////////// void execTransformAny(Nd4jPointer *extraPointers,int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraParams) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); @@ -817,8 +817,8 @@ void execTransformAny(Nd4jPointer *extraPointers,int opNum, //////////////////////////////////////////////////////////////////////// void execTransformStrict(Nd4jPointer *extraPointers,int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraParams) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); @@ -842,8 +842,8 @@ void execTransformStrict(Nd4jPointer *extraPointers,int opNum, //////////////////////////////////////////////////////////////////////// void execTransformFloat(Nd4jPointer *extraPointers,int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraParams) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); @@ -1368,7 +1368,7 @@ void specialConcat( Nd4jPointer *data, Nd4jPointer *inputShapeInfo, void *dZ, - Nd4jLong *dZShapeInfo, Nd4jPointer *tadPointers, Nd4jPointer *offsetPointers) { + Nd4jLong const* dZShapeInfo, Nd4jPointer *tadPointers, Nd4jPointer *offsetPointers) { 
try { BUILD_SINGLE_SELECTOR(ArrayOptions::dataType(dZShapeInfo), sd::SpecialMethods, ::concatCpuGeneric(dimension, numArrays, data, inputShapeInfo, dZ, dZShapeInfo), @@ -1383,7 +1383,7 @@ void specialConcat( /** * This method saves */ -sd::TadPack* tadOnlyShapeInfo(Nd4jLong *dXShapeInfo, int *dimension, int dimensionLength) { +sd::TadPack* tadOnlyShapeInfo(Nd4jLong const* dXShapeInfo, int *dimension, int dimensionLength) { try { auto pack = new TadPack(); *pack = sd::ConstantTadHelper::getInstance()->tadForDimensions(dXShapeInfo, dimension, dimensionLength); @@ -1395,16 +1395,16 @@ sd::TadPack* tadOnlyShapeInfo(Nd4jLong *dXShapeInfo, int *dimension, int dimensi } } -Nd4jLong* getPrimaryShapeInfo(sd::TadPack* pack) { +Nd4jLong const* getPrimaryShapeInfo(sd::TadPack* pack) { return pack->primaryShapeInfo(); } -Nd4jLong* getPrimaryOffsets(sd::TadPack* pack) { +Nd4jLong const* getPrimaryOffsets(sd::TadPack* pack) { return pack->primaryOffsets(); } -Nd4jLong* getSpecialShapeInfo(sd::TadPack* pack) { +Nd4jLong const* getSpecialShapeInfo(sd::TadPack* pack) { return pack->specialShapeInfo(); } -Nd4jLong* getSpecialOffsets(sd::TadPack* pack) { +Nd4jLong const* getSpecialOffsets(sd::TadPack* pack) { return pack->specialOffsets(); } Nd4jLong getNumberOfTads(sd::TadPack* pack) { @@ -1460,14 +1460,14 @@ Nd4jPointer getConstantSpace() { } void pullRows(Nd4jPointer *extraPointers, - OpaqueDataBuffer *dbX, Nd4jLong *xShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *zShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* xShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* zShapeInfo, Nd4jLong const* dZShapeInfo, Nd4jLong n, Nd4jLong *indexes, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffsets) { + Nd4jLong const* tadShapeInfo, + Nd4jLong const* tadOffsets, + Nd4jLong const* zTadShapeInfo, + Nd4jLong const* zTadOffsets) { try { 
InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); @@ -1489,10 +1489,10 @@ void pullRows(Nd4jPointer *extraPointers, void average(Nd4jPointer *extras, - Nd4jPointer *x, Nd4jLong *xShapeInfo, - Nd4jPointer *dx, Nd4jLong *dXShapeInfo, - void *z, Nd4jLong *zShapeInfo, - void *dz, Nd4jLong *dzShapeInfo, + Nd4jPointer *x, Nd4jLong const* xShapeInfo, + Nd4jPointer *dx, Nd4jLong const* dXShapeInfo, + void *z, Nd4jLong const* zShapeInfo, + void *dz, Nd4jLong const* dzShapeInfo, int n, Nd4jLong length, bool propagate) { @@ -1524,10 +1524,10 @@ void average(Nd4jPointer *extras, } void accumulate(Nd4jPointer *extras, - Nd4jPointer *x, Nd4jLong *xShapeInfo, - Nd4jPointer *dx, Nd4jLong *dXShapeInfo, - void *z, Nd4jLong *zShapeInfo, - void *dz, Nd4jLong *dzShapeInfo, + Nd4jPointer *x, Nd4jLong const* xShapeInfo, + Nd4jPointer *dx, Nd4jLong const* dXShapeInfo, + void *z, Nd4jLong const* zShapeInfo, + void *dz, Nd4jLong const* dzShapeInfo, int n, Nd4jLong length) { try { @@ -1572,8 +1572,8 @@ void shuffle(Nd4jPointer *extras, auto dX = reinterpret_cast(dx); auto dZ = reinterpret_cast(dz); - auto xShape = reinterpret_cast(xShapeInfo); - auto dxShape = reinterpret_cast(dXShapeInfo); + auto xShape = reinterpret_cast(xShapeInfo); + auto dxShape = reinterpret_cast(dXShapeInfo); auto tadOnlyShapeInfo = reinterpret_cast(tadShapeInfo); auto tadOffset = reinterpret_cast(tadOffsets); @@ -1614,9 +1614,9 @@ void setTADThreshold(int num) { //////////////////////////////////////////////////////////////////////// void execSummaryStats(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, bool biasCorrected) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); @@ -1638,12 +1638,12 
@@ void execSummaryStats(Nd4jPointer *extraPointers, //////////////////////////////////////////////////////////////////////// void execSummaryStatsTad(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape, bool biasCorrected, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX, dbDimension}); InteropDataBuffer::preparePrimaryUse({}, {dbDimension}); @@ -1670,10 +1670,10 @@ void execSummaryStatsTad(Nd4jPointer *extraPointers, //////////////////////////////////////////////////////////////////////// void execReduce3(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo) { + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeInfo, Nd4jLong const* dYShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX, dbY}); @@ -1694,13 +1694,13 @@ void execReduce3(Nd4jPointer *extraPointers, //////////////////////////////////////////////////////////////////////// void execReduce3Tad(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + 
OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets) { + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeInfo, Nd4jLong const* dYShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape, + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* yTadOnlyShapeInfo, Nd4jLong const* yTadOffsets) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX, dbY}); InteropDataBuffer::preparePrimaryUse({}, {dbDimension}); @@ -1744,10 +1744,10 @@ void execReduce3Tad(Nd4jPointer *extraPointers, //////////////////////////////////////////////////////////////////////// void execReduce3Scalar(Nd4jPointer *extraPointers,int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParams, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo) { + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeInfo, Nd4jLong const* dYShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX, dbY}); @@ -1768,9 +1768,9 @@ void execReduce3Scalar(Nd4jPointer *extraPointers,int opNum, //////////////////////////////////////////////////////////////////////// void execScalarBool(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - 
OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbScalar, Nd4jLong *hScalarShapeInfo, Nd4jLong *dScalarShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbScalar, Nd4jLong const* hScalarShapeInfo, Nd4jLong const* dScalarShapeInfo, void *extraParams) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX, dbScalar}); @@ -1792,13 +1792,13 @@ void execScalarBool(Nd4jPointer *extraPointers, //////////////////////////////////////////////////////////////////////// void execScalarBoolTad(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbScalars, Nd4jLong *hScalarShapeInfo, Nd4jLong *dScalarShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbScalars, Nd4jLong const* hScalarShapeInfo, Nd4jLong const* dScalarShapeInfo, void *extraParams, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape, + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX, dbScalars}); InteropDataBuffer::preparePrimaryUse({}, {dbDimension}); @@ -1825,9 +1825,9 @@ void execScalarBoolTad(Nd4jPointer *extraPointers, //////////////////////////////////////////////////////////////////////// void execScalar(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong 
*hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbScalar, Nd4jLong *hScalarShapeInfo, Nd4jLong *dScalarShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbScalar, Nd4jLong const* hScalarShapeInfo, Nd4jLong const* dScalarShapeInfo, void *extraParams) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX, dbScalar}); @@ -1849,13 +1849,13 @@ void execScalar(Nd4jPointer *extraPointers, //////////////////////////////////////////////////////////////////////// void execScalarTad(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbScalars, Nd4jLong *hScalarShapeInfo, Nd4jLong *dScalarShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbScalars, Nd4jLong const* hScalarShapeInfo, Nd4jLong const* dScalarShapeInfo, void *extraParams, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape, + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX, dbScalars}); InteropDataBuffer::preparePrimaryUse({}, {dbDimension}); @@ -1931,7 +1931,7 @@ void execAggregateBatch(Nd4jPointer *extraPointers, void execRandom(Nd4jPointer *extraPointers, int opNum, Nd4jPointer stateHost, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, 
Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraArguments) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {}); @@ -1950,8 +1950,8 @@ void execRandom(Nd4jPointer *extraPointers, //////////////////////////////////////////////////////////////////////// void execRandom2(Nd4jPointer *extraPointers, int opNum, Nd4jPointer stateHost, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraArguments) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX}); @@ -1971,9 +1971,9 @@ void execRandom2(Nd4jPointer *extraPointers, int opNum, Nd4jPointer stateHost, //////////////////////////////////////////////////////////////////////// void execRandom3(Nd4jPointer *extraPointers, int opNum, Nd4jPointer stateHost, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeInfo, Nd4jLong const* dYShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, void *extraArguments) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX, dbY}); @@ -2091,11 +2091,11 @@ Nd4jPointer pointerForAddress(Nd4jLong address) { } void tear(Nd4jPointer *extras, - OpaqueDataBuffer *dbX, Nd4jLong *xShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* xShapeInfo, Nd4jLong const* dXShapeInfo, Nd4jPointer *targets, - Nd4jLong *zShapeInfo, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets) { + Nd4jLong const* zShapeInfo, + Nd4jLong const* 
tadShapeInfo, + Nd4jLong const* tadOffsets) { try { InteropDataBuffer::prepareSpecialUse({}, {dbX}); @@ -2197,86 +2197,16 @@ void prescanArrayRecursive(Nd4jPointer *extras, int *dZ, int *dX, int numElement sd::DebugHelper::checkErrorCode(stream, "prescanArray(...) failed"); } - -void encodeThresholdP1(Nd4jPointer *extras, void *dx, Nd4jLong *hXShapeInfo, Nd4jLong N, int *dz, float threshold) { - try { - cudaStream_t *stream = reinterpret_cast(extras[1]); - - int blockSize = 1024; - int numBlocks = N / blockSize + (N % blockSize ? 1 : 0); - - dim3 launchDims(numBlocks, blockSize, 1024); - auto xType = sd::ArrayOptions::dataType(hXShapeInfo); - BUILD_SINGLE_SELECTOR(xType, encoderKernelP1Generic, (launchDims, stream, dx, N, dz, threshold), LIBND4J_TYPES); - - sd::DebugHelper::checkErrorCode(stream, "encodeThresholdP1Float(...) failed"); - } catch (std::exception &e) { - sd::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); - sd::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); - } -} - - - -void encodeThresholdP2Int(Nd4jPointer *extraPointers, int *dx, Nd4jLong N, int *dz) { - try { - cudaStream_t *stream = reinterpret_cast(extraPointers[1]); - //encoderKernelP2Float<<>>(dx, N, dz); - prescanArrayRecursive(extraPointers, dz, dx + 1, (int) N, 0); - sd::DebugHelper::checkErrorCode(stream, "encodeThresholdP2Int(...) failed"); - } catch (std::exception &e) { - sd::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); - sd::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); - } -} - -void encodeThresholdP3(Nd4jPointer *extraPointers, void *dx, Nd4jLong *hXShapeInfo, int *offsets, Nd4jLong N, int *dz){ - try { - cudaStream_t *stream = reinterpret_cast(extraPointers[1]); - - int blockSize = 1024; - int numBlocks = N / blockSize + (N % blockSize ? 
1 : 0); - - dim3 launchDims(numBlocks, blockSize, 4096); - auto xType = sd::ArrayOptions::dataType(hXShapeInfo); - BUILD_SINGLE_SELECTOR(xType, encoderKernelP3Generic, (launchDims, stream, dx, offsets, N, dz), LIBND4J_TYPES); - - sd::DebugHelper::checkErrorCode(stream, "encodeThresholdP3Float(...) failed"); - } catch (std::exception &e) { - sd::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); - sd::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); - } -} - -void decodeThreshold(Nd4jPointer *extraPointers, void *dx, Nd4jLong N, void *dz, Nd4jLong *zShapeInfo){ - try { - cudaStream_t *stream = reinterpret_cast(extraPointers[1]); - - // we probably want to have smaller blocks here, memory writes are misaligned anyway - int blockSize = 128; - int numBlocks = N / blockSize + (N % blockSize ? 1 : 0); - - dim3 launchDims(numBlocks, blockSize, 1024); - auto zType = sd::ArrayOptions::dataType(zShapeInfo); - BUILD_SINGLE_SELECTOR(zType, decoderKernelGeneric, (launchDims, stream, dx, N, dz), LIBND4J_TYPES); - - sd::DebugHelper::checkErrorCode(stream, "decodeThresholdFloat(...) 
failed"); - } catch (std::exception &e) { - sd::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); - sd::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); - } -} - //////////////////////////////////////////////////////////////////////// void execReduce3All(Nd4jPointer *extraPointers, int opNum, - OpaqueDataBuffer *dbX, Nd4jLong *hXShapeInfo, Nd4jLong *dXShapeInfo, + OpaqueDataBuffer *dbX, Nd4jLong const* hXShapeInfo, Nd4jLong const* dXShapeInfo, void *extraParamsVals, - OpaqueDataBuffer *dbY, Nd4jLong *hYShapeInfo, Nd4jLong *dYShapeInfo, - OpaqueDataBuffer *dbZ, Nd4jLong *hZShapeInfo, Nd4jLong *dZShapeInfo, - OpaqueDataBuffer *dbDimension, Nd4jLong *hDimensionShape, Nd4jLong *dDimensionShape, - Nd4jLong *xTadShapeInfo, Nd4jLong *xOffsets, - Nd4jLong *yTadShapeInfo, Nd4jLong *yOffsets) { + OpaqueDataBuffer *dbY, Nd4jLong const* hYShapeInfo, Nd4jLong const* dYShapeInfo, + OpaqueDataBuffer *dbZ, Nd4jLong const* hZShapeInfo, Nd4jLong const* dZShapeInfo, + OpaqueDataBuffer *dbDimension, Nd4jLong const* hDimensionShape, Nd4jLong const* dDimensionShape, + Nd4jLong const* xTadShapeInfo, Nd4jLong const* xOffsets, + Nd4jLong const* yTadShapeInfo, Nd4jLong const* yOffsets) { try { InteropDataBuffer::prepareSpecialUse({dbZ}, {dbX, dbY, dbDimension}); InteropDataBuffer::preparePrimaryUse({}, {dbDimension}); @@ -2302,8 +2232,8 @@ void execReduce3All(Nd4jPointer *extraPointers, void sort(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void *x, Nd4jLong const* xShapeInfo, + void *dX, Nd4jLong const* dXShapeInfo, bool descending) { try { cudaStream_t *stream = reinterpret_cast(extraPointers[1]); @@ -2368,10 +2298,10 @@ void sort(Nd4jPointer *extraPointers, void sortByKey(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *y, Nd4jLong *yShapeInfo, - void *dy, Nd4jLong *dyShapeInfo, + void *x, Nd4jLong const* xShapeInfo, + void *dX, 
Nd4jLong const* dXShapeInfo, + void *y, Nd4jLong const* yShapeInfo, + void *dy, Nd4jLong const* dyShapeInfo, bool descending) { try { auto stream = reinterpret_cast(extraPointers[1]); @@ -2442,10 +2372,10 @@ void sortByKey(Nd4jPointer *extraPointers, } void sortByValue(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *y, Nd4jLong *yShapeInfo, - void *dy, Nd4jLong *dyShapeInfo, + void *x, Nd4jLong const* xShapeInfo, + void *dX, Nd4jLong const* dXShapeInfo, + void *y, Nd4jLong const* yShapeInfo, + void *dy, Nd4jLong const* dyShapeInfo, bool descending) { try { auto stream = reinterpret_cast(extraPointers[1]); @@ -2517,10 +2447,10 @@ void sortByValue(Nd4jPointer *extraPointers, void sortTadByKey(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *y, Nd4jLong *yShapeInfo, - void *dy, Nd4jLong *dyShapeInfo, + void *x, Nd4jLong const* xShapeInfo, + void *dX, Nd4jLong const* dXShapeInfo, + void *y, Nd4jLong const* yShapeInfo, + void *dy, Nd4jLong const* dyShapeInfo, int *dimension, int dimensionLength, bool descending) { @@ -2544,10 +2474,10 @@ void sortTadByKey(Nd4jPointer *extraPointers, } void sortTadByValue(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *y, Nd4jLong *yShapeInfo, - void *dy, Nd4jLong *dyShapeInfo, + void *x, Nd4jLong const* xShapeInfo, + void *dX, Nd4jLong const* dXShapeInfo, + void *y, Nd4jLong const* yShapeInfo, + void *dy, Nd4jLong const* dyShapeInfo, int *dimension, int dimensionLength, bool descending) { @@ -2573,12 +2503,12 @@ void sortTadByValue(Nd4jPointer *extraPointers, void sortTad(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, + void *x, Nd4jLong const* xShapeInfo, + void *dX, Nd4jLong const* dXShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets, + Nd4jLong const* tadShapeInfo, + Nd4jLong 
const* tadOffsets, bool descending) { try { // to be implemented @@ -2603,55 +2533,6 @@ void sortCooIndices(Nd4jPointer *extraPointers, Nd4jLong *indices, void *values, throw std::runtime_error("sortCooIndices:: Not implemented yet"); } - -Nd4jLong encodeBitmap(Nd4jPointer *extraPointers, - void *dx, Nd4jLong *hXShapeInfo, - Nd4jLong N, - int *dz, - float threshold) { - try { - - cudaStream_t *stream = reinterpret_cast(extraPointers[1]); - int *resultPointer = reinterpret_cast(extraPointers[2]); - int *reductionPointer = reinterpret_cast(extraPointers[3]); - - dim3 launchDims(512, 512, 32768); - auto xType = sd::ArrayOptions::dataType(hXShapeInfo); - BUILD_SINGLE_SELECTOR(xType, cudaEncodeBitmapGeneric, - (launchDims, stream, dx, N, dz, resultPointer, reductionPointer, threshold), - LIBND4J_TYPES); - - sd::DebugHelper::checkErrorCode(stream, "encodeBitmapFloat(...) failed"); - - Nd4jLong dZ = (Nd4jLong) resultPointer[0]; - resultPointer[0] = 0; - - return dZ; - } catch (std::exception &e) { - sd::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); - sd::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); - return 0; - } -} - - -void decodeBitmap(Nd4jPointer *extraPointers, - void *dx, - Nd4jLong N, - void *dz, Nd4jLong *zShapeInfo) { - try { - cudaStream_t *stream = reinterpret_cast(extraPointers[1]); - dim3 launchDims(512, 512, 16384); - auto xType = sd::ArrayOptions::dataType(zShapeInfo); - BUILD_SINGLE_SELECTOR(xType, cudaDecodeBitmapGeneric, (launchDims, stream, dx, N, dz), LIBND4J_TYPES); - - sd::DebugHelper::checkErrorCode(stream, "decodeBitmapFloat(...) 
failed"); - } catch (std::exception &e) { - sd::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); - sd::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); - } -} - Nd4jLong* mmapFile(Nd4jPointer *extraPointers, const char *fileName, Nd4jLong length) { return nullptr; } @@ -2772,7 +2653,7 @@ Nd4jLong getShapeListSize(sd::ShapeList* list) { return list->size(); } -Nd4jLong* getShape(sd::ShapeList* list, Nd4jLong i) { +Nd4jLong const* getShape(sd::ShapeList* list, Nd4jLong i) { return list->at(i); } @@ -2996,7 +2877,7 @@ const char* getVariableName(sd::graph::Variable* variable) { return variable->getName()->c_str(); } -Nd4jLong* getVariableShape(sd::graph::Variable* variable) { +Nd4jLong const* getVariableShape(sd::graph::Variable* variable) { return variable->getNDArray()->shapeInfo(); } @@ -3145,7 +3026,7 @@ void deleteResultWrapper(Nd4jPointer ptr) { delete p; } -int estimateThreshold(Nd4jPointer *extraPointers, Nd4jPointer dX, Nd4jLong *dXShapeInfo, int N, float threshold) { +int estimateThreshold(Nd4jPointer *extraPointers, Nd4jPointer dX, Nd4jLong const* dXShapeInfo, int N, float threshold) { throw std::runtime_error("estimateThreshold: Not implemented yet"); } @@ -3356,7 +3237,7 @@ void deleteUtf8String(Nd4jPointer *extraPointers, Nd4jPointer ptr) { /////////////////////////////////////////////////////////////////// template __global__ static void scatterUpdateCuda(const int opCode, const int numOfSubArrs, - void* vx, const Nd4jLong *xShapeInfo, const Nd4jLong *xOffsets, + void* vx, const Nd4jLong* xShapeInfo, const Nd4jLong *xOffsets, void* vy, const Nd4jLong *yShapeInfo, const Nd4jLong *yOffsets, const void* vindexes) { @@ -3419,7 +3300,7 @@ __global__ static void scatterUpdateCuda(const int opCode, const int numOfSubArr } template -__host__ static void scatterUpdateCudaLauncher(const cudaStream_t* stream, const int opCode, const int numOfSubArrs, void* vx, const Nd4jLong *xShapeInfo, const Nd4jLong *xOffsets, 
void* vy, const Nd4jLong *yShapeInfo, const Nd4jLong *yOffsets, const void* indexes) { +__host__ static void scatterUpdateCudaLauncher(const cudaStream_t* stream, const int opCode, const int numOfSubArrs, void* vx, const Nd4jLong const* xShapeInfo, const Nd4jLong* xOffsets, void* vy, const Nd4jLong *yShapeInfo, const Nd4jLong *yOffsets, const void* indexes) { scatterUpdateCuda<<<512, 256, MAX_NUM_THREADS, *stream>>>(opCode, numOfSubArrs, vx, xShapeInfo, xOffsets, vy, yShapeInfo, yOffsets, indexes); } @@ -3427,11 +3308,11 @@ __host__ static void scatterUpdateCudaLauncher(const cudaStream_t* stream, const ////////////////////////////////////////////////////////////////////////// void scatterUpdate(Nd4jPointer *extraPointers, int opCode, int numOfSubArrs, - void* hX, Nd4jLong* hXShapeInfo, Nd4jLong* hXOffsets, - void* dX, Nd4jLong* dXShapeInfo, Nd4jLong* dXOffsets, - void* hY, Nd4jLong* hYShapeInfo, Nd4jLong* hYOffsets, - void* dY, Nd4jLong* dYShapeInfo, Nd4jLong* dYOffsets, - void* hIindexes, Nd4jLong* hIndicesShapeInfo, void* dIindexes, Nd4jLong* dIndicesShapeInfo) { + void* hX, Nd4jLong const* hXShapeInfo, Nd4jLong const* hXOffsets, + void* dX, Nd4jLong const* dXShapeInfo, Nd4jLong const* dXOffsets, + void* hY, Nd4jLong const* hYShapeInfo, Nd4jLong const* hYOffsets, + void* dY, Nd4jLong const* dYShapeInfo, Nd4jLong const* dYOffsets, + void* hIindexes, Nd4jLong const* hIndicesShapeInfo, void* dIindexes, Nd4jLong const* dIndicesShapeInfo) { try { auto stream = reinterpret_cast(extraPointers[1]); @@ -3528,7 +3409,7 @@ bool isBlasVersionMatches(int major, int minor, int build) { return result; } -sd::ConstantDataBuffer* constantBufferLong(sd::DataType dtype, Nd4jLong *data, int length) { +sd::ConstantDataBuffer* constantBufferLong(sd::DataType dtype, Nd4jLong const* data, int length) { return sd::ConstantHelper::getInstance()->constantBuffer(ConstantDescriptor(data, length), dtype); } @@ -3674,8 +3555,7 @@ Nd4jPointer shapeBufferForNumpy(Nd4jPointer npyArray) { } else 
{ shapeBuffer = sd::ShapeBuilders::createShapeInfo(dtype, arr.fortranOrder ? 'f' : 'c', shape); } - return reinterpret_cast(sd::ConstantShapeHelper::getInstance()->createFromExisting(shapeBuffer, - true)); + return (Nd4jPointer)(sd::ConstantShapeHelper::getInstance()->createFromExisting(shapeBuffer, true)); // TO DO: this can lead to unpleasant crash sometimes } catch (std::exception &e) { sd::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); sd::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); diff --git a/libnd4j/include/legacy/impl/Environment.cpp b/libnd4j/include/legacy/impl/Environment.cpp index fae3a28dc..b19a7147b 100644 --- a/libnd4j/include/legacy/impl/Environment.cpp +++ b/libnd4j/include/legacy/impl/Environment.cpp @@ -207,7 +207,7 @@ namespace sd { } void Environment::setMaxSpecialyMemory(uint64_t maxBytes) { - _maxTotalSpecialMemory; + _maxTotalSpecialMemory = maxBytes; } void Environment::setMaxDeviceMemory(uint64_t maxBytes) { diff --git a/libnd4j/include/loops/BroadcastScalarConverter.h b/libnd4j/include/loops/BroadcastScalarConverter.h index 12006c293..f4d536f33 100644 --- a/libnd4j/include/loops/BroadcastScalarConverter.h +++ b/libnd4j/include/loops/BroadcastScalarConverter.h @@ -21,6 +21,7 @@ #define DEV_TESTS_BROADCASTSCALARCONVERTER_H #include +#include #include namespace sd { diff --git a/libnd4j/include/loops/broadcasting.h b/libnd4j/include/loops/broadcasting.h index 20c95588c..4f05f0c6e 100755 --- a/libnd4j/include/loops/broadcasting.h +++ b/libnd4j/include/loops/broadcasting.h @@ -56,18 +56,15 @@ namespace functions { class Broadcast { public: -#ifdef __CUDACC__ +#ifdef __CUDABLAS__ template - static __device__ void transformCuda( - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, - int *dimension, - int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + 
static __device__ void transformCuda(const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); template static __device__ void transformCuda(const void *x, const Nd4jLong *xShapeInfo, @@ -75,67 +72,83 @@ namespace functions { void *z, const Nd4jLong *zShapeInfo); template - static __host__ void intermediateBroadcast(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *result, Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + static __host__ void intermediateBroadcast(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); template - static __host__ void intermediateBroadcast(dim3 launchDims, cudaStream_t *stream, const void *x, const Nd4jLong *xShapeInfo, const void *y, const Nd4jLong *yShapeInfo, void *z, const Nd4jLong *zShapeInfo); + static __host__ void intermediateBroadcast(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *z, const Nd4jLong *zShapeInfo); - static __host__ void execBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *result, Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong 
*tadOffsetsZ); + static __host__ void execBroadcast(dim3 launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); - static __host__ void execBroadcast(dim3 launchDims, cudaStream_t *stream, const int opNum, const void *x, const Nd4jLong *xShapeInfo, const void *y, const Nd4jLong *yShapeInfo, void *z, const Nd4jLong *zShapeInfo); + static __host__ void execBroadcast(dim3 launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *z, const Nd4jLong *zShapeInfo); template - static __device__ void transformInverseCuda( - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, - int *dimension, - int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + static __device__ void transformInverseCuda(const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); template - static __host__ void intermediateInverseBroadcast(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *result, Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + static __host__ void intermediateInverseBroadcast(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShapeInfo, + 
const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); - static __host__ void execInverseBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *result, Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + static __host__ void execInverseBroadcast(dim3 launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); #else static void execInverse(int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, - Nd4jLong *tadShapeInfoZ, - Nd4jLong *tadOffsetZ, - uint64_t start, - uint64_t stop); + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetZ, + uint64_t start, uint64_t stop); static void exec(int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, - Nd4jLong *tadShapeInfoZ, - Nd4jLong *tadOffsetZ, + const void *x, const Nd4jLong *xShapeInfo, + 
const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetZ, sd::LoopKind::Kind loopKind, - uint64_t start, - uint64_t stop); + uint64_t start, uint64_t stop); /** * CPU execution @@ -149,39 +162,25 @@ namespace functions { * @param dimensionLength the length of the dimension buffer */ template - static void exec(void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, - Nd4jLong *tadShapeInfoZ, - Nd4jLong *tadOffsetZ, + static void exec(const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetZ, sd::LoopKind::Kind loopKind, - uint64_t start, - uint64_t stop); + uint64_t start, uint64_t stop); template - static void execInverse(void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, - Nd4jLong *tadShapeInfoZ, - Nd4jLong *tadOffsetZ, - uint64_t start, - uint64_t stop); + static void execInverse(const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetZ, + uint64_t start, uint64_t stop); - static void exec(const int opNum, + static void exec(int opNum, const void *x, const Nd4jLong *xShapeInfo, const void *y, const Nd4jLong 
*yShapeInfo, void *z, const Nd4jLong *zShapeInfo); diff --git a/libnd4j/include/loops/broadcasting_bool.h b/libnd4j/include/loops/broadcasting_bool.h index 9bab82c81..400269c02 100644 --- a/libnd4j/include/loops/broadcasting_bool.h +++ b/libnd4j/include/loops/broadcasting_bool.h @@ -58,16 +58,13 @@ namespace functions { #ifdef __CUDACC__ template - static __device__ void transformCuda( - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, - void *extraParams, - int *dimension, - int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + static __device__ void transformCuda(const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + void *extraParams, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); template static __device__ void transformCuda(const void *x, const Nd4jLong *xShapeInfo, @@ -76,7 +73,7 @@ namespace functions { void *extraParams); template - static __host__ void intermediateBroadcast(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *result, Nd4jLong *resultShapeInfo, void *extraParams, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + static __host__ void intermediateBroadcast(dim3 launchDims, cudaStream_t *stream, void const* x, Nd4jLong const* xShapeInfo, void const* y, Nd4jLong const* yShapeInfo, void *result, Nd4jLong const* resultShapeInfo, void *extraParams, int *dimension, int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ); template static __host__ void intermediateBroadcast(dim3 
launchDims, cudaStream_t *stream, @@ -85,7 +82,7 @@ namespace functions { void *z, const Nd4jLong *zShapeInfo, void *extraParams); - static __host__ void execBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *result, Nd4jLong *resultShapeInfo, void *extraParams, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + static __host__ void execBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void const* x, Nd4jLong const* xShapeInfo, void const* y, Nd4jLong const* yShapeInfo, void *result, Nd4jLong const* resultShapeInfo, void *extraParams, int *dimension, int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ); static __host__ void execBroadcast(dim3 launchDims, cudaStream_t *stream, const int opNum, const void *x, const Nd4jLong *xShapeInfo, @@ -94,63 +91,61 @@ namespace functions { void *extraParams); template - static __device__ void transformInverseCuda( - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, - void *extraParams, - int *dimension, - int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + static __device__ void transformInverseCuda(const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + void *extraParams, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); template - static __host__ void intermediateInverseBroadcast(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *result, Nd4jLong *resultShapeInfo, void 
*extraParams, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + static __host__ void intermediateInverseBroadcast(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + void *extraParams, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); - static __host__ void execInverseBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *result, Nd4jLong *resultShapeInfo, void *extraParams, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + static __host__ void execInverseBroadcast(dim3 launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + void *extraParams, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); #else static void exec(int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, void *extraParams, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, - Nd4jLong *tadShapeInfoZ, - Nd4jLong *tadOffsetZ, - uint64_t start, - uint64_t stop); + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetZ, 
+ uint64_t start, uint64_t stop); - static void exec(const int opNum, + static void exec(int opNum, const void *x, const Nd4jLong *xShapeInfo, const void *y, const Nd4jLong *yShapeInfo, void *z, const Nd4jLong *zShapeInfo, void *extraParams); static void execInverse(int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, - void *extraParams, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, - Nd4jLong *tadShapeInfoZ, - Nd4jLong *tadOffsetZ, - uint64_t start, - uint64_t stop); + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + void *extraParams, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetZ, + uint64_t start, uint64_t stop); /** * CPU execution @@ -164,21 +159,14 @@ namespace functions { * @param dimensionLength the length of the dimension buffer */ template - static void exec(void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, + static void exec(const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, void *extraParams, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, - Nd4jLong *tadShapeInfoZ, - Nd4jLong *tadOffsetZ, - uint64_t start, - uint64_t stop); + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetZ, + uint64_t start, uint64_t stop); template static void exec(const void *x, const Nd4jLong *xShapeInfo, @@ -187,21 +175,14 @@ namespace functions { void *extraParams); template - static void execInverse(void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void 
*result, - Nd4jLong *resultShapeInfo, - void *extraParams, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, - Nd4jLong *tadShapeInfoZ, - Nd4jLong *tadOffsetZ, - uint64_t start, - uint64_t stop); + static void execInverse(const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + void *extraParams, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetZ, + uint64_t start, uint64_t stop); #endif }; } diff --git a/libnd4j/include/loops/broadcasting_int.h b/libnd4j/include/loops/broadcasting_int.h index 81149ad8a..386fbd3f7 100644 --- a/libnd4j/include/loops/broadcasting_int.h +++ b/libnd4j/include/loops/broadcasting_int.h @@ -58,15 +58,12 @@ namespace functions { #ifdef __CUDACC__ template - static __device__ void transformCuda( - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, - int *dimension, - int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + static __device__ void transformCuda(const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); template static __device__ void transformCuda(const void *x, const Nd4jLong *xShapeInfo, @@ -74,7 +71,13 @@ namespace functions { void *z, const Nd4jLong *zShapeInfo); template - static __host__ void intermediateBroadcast(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *result, Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, 
Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + static __host__ void intermediateBroadcast(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); template static __host__ void intermediateBroadcast(dim3 launchDims, cudaStream_t *stream, @@ -82,7 +85,14 @@ namespace functions { const void *y, const Nd4jLong *yShapeInfo, void *z, const Nd4jLong *zShapeInfo); - static __host__ void execBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *result, Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + static __host__ void execBroadcast(dim3 launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); static __host__ void execBroadcast(dim3 launchDims, cudaStream_t *stream, const int opNum, const void *x, const Nd4jLong *xShapeInfo, @@ -90,59 +100,55 @@ namespace functions { void *z, const Nd4jLong *zShapeInfo); template - static __device__ void transformInverseCuda( - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, - int *dimension, - int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + static __device__ void transformInverseCuda(const void *x, 
const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); template - static __host__ void intermediateInverseBroadcast(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *result, Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + static __host__ void intermediateInverseBroadcast(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); - static __host__ void execInverseBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *result, Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ); + static __host__ void execInverseBroadcast(dim3 launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadOnlyShapeInfoZ, const Nd4jLong *tadOffsetsZ); #else static void exec(int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - 
Nd4jLong *tadOffset, - Nd4jLong *tadShapeInfoZ, - Nd4jLong *tadOffsetZ, - uint64_t start, - uint64_t stop); + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetZ, + uint64_t start, uint64_t stop); - static void exec(const int opNum, + static void exec(int opNum, const void *x, const Nd4jLong *xShapeInfo, const void *y, const Nd4jLong *yShapeInfo, void *z, const Nd4jLong *zShapeInfo); static void execInverse(int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, - Nd4jLong *tadShapeInfoZ, - Nd4jLong *tadOffsetZ, - uint64_t start, - uint64_t stop); + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetZ, + uint64_t start, uint64_t stop); /** * CPU execution @@ -156,20 +162,13 @@ namespace functions { * @param dimensionLength the length of the dimension buffer */ template - static void exec(void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, - Nd4jLong *tadShapeInfoZ, - Nd4jLong *tadOffsetZ, - uint64_t start, - uint64_t stop); + static void exec(const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + const 
Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetZ, + uint64_t start, uint64_t stop); template static void exec(const void *x, const Nd4jLong *xShapeInfo, @@ -177,20 +176,13 @@ namespace functions { void *z, const Nd4jLong *zShapeInfo); template - static void execInverse(void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, - Nd4jLong *tadShapeInfoZ, - Nd4jLong *tadOffsetZ, - uint64_t start, - uint64_t stop); + static void execInverse(const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetZ, + uint64_t start, uint64_t stop); #endif }; } diff --git a/libnd4j/include/loops/cpu/broadcasting.hpp b/libnd4j/include/loops/cpu/broadcasting.hpp index 8de52cca7..c0f22313b 100644 --- a/libnd4j/include/loops/cpu/broadcasting.hpp +++ b/libnd4j/include/loops/cpu/broadcasting.hpp @@ -34,20 +34,13 @@ namespace broadcast { template void Broadcast::execInverse(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *z, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *xTadShapeInfo, - Nd4jLong *xTadOffset, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffset, - uint64_t start, - uint64_t stop) { + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffset, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffset, + uint64_t start, uint64_t stop) { DISPATCH_BY_OPNUM_TTT(execInverse, PARAMS(x, xShapeInfo, y, @@ -64,21 +57,14 @@ namespace broadcast { template void 
Broadcast::exec(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *z, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *xTadShapeInfo, - Nd4jLong *xTadOffset, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffset, - sd::LoopKind::Kind loopKind, - uint64_t start, - uint64_t stop) { + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffset, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffset, + sd::LoopKind::Kind loopKind, + uint64_t start, uint64_t stop) { DISPATCH_BY_OPNUM_TTT(exec, PARAMS(x, xShapeInfo, y, @@ -96,24 +82,17 @@ namespace broadcast { template template - void Broadcast::exec(void *vx, - Nd4jLong *xShapeInfo, - void *vy, - Nd4jLong *yShapeInfo, - void *vz, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *xTadShapeInfo, - Nd4jLong *xTadOffset, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffset, - sd::LoopKind::Kind loopKind, - uint64_t start, - uint64_t stop) { + void Broadcast::exec(const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffset, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffset, + sd::LoopKind::Kind loopKind, + uint64_t start, uint64_t stop) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); //decompose in to several sub tads after @@ -397,23 +376,16 @@ namespace broadcast { template template - void Broadcast::execInverse(void *vx, - Nd4jLong *xShapeInfo, - void *vy, - Nd4jLong *yShapeInfo, - void *vz, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong 
*yTadShapeInfo, - Nd4jLong *yTadOffset, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffset, - uint64_t start, - uint64_t stop) { + void Broadcast::execInverse(const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *yTadShapeInfo, const Nd4jLong *yTadOffset, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffset, + uint64_t start, uint64_t stop) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); //decompose in to several sub tads after diff --git a/libnd4j/include/loops/cpu/broadcasting_bool.hpp b/libnd4j/include/loops/cpu/broadcasting_bool.hpp index 21b40cb55..18c8705e2 100644 --- a/libnd4j/include/loops/cpu/broadcasting_bool.hpp +++ b/libnd4j/include/loops/cpu/broadcasting_bool.hpp @@ -33,21 +33,14 @@ namespace broadcast { template void BroadcastBool::exec(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *z, - Nd4jLong *zShapeInfo, - void *extraParams, - int *dimension, - int dimensionLength, - Nd4jLong *xTadShapeInfo, - Nd4jLong *xTadOffset, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffset, - uint64_t start, - uint64_t stop) { + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + void *extraParams, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffset, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffset, + uint64_t start, uint64_t stop) { DISPATCH_BY_OPNUM_TT(exec, PARAMS(x, xShapeInfo, y, @@ -75,21 +68,14 @@ namespace broadcast { template void BroadcastBool::execInverse(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *z, - Nd4jLong *zShapeInfo, - void *extraParams, - int *dimension, - int 
dimensionLength, - Nd4jLong *xTadShapeInfo, - Nd4jLong *xTadOffset, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffset, - uint64_t start, - uint64_t stop) { + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + void *extraParams, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffset, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffset, + uint64_t start, uint64_t stop) { DISPATCH_BY_OPNUM_TT(execInverse, PARAMS(x, xShapeInfo, y, @@ -107,24 +93,17 @@ namespace broadcast { template template - void BroadcastBool::exec(void *vx, - Nd4jLong *xShapeInfo, - void *vy, - Nd4jLong *yShapeInfo, - void *vz, - Nd4jLong *zShapeInfo, - void *vextraParams, - int *dimension, - int dimensionLength, - Nd4jLong *xTadShapeInfo, - Nd4jLong *xTadOffset, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffset, - uint64_t start, - uint64_t stop) { + void BroadcastBool::exec(const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + void *vextraParams, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffset, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffset, + uint64_t start, uint64_t stop) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -138,8 +117,8 @@ namespace broadcast { if (xTadShapeInfo == nullptr || tadOffsets == nullptr) { auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength); - xTadShapeShapeInfo = tadPack.primaryShapeInfo(); - tadOffsets = tadPack.primaryOffsets(); + xTadShapeShapeInfo = const_cast(tadPack.primaryShapeInfo()); + tadOffsets = const_cast(tadPack.primaryOffsets()); } //int *resultStride = 
shape::stride(xTadShapeShapeInfo); @@ -279,24 +258,17 @@ namespace broadcast { template template - void BroadcastBool::execInverse(void *vx, - Nd4jLong *xShapeInfo, - void *vy, - Nd4jLong *yShapeInfo, - void *vz, - Nd4jLong *zShapeInfo, - void *vextraParams, - int *dimension, - int dimensionLength, - Nd4jLong *yTadShapeInfo, - Nd4jLong *yTadOffset, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffset, - uint64_t start, - uint64_t stop) { + void BroadcastBool::execInverse(const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + void *vextraParams, + int *dimension, int dimensionLength, + const Nd4jLong *yTadShapeInfo, const Nd4jLong *yTadOffset, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffset, + uint64_t start, uint64_t stop) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -310,8 +282,8 @@ namespace broadcast { if (yTadShapeInfo == nullptr || tadOffsets == nullptr) { auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(yShapeInfo, dimension, dimensionLength); - yTadShapeShapeInfo = tadPack.primaryShapeInfo(); - tadOffsets = tadPack.primaryOffsets(); + yTadShapeShapeInfo = const_cast(tadPack.primaryShapeInfo()); + tadOffsets = const_cast(tadPack.primaryOffsets()); } //int *resultStride = shape::stride(yTadShapeShapeInfo); diff --git a/libnd4j/include/loops/cpu/broadcasting_int.hpp b/libnd4j/include/loops/cpu/broadcasting_int.hpp index 456994b16..7d0a995d6 100644 --- a/libnd4j/include/loops/cpu/broadcasting_int.hpp +++ b/libnd4j/include/loops/cpu/broadcasting_int.hpp @@ -33,20 +33,13 @@ namespace functions { template void BroadcastInt::exec(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *z, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength, 
- Nd4jLong *xTadShapeInfo, - Nd4jLong *xTadOffset, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffset, - uint64_t start, - uint64_t stop) { + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffset, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffset, + uint64_t start, uint64_t stop) { DISPATCH_BY_OPNUM_T(exec, PARAMS(x, xShapeInfo, y, @@ -72,20 +65,13 @@ namespace functions { template void BroadcastInt::execInverse(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *z, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *xTadShapeInfo, - Nd4jLong *xTadOffset, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffset, - uint64_t start, - uint64_t stop) { + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffset, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffset, + uint64_t start, uint64_t stop) { DISPATCH_BY_OPNUM_T(execInverse, PARAMS(x, xShapeInfo, y, @@ -102,23 +88,16 @@ namespace functions { template template - void BroadcastInt::exec(void *vx, - Nd4jLong *xShapeInfo, - void *vy, - Nd4jLong *yShapeInfo, - void *vz, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *xTadShapeInfo, - Nd4jLong *xTadOffset, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffset, - uint64_t start, - uint64_t stop) { + void BroadcastInt::exec(const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffset, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffset, + uint64_t start, uint64_t 
stop) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); //decompose in to several sub tads after @@ -131,8 +110,8 @@ namespace functions { if (xTadShapeInfo == nullptr || tadOffsets == nullptr) { auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength); - xTadShapeShapeInfo = tadPack.primaryShapeInfo(); - tadOffsets = tadPack.primaryOffsets(); + xTadShapeShapeInfo = const_cast(tadPack.primaryShapeInfo()); + tadOffsets = const_cast(tadPack.primaryOffsets()); } //int *resultStride = shape::stride(xTadShapeShapeInfo); @@ -272,23 +251,16 @@ namespace functions { template template - void BroadcastInt::execInverse(void *vx, - Nd4jLong *xShapeInfo, - void *vy, - Nd4jLong *yShapeInfo, - void *vz, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *yTadShapeInfo, - Nd4jLong *yTadOffset, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffset, - uint64_t start, - uint64_t stop) { + void BroadcastInt::execInverse(const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, const int dimensionLength, + const Nd4jLong *yTadShapeInfo, const Nd4jLong *yTadOffset, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffset, + uint64_t start, uint64_t stop) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); //decompose in to several sub tads after @@ -301,8 +273,8 @@ namespace functions { if (yTadShapeInfo == nullptr || tadOffsets == nullptr) { auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(yShapeInfo, dimension, dimensionLength); - yTadShapeShapeInfo = tadPack.primaryShapeInfo(); - tadOffsets = tadPack.primaryOffsets(); + yTadShapeShapeInfo = 
const_cast(tadPack.primaryShapeInfo()); + tadOffsets = const_cast(tadPack.primaryOffsets()); } //int *resultStride = shape::stride(yTadShapeShapeInfo); diff --git a/libnd4j/include/loops/cpu/indexreduce.hpp b/libnd4j/include/loops/cpu/indexreduce.hpp index d4abd8c82..296fbcdef 100644 --- a/libnd4j/include/loops/cpu/indexreduce.hpp +++ b/libnd4j/include/loops/cpu/indexreduce.hpp @@ -33,27 +33,27 @@ namespace indexreduce { //////////////////////////////////////////////////////////////////////// template -Nd4jLong IndexReduce::execScalar( const int opNum, void *x, Nd4jLong *xShapeInfo, void *extraParams) { +Nd4jLong IndexReduce::execScalar( const int opNum, const void *x, const Nd4jLong *xShapeInfo, void *extraParams) { RETURNING_DISPATCH_BY_OPNUM_TT(execScalar, PARAMS(x, xShapeInfo, extraParams), INDEX_REDUCE_OPS); } //////////////////////////////////////////////////////////////////////// template void IndexReduce::exec(const int opNum, - void *x, Nd4jLong *xShapeInfo, - void *extraParams, - void *z, Nd4jLong *zShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffset) { + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset) { DISPATCH_BY_OPNUM_TT(exec, PARAMS(x, xShapeInfo, extraParams, z, zShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffset), INDEX_REDUCE_OPS); } //////////////////////////////////////////////////////////////////////// template template -Nd4jLong IndexReduce::execScalar(void *vx, Nd4jLong *xShapeInfo, void *vextraParams) { +Nd4jLong IndexReduce::execScalar(const void *vx, const Nd4jLong *xShapeInfo, void *vextraParams) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto extraParams = reinterpret_cast(vextraParams); //T startingVal = OpType::startingValue(x); @@ -107,13 +107,13 @@ Nd4jLong IndexReduce::execScalar(void *vx, Nd4jLong 
*xShapeInfo, void *vex //////////////////////////////////////////////////////////////////////// template template -void IndexReduce::exec(void *vx, Nd4jLong *xShapeInfo, - void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffset) { +void IndexReduce::exec(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -136,7 +136,7 @@ void IndexReduce::exec(void *vx, Nd4jLong *xShapeInfo, } auto tadOnlyShapeInfo = tadShapeInfo; - Nd4jLong *tadOffsets = tadOffset; + auto tadOffsets = tadOffset; if (tadOnlyShapeInfo == nullptr || tadOffsets == nullptr) { if (dimensionLength < 1) diff --git a/libnd4j/include/loops/cpu/pairwise.hpp b/libnd4j/include/loops/cpu/pairwise.hpp index 27c97efa9..45fe46e8f 100644 --- a/libnd4j/include/loops/cpu/pairwise.hpp +++ b/libnd4j/include/loops/cpu/pairwise.hpp @@ -34,18 +34,13 @@ namespace functions { namespace pairwise_transforms { template - void PairWiseTransform::exec( - const int opNum, - void *x, - Nd4jLong xEws, - void *y, - Nd4jLong yEws, - void *z, - Nd4jLong zEws, - void *extraParams, - Nd4jLong n, - const uint64_t start, - const uint64_t stop) { + void PairWiseTransform::exec(const int opNum, + const void *x, Nd4jLong xEws, + const void *y, Nd4jLong yEws, + void *z, Nd4jLong zEws, + void *extraParams, + Nd4jLong n, + const uint64_t start,const uint64_t stop) { DISPATCH_BY_OPNUM_TTT(exec, PARAMS(x, xEws, y, @@ -60,16 +55,16 @@ namespace functions { template template - void PairWiseTransform::exec(void *vx, Nd4jLong xEws, - void *vy, Nd4jLong yEws, + void PairWiseTransform::exec(const void *vx, Nd4jLong xEws, + const void *vy, Nd4jLong yEws, void *vz, 
Nd4jLong zEws, void *vextraParams, const Nd4jLong n, const uint64_t start, const uint64_t stop) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -86,17 +81,12 @@ namespace functions { } template - void PairWiseTransform::exec( - const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *z, - Nd4jLong *zShapeInfo, - void *extraParams, - const uint64_t start, - const uint64_t stop) { + void PairWiseTransform::exec(const int opNum, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + void *extraParams, + const uint64_t start, const uint64_t stop) { DISPATCH_BY_OPNUM_TTT(exec, PARAMS(x, xShapeInfo, y, @@ -110,19 +100,14 @@ namespace functions { template template - void PairWiseTransform::exec( - void *vx, - Nd4jLong* xShapeInfo, - void *vy, - Nd4jLong* yShapeInfo, - void *vz, - Nd4jLong* zShapeInfo, - void *vextraParams, - const uint64_t start, - const uint64_t stop) { + void PairWiseTransform::exec(const void *vx, const Nd4jLong* xShapeInfo, + const void *vy, const Nd4jLong* yShapeInfo, + void *vz, const Nd4jLong* zShapeInfo, + void *vextraParams, + const uint64_t start, const uint64_t stop) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); diff --git a/libnd4j/include/loops/cpu/pairwise_bool.cpp b/libnd4j/include/loops/cpu/pairwise_bool.cpp index d77413e8c..dfcdf6bfa 100644 --- a/libnd4j/include/loops/cpu/pairwise_bool.cpp +++ b/libnd4j/include/loops/cpu/pairwise_bool.cpp @@ -30,18 +30,13 @@ namespace functions { namespace pairwise_transforms { template - void PairWiseBoolTransform::exec( - const int opNum, - void *x, - 
Nd4jLong xEws, - void *y, - Nd4jLong yEws, - void *z, - Nd4jLong zEws, - void *extraParams, - Nd4jLong n, - const uint64_t start, - const uint64_t stop) { + void PairWiseBoolTransform::exec(const int opNum, + const void *x, Nd4jLong xEws, + const void *y, Nd4jLong yEws, + void *z, Nd4jLong zEws, + void *extraParams, + Nd4jLong n, + const uint64_t start, const uint64_t stop) { DISPATCH_BY_OPNUM_TT(exec, PARAMS(x, xEws, y, @@ -56,19 +51,15 @@ namespace functions { template template - void PairWiseBoolTransform::exec(void *vx, - Nd4jLong xEws, - void *vy, - Nd4jLong yEws, - void *vz, - Nd4jLong zEws, - void *vextraParams, - const Nd4jLong n, - const uint64_t start, - const uint64_t stop) { + void PairWiseBoolTransform::exec(const void *vx, Nd4jLong xEws, + const void *vy, Nd4jLong yEws, + void *vz, Nd4jLong zEws, + void *vextraParams, + const Nd4jLong n, + const uint64_t start, const uint64_t stop) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -85,17 +76,12 @@ namespace functions { } template - void PairWiseBoolTransform::exec( - const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *z, - Nd4jLong *zShapeInfo, - void *extraParams, - const uint64_t start, - const uint64_t stop) { + void PairWiseBoolTransform::exec(const int opNum, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + void *extraParams, + const uint64_t start,const uint64_t stop) { DISPATCH_BY_OPNUM_TT(exec, PARAMS(x, xShapeInfo, y, @@ -109,15 +95,14 @@ namespace functions { template template - void PairWiseBoolTransform::exec(void *vx, Nd4jLong* xShapeInfo, - void *vy, Nd4jLong* yShapeInfo, - void *vz, Nd4jLong* zShapeInfo, - void *vextraParams, - const uint64_t start, - const uint64_t stop) { + void 
PairWiseBoolTransform::exec(const void *vx, const Nd4jLong* xShapeInfo, + const void *vy, const Nd4jLong* yShapeInfo, + void *vz, const Nd4jLong* zShapeInfo, + void *vextraParams, + const uint64_t start, const uint64_t stop) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); diff --git a/libnd4j/include/loops/cpu/pairwise_int.cpp b/libnd4j/include/loops/cpu/pairwise_int.cpp index 9af092a0f..b82216611 100644 --- a/libnd4j/include/loops/cpu/pairwise_int.cpp +++ b/libnd4j/include/loops/cpu/pairwise_int.cpp @@ -30,18 +30,13 @@ namespace functions { namespace pairwise_transforms { template - void PairWiseIntTransform::exec( - const int opNum, - void *x, - Nd4jLong xEws, - void *y, - Nd4jLong yEws, - void *z, - Nd4jLong zEws, - void *extraParams, - Nd4jLong n, - const uint64_t start, - const uint64_t stop) { + void PairWiseIntTransform::exec(const int opNum, + const void *x, Nd4jLong xEws, + const void *y, Nd4jLong yEws, + void *z, Nd4jLong zEws, + void *extraParams, + Nd4jLong n, + const uint64_t start, const uint64_t stop) { DISPATCH_BY_OPNUM_T(exec, PARAMS(x, xEws, y, @@ -56,19 +51,15 @@ namespace functions { template template - void PairWiseIntTransform::exec(void *vx, - Nd4jLong xEws, - void *vy, - Nd4jLong yEws, - void *vz, - Nd4jLong zEws, - void *vextraParams, - const Nd4jLong n, - const uint64_t start, - const uint64_t stop) { - - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + void PairWiseIntTransform::exec(const void *vx, Nd4jLong xEws, + const void *vy, Nd4jLong yEws, + void *vz, Nd4jLong zEws, + void *vextraParams, + const Nd4jLong n, + const uint64_t start, + const uint64_t stop) { + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -85,17 +76,12 @@ namespace functions { } 
template - void PairWiseIntTransform::exec( - const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *z, - Nd4jLong *zShapeInfo, - void *extraParams, - const uint64_t start, - const uint64_t stop) { + void PairWiseIntTransform::exec(const int opNum, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + void *extraParams, + const uint64_t start, const uint64_t stop) { DISPATCH_BY_OPNUM_T(exec, PARAMS(x, xShapeInfo, y, @@ -109,15 +95,15 @@ namespace functions { template template - void PairWiseIntTransform::exec(void *vx, Nd4jLong* xShapeInfo, - void *vy, Nd4jLong* yShapeInfo, - void *vz, Nd4jLong* zShapeInfo, - void *vextraParams, - const uint64_t start, - const uint64_t stop) { + void PairWiseIntTransform::exec(const void *vx, const Nd4jLong* xShapeInfo, + const void *vy, const Nd4jLong* yShapeInfo, + void *vz, const Nd4jLong* zShapeInfo, + void *vextraParams, + const uint64_t start, + const uint64_t stop) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); diff --git a/libnd4j/include/loops/cpu/random.hpp b/libnd4j/include/loops/cpu/random.hpp index 034179f07..ea1dc9e76 100644 --- a/libnd4j/include/loops/cpu/random.hpp +++ b/libnd4j/include/loops/cpu/random.hpp @@ -33,16 +33,13 @@ namespace functions { template template void RandomFunction::execTransform(Nd4jPointer state, - void *vx, - Nd4jLong *xShapeInfo, - void *vy, - Nd4jLong *yShapeInfo, - void *vz, - Nd4jLong *zShapeInfo, - void *vextraArguments) { + const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + void *vextraArguments) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = 
reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); @@ -166,12 +163,10 @@ namespace functions { template template void RandomFunction::execTransform(Nd4jPointer state, - void *vx, - Nd4jLong *xShapeInfo, - void *vz, - Nd4jLong *zShapeInfo, - void *vextraArguments) { - auto x = reinterpret_cast(vx); + const void *vx, const Nd4jLong *xShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + void *vextraArguments) { + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); @@ -227,7 +222,7 @@ namespace functions { template template - void RandomFunction::execTransform(Nd4jPointer state, void *vz, Nd4jLong *zShapeInfo, void *vextraArguments) { + void RandomFunction::execTransform(Nd4jPointer state, void *vz, const Nd4jLong *zShapeInfo, void *vextraArguments) { auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); @@ -266,17 +261,17 @@ namespace functions { } template - void RandomFunction::execTransform(int opNum, Nd4jPointer state, void *x, Nd4jLong *xShapeInfo, void *z, Nd4jLong *zShapeInfo, void *extraArguments) { + void RandomFunction::execTransform(int opNum, Nd4jPointer state, const void *x, const Nd4jLong *xShapeInfo, void *z, const Nd4jLong *zShapeInfo, void *extraArguments) { DISPATCH_BY_OPNUM_T(execTransform, PARAMS(state, x, xShapeInfo, z, zShapeInfo, extraArguments), RANDOM_OPS) } template - void RandomFunction::execTransform(int opNum, Nd4jPointer state, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *z, Nd4jLong *zShapeInfo, void *extraArguments) { + void RandomFunction::execTransform(int opNum, Nd4jPointer state, const void *x, const Nd4jLong *xShapeInfo, const void *y, const Nd4jLong *yShapeInfo, void *z, const Nd4jLong *zShapeInfo, void *extraArguments) { DISPATCH_BY_OPNUM_T(execTransform, PARAMS(state, x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, extraArguments), RANDOM_OPS) } 
template - void RandomFunction::execTransform(int opNum, Nd4jPointer state, void *z, Nd4jLong *zShapeInfo, void *extraArguments) { + void RandomFunction::execTransform(int opNum, Nd4jPointer state, void *z, const Nd4jLong *zShapeInfo, void *extraArguments) { DISPATCH_BY_OPNUM_T(execTransform, PARAMS(state, z, zShapeInfo, extraArguments), RANDOM_OPS) } diff --git a/libnd4j/include/loops/cpu/reduce/reduce_bool.cpp b/libnd4j/include/loops/cpu/reduce/reduce_bool.cpp index afb441a45..708f3c0d7 100644 --- a/libnd4j/include/loops/cpu/reduce/reduce_bool.cpp +++ b/libnd4j/include/loops/cpu/reduce/reduce_bool.cpp @@ -33,12 +33,10 @@ namespace functions { namespace reduce { template template - void _CUDA_H ReduceBoolFunction::execScalar(void *vx, - Nd4jLong *xShapeInfo, - void *vextraParams, - void *vz, - Nd4jLong *zShapeInfo) { - auto x = reinterpret_cast(vx); + void _CUDA_H ReduceBoolFunction::execScalar(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + void *vz, const Nd4jLong *zShapeInfo) { + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -78,9 +76,9 @@ namespace functions { template template - Z _CUDA_H ReduceBoolFunction::execScalar(void *vx, Nd4jLong *xShapeInfo, void *vextraParams) { + Z _CUDA_H ReduceBoolFunction::execScalar(const void *vx, const Nd4jLong *xShapeInfo, void *vextraParams) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto extraParams = reinterpret_cast(vextraParams); const Nd4jLong length = shape::length(xShapeInfo); @@ -103,49 +101,39 @@ namespace functions { template Y ReduceBoolFunction::execScalar(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams) { + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams) { RETURNING_DISPATCH_BY_OPNUM_TT(execScalar, PARAMS(x, xShapeInfo, extraParams), REDUCE_BOOL_OPS); } template void ReduceBoolFunction::execScalar(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void 
*extraParams, - void *z, - Nd4jLong *zShapeInfo) { + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo) { DISPATCH_BY_OPNUM_TT(execScalar, PARAMS(x, xShapeInfo, extraParams, z, zShapeInfo), REDUCE_BOOL_OPS); } template void ReduceBoolFunction::exec(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *z, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, int64_t start, int64_t stop) { + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + int64_t start, int64_t stop) { DISPATCH_BY_OPNUM_TT(exec, PARAMS(x, xShapeInfo, extraParams, z, zShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffset, start, stop), REDUCE_BOOL_OPS); } template template - void _CUDA_H ReduceBoolFunction::exec(void *vx, - Nd4jLong *xShapeInfo, - void *vextraParams, - void *vresult, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, int64_t start, int64_t stop) { + void _CUDA_H ReduceBoolFunction::exec(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + void *vresult, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, int64_t start, int64_t stop) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vresult); auto extraParams = reinterpret_cast(vextraParams); @@ -193,20 +181,17 @@ namespace functions { template template - void _CUDA_H ReduceBoolFunction::exec(void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *vresult, - Nd4jLong *resultShapeInfo) { - // FIXME: wtf??? 
+ void _CUDA_H ReduceBoolFunction::exec(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *vresult, const Nd4jLong *resultShapeInfo) { auto z = reinterpret_cast(vresult); z[0] = execScalar(x, xShapeInfo, extraParams); } template template - Z _CUDA_H ReduceBoolFunction::execScalar(void *vx, Nd4jLong xEws, Nd4jLong length, void *vextraParams) { - auto x = reinterpret_cast(vx); + Z _CUDA_H ReduceBoolFunction::execScalar(const void *vx, Nd4jLong xEws, Nd4jLong length, void *vextraParams) { + auto x = reinterpret_cast(vx); auto extraParams = reinterpret_cast(vextraParams); int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxThreads()); Z intermediate[64]; diff --git a/libnd4j/include/loops/cpu/reduce/reduce_float.hpp b/libnd4j/include/loops/cpu/reduce/reduce_float.hpp index 40c24f4fa..1795dbc3d 100644 --- a/libnd4j/include/loops/cpu/reduce/reduce_float.hpp +++ b/libnd4j/include/loops/cpu/reduce/reduce_float.hpp @@ -33,12 +33,10 @@ namespace functions { namespace reduce { template template - void _CUDA_H ReduceFloatFunction::execScalar(void *vx, - Nd4jLong *xShapeInfo, - void *vextraParams, - void *vz, - Nd4jLong *zShapeInfo) { - auto x = reinterpret_cast(vx); + void _CUDA_H ReduceFloatFunction::execScalar(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + void *vz, const Nd4jLong *zShapeInfo) { + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -98,8 +96,8 @@ namespace functions { template template - Z _CUDA_H ReduceFloatFunction::execScalar(void *vx, Nd4jLong *xShapeInfo, void *vextraParams) { - auto x = reinterpret_cast(vx); + Z _CUDA_H ReduceFloatFunction::execScalar(const void *vx, const Nd4jLong *xShapeInfo, void *vextraParams) { + auto x = reinterpret_cast(vx); auto extraParams = reinterpret_cast(vextraParams); const Nd4jLong length = shape::length(xShapeInfo); @@ -122,33 +120,27 @@ namespace functions { template Y 
ReduceFloatFunction::execScalar(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams) { + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams) { RETURNING_DISPATCH_BY_OPNUM_TT(execScalar, PARAMS(x, xShapeInfo, extraParams), REDUCE_FLOAT_OPS); } template void ReduceFloatFunction::execScalar(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *z, - Nd4jLong *zShapeInfo) { + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo) { DISPATCH_BY_OPNUM_TT(execScalar, PARAMS(x, xShapeInfo, extraParams, z, zShapeInfo), REDUCE_FLOAT_OPS); } template void ReduceFloatFunction::exec(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *z, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, int64_t start, int64_t stop) { + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + int64_t start, int64_t stop) { DISPATCH_BY_OPNUM_TT(exec, PARAMS(x, xShapeInfo, extraParams, @@ -163,17 +155,14 @@ namespace functions { template template - void _CUDA_H ReduceFloatFunction::exec(void *vx, - Nd4jLong *xShapeInfo, - void *vextraParams, - void *vresult, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, int64_t start, int64_t stop) { + void _CUDA_H ReduceFloatFunction::exec(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + void *vresult, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + int64_t start, int64_t stop) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vresult); auto extraParams = reinterpret_cast(vextraParams); @@ -226,11 +215,9 @@ namespace 
functions { template template - void _CUDA_H ReduceFloatFunction::exec(void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *vresult, - Nd4jLong *resultShapeInfo) { + void _CUDA_H ReduceFloatFunction::exec(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *vresult, const Nd4jLong *resultShapeInfo) { // FIXME: wtf??? auto z = reinterpret_cast(vresult); z[0] = execScalar(x, xShapeInfo, extraParams); @@ -238,9 +225,9 @@ namespace functions { template template - Z _CUDA_H ReduceFloatFunction::execScalar(void *vx, Nd4jLong xEws, Nd4jLong length, void *vextraParams) { + Z _CUDA_H ReduceFloatFunction::execScalar(const void *vx, Nd4jLong xEws, Nd4jLong length, void *vextraParams) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto extraParams = reinterpret_cast(vextraParams); int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxThreads()); Z intermediate[64]; diff --git a/libnd4j/include/loops/cpu/reduce/reduce_long.cpp b/libnd4j/include/loops/cpu/reduce/reduce_long.cpp index 98b462ebd..c1fd4385c 100644 --- a/libnd4j/include/loops/cpu/reduce/reduce_long.cpp +++ b/libnd4j/include/loops/cpu/reduce/reduce_long.cpp @@ -33,12 +33,10 @@ namespace functions { namespace reduce { template template - void _CUDA_H ReduceLongFunction::execScalar(void *vx, - Nd4jLong *xShapeInfo, - void *vextraParams, - void *vz, - Nd4jLong *zShapeInfo) { - auto x = reinterpret_cast(vx); + void _CUDA_H ReduceLongFunction::execScalar(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + void *vz, const Nd4jLong *zShapeInfo) { + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -93,10 +91,8 @@ namespace functions { template template - Z _CUDA_H ReduceLongFunction::execScalar(void *vx, - Nd4jLong *xShapeInfo, - void *vextraParams) { - auto x = reinterpret_cast(vx); + Z _CUDA_H ReduceLongFunction::execScalar(const void *vx, const Nd4jLong *xShapeInfo, void 
*vextraParams) { + auto x = reinterpret_cast(vx); auto extraParams = reinterpret_cast(vextraParams); const Nd4jLong length = shape::length(xShapeInfo); @@ -120,49 +116,40 @@ namespace functions { template Y ReduceLongFunction::execScalar(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams) { + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams) { RETURNING_DISPATCH_BY_OPNUM_TT(execScalar, PARAMS(x, xShapeInfo, extraParams), REDUCE_LONG_OPS); } template void ReduceLongFunction::execScalar(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *z, - Nd4jLong *zShapeInfo) { + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo) { DISPATCH_BY_OPNUM_TT(execScalar, PARAMS(x, xShapeInfo, extraParams, z, zShapeInfo), REDUCE_LONG_OPS); } template void ReduceLongFunction::exec(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *z, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, int64_t start, int64_t stop) { + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + int64_t start, int64_t stop) { DISPATCH_BY_OPNUM_TT(exec, PARAMS(x, xShapeInfo, extraParams, z, zShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffset, start, stop), REDUCE_LONG_OPS); } template template - void _CUDA_H ReduceLongFunction::exec(void *vx, - Nd4jLong *xShapeInfo, - void *vextraParams, - void *vresult, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, int64_t start, int64_t stop) { + void _CUDA_H ReduceLongFunction::exec(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + void *vresult, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong 
*tadShapeInfo, const Nd4jLong *tadOffset, + int64_t start, int64_t stop) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vresult); auto extraParams = reinterpret_cast(vextraParams); @@ -215,21 +202,18 @@ namespace functions { template template - void _CUDA_H ReduceLongFunction::exec(void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *vresult, - Nd4jLong *resultShapeInfo) { - // FIXME: wtf??? + void _CUDA_H ReduceLongFunction::exec(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *vresult, const Nd4jLong *resultShapeInfo) { auto z = reinterpret_cast(vresult); z[0] = execScalar(x, xShapeInfo, extraParams); } template template - Z _CUDA_H ReduceLongFunction::execScalar(void *vx, Nd4jLong xEws, Nd4jLong length, void *vextraParams) { + Z _CUDA_H ReduceLongFunction::execScalar(const void *vx, Nd4jLong xEws, Nd4jLong length, void *vextraParams) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto extraParams = reinterpret_cast(vextraParams); int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxThreads()); Z intermediate[64]; diff --git a/libnd4j/include/loops/cpu/reduce/reduce_same.cpp b/libnd4j/include/loops/cpu/reduce/reduce_same.cpp index f357b7e64..2516767b6 100644 --- a/libnd4j/include/loops/cpu/reduce/reduce_same.cpp +++ b/libnd4j/include/loops/cpu/reduce/reduce_same.cpp @@ -34,12 +34,10 @@ namespace functions { namespace reduce { template template - void _CUDA_H ReduceSameFunction::execScalar(void *vx, - Nd4jLong *xShapeInfo, - void *vextraParams, - void *vz, - Nd4jLong *zShapeInfo) { - auto x = reinterpret_cast(vx); + void _CUDA_H ReduceSameFunction::execScalar(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + void *vz, const Nd4jLong *zShapeInfo) { + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -95,10 +93,8 @@ namespace functions { template template - X 
_CUDA_H ReduceSameFunction::execScalar(void *vx, - Nd4jLong *xShapeInfo, - void *vextraParams) { - auto x = reinterpret_cast(vx); + X _CUDA_H ReduceSameFunction::execScalar(const void *vx, const Nd4jLong *xShapeInfo, void *vextraParams) { + auto x = reinterpret_cast(vx); auto extraParams = reinterpret_cast(vextraParams); const Nd4jLong length = shape::length(xShapeInfo); @@ -120,33 +116,27 @@ namespace functions { template X ReduceSameFunction::execScalar(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams) { + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams) { RETURNING_DISPATCH_BY_OPNUM_T(execScalar, PARAMS(x, xShapeInfo, extraParams), REDUCE_SAME_OPS); } template void ReduceSameFunction::execScalar(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *z, - Nd4jLong *zShapeInfo) { + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo) { DISPATCH_BY_OPNUM_T(execScalar, PARAMS(x, xShapeInfo, extraParams, z, zShapeInfo), REDUCE_SAME_OPS); } template void ReduceSameFunction::exec(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *z, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, int64_t start, int64_t stop) { + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + int64_t start, int64_t stop) { DISPATCH_BY_OPNUM_T(exec, PARAMS(x, xShapeInfo, extraParams, @@ -161,17 +151,14 @@ namespace functions { template template - void _CUDA_H ReduceSameFunction::exec(void *vx, - Nd4jLong *xShapeInfo, - void *vextraParams, - void *vz, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, int64_t start, int64_t stop) { + void _CUDA_H ReduceSameFunction::exec(const void 
*vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + int64_t start, int64_t stop) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -224,21 +211,18 @@ namespace functions { template template - void _CUDA_H ReduceSameFunction::exec(void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *vz, - Nd4jLong *zShapeInfo) { - // FIXME: wtf??? + void _CUDA_H ReduceSameFunction::exec(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *vz, const Nd4jLong *zShapeInfo) { auto z = reinterpret_cast(vz); z[0] = execScalar(x, xShapeInfo, extraParams); } template template - X _CUDA_H ReduceSameFunction::execScalar(void *vx, Nd4jLong xEws, Nd4jLong length, void *vextraParams) { + X _CUDA_H ReduceSameFunction::execScalar(const void *vx, Nd4jLong xEws, Nd4jLong length, void *vextraParams) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto extraParams = reinterpret_cast(vextraParams); int maxThreads = sd::math::nd4j_min(64, sd::Environment::getInstance()->maxThreads()); X intermediate[64]; diff --git a/libnd4j/include/loops/cpu/reduce3.hpp b/libnd4j/include/loops/cpu/reduce3.hpp index 961c6b1c8..3a830377e 100644 --- a/libnd4j/include/loops/cpu/reduce3.hpp +++ b/libnd4j/include/loops/cpu/reduce3.hpp @@ -34,13 +34,13 @@ namespace reduce3 { ////////////////////////////////////////////////////////////////////////// template template -void Reduce3::execScalar(void *vx, Nd4jLong *xShapeInfo, - void *vextraParams, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo) { +void Reduce3::execScalar(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo) { - auto x = reinterpret_cast(vx); - auto y = 
reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -134,10 +134,10 @@ void Reduce3::execScalar(void *vx, Nd4jLong *xShapeInfo, ////////////////////////////////////////////////////////////////////////// template void Reduce3::execScalar(const int opNum, - void *vx, Nd4jLong *xShapeInfo, - void *extraParamsVals, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo) { + const void *vx, const Nd4jLong *xShapeInfo, + void *extraParamsVals, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo) { DISPATCH_BY_OPNUM_TT(execScalar, PARAMS(vx, xShapeInfo, extraParamsVals, vy, yShapeInfo, vz, zShapeInfo), REDUCE3_OPS); } @@ -146,14 +146,15 @@ void Reduce3::execScalar(const int opNum, ////////////////////////////////////////////////////////////////////////// template template -void Reduce3::exec(void *vx, Nd4jLong *xShapeInfo, - void *vextraParams, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo, - int *dimension, int dimensionLength, int64_t start, int64_t stop) { +void Reduce3::exec(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + int64_t start, int64_t stop) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -171,15 +172,16 @@ void Reduce3::exec(void *vx, Nd4jLong *xShapeInfo, ////////////////////////////////////////////////////////////////////////// template template -void Reduce3::exec(void *vx, Nd4jLong *xShapeInfo, +void Reduce3::exec(const void *vx, const Nd4jLong *xShapeInfo, void *vextraParams, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo, + const void *vy, 
const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, int64_t start, int64_t stop) { + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + int64_t start, int64_t stop) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); #ifdef INLINE_LOOPS @@ -193,16 +195,17 @@ void Reduce3::exec(void *vx, Nd4jLong *xShapeInfo, ////////////////////////////////////////////////////////////////////////// template template -void Reduce3:: execAll(void *vx, Nd4jLong *xShapeInfo, +void Reduce3:: execAll(const void *vx, const Nd4jLong *xShapeInfo, void *vextraParams, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *xTadShapeInfo, Nd4jLong *xOffsets, - Nd4jLong *yTadShapeInfo, Nd4jLong *yOffsets, int64_t start, int64_t stop) { + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xOffsets, + const Nd4jLong *yTadShapeInfo, const Nd4jLong *yOffsets, + int64_t start, int64_t stop) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -215,12 +218,13 @@ void Reduce3:: execAll(void *vx, Nd4jLong *xShapeInfo, ////////////////////////////////////////////////////////////////////////// template -void Reduce3::exec( const int opNum, - void *vx, Nd4jLong *xShapeInfo, +void Reduce3::exec(const int opNum, + const void *vx, const Nd4jLong *xShapeInfo, void *extraParamsVals, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo, - int *dimension, int dimensionLength, int64_t start, int64_t stop) { + const void *vy, 
const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + int64_t start, int64_t stop) { DISPATCH_BY_OPNUM_TT(exec, PARAMS(vx, xShapeInfo, extraParamsVals, vy, yShapeInfo, vz, zShapeInfo, dimension, dimensionLength, start, stop), REDUCE3_OPS); } @@ -228,13 +232,14 @@ void Reduce3::exec( const int opNum, ////////////////////////////////////////////////////////////////////////// template -void Reduce3::exec( const int opNum, - void *vx, Nd4jLong *xShapeInfo, +void Reduce3::exec(const int opNum, + const void *vx, const Nd4jLong *xShapeInfo, void *extraParamsVals, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, int64_t start, int64_t stop) { + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + int64_t start, int64_t stop) { DISPATCH_BY_OPNUM_TT(exec, PARAMS(vx,xShapeInfo,extraParamsVals,vy, yShapeInfo,vz,zShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffsets, start, stop), REDUCE3_OPS); } @@ -243,13 +248,14 @@ void Reduce3::exec( const int opNum, ////////////////////////////////////////////////////////////////////////// template void Reduce3::execAll(const int opNum, - void *vx, Nd4jLong *xShapeInfo, - void *extraParamsVals, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo, - int *dimension, int dimensionLength, - Nd4jLong *xTadShapeInfo, Nd4jLong *xOffsets, - Nd4jLong *yTadShapeInfo, Nd4jLong *yOffsets, int64_t start, int64_t stop) { + const void *vx, const Nd4jLong *xShapeInfo, + void *extraParamsVals, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xOffsets, + const Nd4jLong *yTadShapeInfo, const Nd4jLong *yOffsets, + int64_t start, int64_t stop) { 
DISPATCH_BY_OPNUM_TT(execAll, PARAMS(vx, xShapeInfo, extraParamsVals, vy, yShapeInfo, vz, zShapeInfo, dimension, dimensionLength, xTadShapeInfo, xOffsets, yTadShapeInfo, yOffsets, start, stop), REDUCE3_OPS); } diff --git a/libnd4j/include/loops/cpu/scalar.hpp b/libnd4j/include/loops/cpu/scalar.hpp index d93db7c8f..236ba7e25 100644 --- a/libnd4j/include/loops/cpu/scalar.hpp +++ b/libnd4j/include/loops/cpu/scalar.hpp @@ -34,18 +34,18 @@ namespace scalar { //////////////////////////////////////////////////////////////////////// template template -void ScalarTransform::transform(void *vx, Nd4jLong *xShapeInfo, - void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, - void *vscalars, - int *dimension, int dimensionLength, - Nd4jLong *xTadShapeInfo, Nd4jLong *xTadOffsets, - Nd4jLong *zTadShapeInfo, Nd4jLong *zTadOffsets, - const uint64_t start, const uint64_t stop) { +void ScalarTransform::transform(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + void *vz, const Nd4jLong *zShapeInfo, + const void *vscalars, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffsets, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffsets, + const uint64_t start, const uint64_t stop) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); - auto scalars = reinterpret_cast(vscalars); + auto scalars = reinterpret_cast(vscalars); auto extraParams = reinterpret_cast(vextraParams); if (zTadShapeInfo == nullptr) { @@ -92,14 +92,14 @@ void ScalarTransform::transform(void *vx, Nd4jLong *xShapeInfo, //////////////////////////////////////////////////////////////////////// template void ScalarTransform::transform(int opNum, - void *x, Nd4jLong *xShapeInfo, - void *extraParams, - void *z, Nd4jLong *zShapeInfo, - void *scalars, - int *dimension, int dimensionLength, - Nd4jLong *xTadShapeInfo, Nd4jLong *xTadOffsets, - Nd4jLong *zTadShapeInfo, Nd4jLong *zTadOffsets, - const uint64_t start, const 
uint64_t stop) { + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, + const void *scalars, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffsets, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffsets, + const uint64_t start, const uint64_t stop) { DISPATCH_BY_OPNUM_TTT(transform, PARAMS(x, xShapeInfo, extraParams, z, zShapeInfo, scalars, dimension, dimensionLength, xTadShapeInfo, xTadOffsets, zTadShapeInfo, zTadOffsets, start, stop), SCALAR_OPS); } @@ -107,12 +107,12 @@ void ScalarTransform::transform(int opNum, //////////////////////////////////////////////////////////////////////// template void ScalarTransform::transform(const int opNum, - void *x, Nd4jLong xStride, - void *z, Nd4jLong zStride, - void *scalar, - void *extraParams, - const uint64_t n, - const uint64_t start, const uint64_t stop) { + const void *x, Nd4jLong xStride, + void *z, Nd4jLong zStride, + const void *scalar, + void *extraParams, + const uint64_t n, + const uint64_t start, const uint64_t stop) { DISPATCH_BY_OPNUM_TTT(transform, PARAMS(x, xStride, z, zStride, scalar, extraParams, n, start, stop), SCALAR_OPS); } @@ -120,11 +120,11 @@ void ScalarTransform::transform(const int opNum, //////////////////////////////////////////////////////////////////////// template void ScalarTransform::transform(const int opNum, - void *x, Nd4jLong *xShapeInfo, - void *z, Nd4jLong *zShapeInfo, - void *scalar, - void *extraParams, - const uint64_t start, const uint64_t stop) { + const void *x, const Nd4jLong *xShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + const void *scalar, + void *extraParams, + const uint64_t start, const uint64_t stop) { DISPATCH_BY_OPNUM_TTT(transform, PARAMS(x, xShapeInfo, z, zShapeInfo, scalar, extraParams, start, stop), SCALAR_OPS); } @@ -132,15 +132,15 @@ void ScalarTransform::transform(const int opNum, //////////////////////////////////////////////////////////////////////// 
template template -void ScalarTransform::transform(void *vx, Nd4jLong *xShapeInfo, - void *vz, Nd4jLong *zShapeInfo, - void *vscalar, - void *vextraParams, - const uint64_t start, const uint64_t stop) { +void ScalarTransform::transform(const void *vx, const Nd4jLong *xShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + const void *vscalar, + void *vextraParams, + const uint64_t start, const uint64_t stop) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); - auto scalar = reinterpret_cast(vscalar)[0]; + auto scalar = reinterpret_cast(vscalar)[0]; auto extraParams = reinterpret_cast(vextraParams); const auto len = shape::length(xShapeInfo); @@ -181,15 +181,15 @@ void ScalarTransform::transform(void *vx, Nd4jLong *xShapeInfo, //////////////////////////////////////////////////////////////////////// template template -void ScalarTransform::transform(void *vx, Nd4jLong xEws, - void *vz, Nd4jLong zEws, - void *vscalar, - void *vextraParams, - const uint64_t len, const uint64_t start, const uint64_t stop) { +void ScalarTransform::transform(const void *vx, Nd4jLong xEws, + void *vz, Nd4jLong zEws, + const void *vscalar, + void *vextraParams, + const uint64_t len, const uint64_t start, const uint64_t stop) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); - auto scalar = reinterpret_cast(vscalar)[0]; + auto scalar = reinterpret_cast(vscalar)[0]; auto extraParams = reinterpret_cast(vextraParams); if (xEws == 1 && zEws == 1) { diff --git a/libnd4j/include/loops/cpu/scalar_bool.cpp b/libnd4j/include/loops/cpu/scalar_bool.cpp index c6f437ba8..72513c10d 100644 --- a/libnd4j/include/loops/cpu/scalar_bool.cpp +++ b/libnd4j/include/loops/cpu/scalar_bool.cpp @@ -34,18 +34,18 @@ namespace functions { template template - void ScalarBoolTransform::transform(void *vx, Nd4jLong *xShapeInfo, - void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, - void *vscalars, - int *dimension, int 
dimensionLength, - Nd4jLong *xTadShapeInfo, Nd4jLong *xTadOffsets, - Nd4jLong *zTadShapeInfo, Nd4jLong *zTadOffsets, - const uint64_t start, const uint64_t stop) { + void ScalarBoolTransform::transform(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + void *vz, const Nd4jLong *zShapeInfo, + const void *vscalars, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffsets, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffsets, + const uint64_t start, const uint64_t stop) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); - auto scalars = reinterpret_cast(vscalars); + auto scalars = reinterpret_cast(vscalars); auto extraParams = reinterpret_cast(vextraParams); if (zTadShapeInfo == nullptr) { @@ -92,60 +92,50 @@ namespace functions { template void ScalarBoolTransform::transform(int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *z, - Nd4jLong *zShapeInfo, - void *scalars, - int *dimension, - int dimensionLength, - Nd4jLong *xTadShapeInfo, - Nd4jLong *xTadOffsets, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffsets, const uint64_t start, const uint64_t stop) { + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, + const void *scalars, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffsets, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffsets, + const uint64_t start, const uint64_t stop) { DISPATCH_BY_OPNUM_TT(transform, PARAMS(x, xShapeInfo, extraParams, z, zShapeInfo, scalars, dimension, dimensionLength, xTadShapeInfo, xTadOffsets, zTadShapeInfo, zTadOffsets, start, stop), SCALAR_BOOL_OPS); } template void ScalarBoolTransform::transform(const int opNum, - void *x, - Nd4jLong xEws, - void *z, - Nd4jLong zEws, - void *scalar, - void *extraParams, - const uint64_t n, - const uint64_t start, const uint64_t stop) { + const void *x, 
Nd4jLong xEws, + void *z, Nd4jLong zEws, + const void *scalar, + void *extraParams, + const uint64_t n, + const uint64_t start, const uint64_t stop) { DISPATCH_BY_OPNUM_TT(transform, PARAMS(x, xEws, z, zEws, scalar, extraParams, n, start, stop), SCALAR_BOOL_OPS); } template void ScalarBoolTransform::transform(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *z, - Nd4jLong *zShapeInfo, - void *scalar, - void *extraParams, - const uint64_t start, const uint64_t stop) { + const void *x, const Nd4jLong *xShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + const void *scalar, + void *extraParams, + const uint64_t start, const uint64_t stop) { DISPATCH_BY_OPNUM_TT(transform, PARAMS(x, xShapeInfo, z, zShapeInfo, scalar, extraParams, start, stop), SCALAR_BOOL_OPS); } template template - void ScalarBoolTransform::transform(void *vx, - Nd4jLong *xShapeInfo, - void *vz, - Nd4jLong *zShapeInfo, - void *vscalar, - void *vextraParams, - const uint64_t start, const uint64_t stop) { + void ScalarBoolTransform::transform(const void *vx, const Nd4jLong *xShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + const void *vscalar, + void *vextraParams, + const uint64_t start, const uint64_t stop) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); - auto scalar = reinterpret_cast(vscalar)[0]; + auto scalar = reinterpret_cast(vscalar)[0]; auto extraParams = reinterpret_cast(vextraParams); auto xEws = shape::elementWiseStride(xShapeInfo); @@ -185,18 +175,16 @@ namespace functions { template template - void ScalarBoolTransform::transform(void *vx, - Nd4jLong xEws, - void *vz, - Nd4jLong zEws, - void *vscalar, - void *vextraParams, - const uint64_t len, - const uint64_t start, const uint64_t stop) { + void ScalarBoolTransform::transform(const void *vx, Nd4jLong xEws, + void *vz, Nd4jLong zEws, + const void *vscalar, + void *vextraParams, + const uint64_t len, + const uint64_t start, const uint64_t stop) { - auto x = 
reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); - auto scalar = reinterpret_cast(vscalar)[0]; + auto scalar = reinterpret_cast(vscalar)[0]; auto extraParams = reinterpret_cast(vextraParams); if (xEws == 1 && zEws == 1) { diff --git a/libnd4j/include/loops/cpu/scalar_int.cpp b/libnd4j/include/loops/cpu/scalar_int.cpp index ed85e28ef..1a8f5bcca 100644 --- a/libnd4j/include/loops/cpu/scalar_int.cpp +++ b/libnd4j/include/loops/cpu/scalar_int.cpp @@ -34,18 +34,18 @@ namespace functions { template template - void ScalarIntTransform::transform(void *vx, Nd4jLong *xShapeInfo, - void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, - void *vscalars, - int *dimension, int dimensionLength, - Nd4jLong *xTadShapeInfo, Nd4jLong *xTadOffsets, - Nd4jLong *zTadShapeInfo, Nd4jLong *zTadOffsets, - const uint64_t start, const uint64_t stop) { + void ScalarIntTransform::transform(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + void *vz, const Nd4jLong *zShapeInfo, + const void *vscalars, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffsets, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffsets, + const uint64_t start, const uint64_t stop) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); - auto scalars = reinterpret_cast(vscalars); + auto scalars = reinterpret_cast(vscalars); auto extraParams = reinterpret_cast(vextraParams); if (zTadShapeInfo == nullptr) { @@ -92,19 +92,14 @@ namespace functions { template void ScalarIntTransform::transform(int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *z, - Nd4jLong *zShapeInfo, - void *scalars, - int *dimension, - int dimensionLength, - Nd4jLong *xTadShapeInfo, - Nd4jLong *xTadOffsets, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffsets, - const uint64_t start, const uint64_t stop) { + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const 
Nd4jLong *zShapeInfo, + const void *scalars, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffsets, + const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffsets, + const uint64_t start, const uint64_t stop) { DISPATCH_BY_OPNUM_T(transform, PARAMS(x, xShapeInfo, extraParams, z, zShapeInfo, scalars, dimension, dimensionLength, xTadShapeInfo, xTadOffsets, zTadShapeInfo, zTadOffsets, start, stop), SCALAR_INT_OPS); } @@ -112,42 +107,35 @@ namespace functions { template void ScalarIntTransform::transform(const int opNum, - void *x, - Nd4jLong xEws, - void *z, - Nd4jLong zEws, - void *scalar, - void *extraParams, - const uint64_t n, - const uint64_t start, const uint64_t stop) { + const void *x, Nd4jLong xEws, + void *z, Nd4jLong zEws, + const void *scalar, + void *extraParams, + const uint64_t n, + const uint64_t start, const uint64_t stop) { DISPATCH_BY_OPNUM_T(transform, PARAMS(x, xEws, z, zEws, scalar, extraParams, n, start, stop), SCALAR_INT_OPS); } template void ScalarIntTransform::transform(const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *z, - Nd4jLong *zShapeInfo, - void *scalar, - void *extraParams, - const uint64_t start, const uint64_t stop) { + const void *x, const Nd4jLong *xShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + const void *scalar, + void *extraParams, + const uint64_t start, const uint64_t stop) { DISPATCH_BY_OPNUM_T(transform, PARAMS(x, xShapeInfo, z, zShapeInfo, scalar, extraParams, start, stop), SCALAR_INT_OPS); } template template - void ScalarIntTransform::transform(void *vx, - Nd4jLong *xShapeInfo, - void *vz, - Nd4jLong *zShapeInfo, - void *vscalar, - void *vextraParams, - const uint64_t start, const uint64_t stop) { + void ScalarIntTransform::transform(const void *vx, const Nd4jLong *xShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + const void *vscalar, void *vextraParams, + const uint64_t start, const uint64_t stop) { - auto x = reinterpret_cast(vx); + auto x = 
reinterpret_cast(vx); auto z = reinterpret_cast(vz); - auto scalar = reinterpret_cast(vscalar)[0]; + auto scalar = reinterpret_cast(vscalar)[0]; auto extraParams = reinterpret_cast(vextraParams); auto xEws = shape::elementWiseStride(xShapeInfo); @@ -187,18 +175,15 @@ namespace functions { template template - void ScalarIntTransform::transform(void *vx, - Nd4jLong xEws, - void *vz, - Nd4jLong zEws, - void *vscalar, - void *vextraParams, - const uint64_t len, - const uint64_t start, const uint64_t stop) { + void ScalarIntTransform::transform(const void *vx, Nd4jLong xEws, + void *vz, Nd4jLong zEws, + const void *vscalar, + void *vextraParams, + const uint64_t len, const uint64_t start, const uint64_t stop) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); - auto scalar = reinterpret_cast(vscalar)[0]; + auto scalar = reinterpret_cast(vscalar)[0]; auto extraParams = reinterpret_cast(vextraParams); if (scalar < (sizeof(X) * 8)) { diff --git a/libnd4j/include/loops/cpu/summarystatsreduce.cpp b/libnd4j/include/loops/cpu/summarystatsreduce.cpp index f6b44b75c..2d53671d2 100644 --- a/libnd4j/include/loops/cpu/summarystatsreduce.cpp +++ b/libnd4j/include/loops/cpu/summarystatsreduce.cpp @@ -34,54 +34,46 @@ namespace functions { template Y SummaryStatsReduce::execScalar(const int opNum, - const bool biasCorrected, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams) { + const bool biasCorrected, + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams) { RETURNING_DISPATCH_BY_OPNUM_TT(execScalar, PARAMS(biasCorrected, x, xShapeInfo, extraParams), SUMMARY_STATS_OPS); } template void SummaryStatsReduce::execScalar(const int opNum, - const bool biasCorrected, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *z, - Nd4jLong *zShapeInfo) { + const bool biasCorrected, + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo) { DISPATCH_BY_OPNUM_TT(execScalar, 
PARAMS(biasCorrected, x, xShapeInfo, extraParams, z, zShapeInfo), SUMMARY_STATS_OPS); } template void SummaryStatsReduce::exec(const int opNum, - const bool biasCorrected, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *z, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength) { + const bool biasCorrected, + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength) { DISPATCH_BY_OPNUM_TT(exec, PARAMS(biasCorrected, x, xShapeInfo, extraParams, z, zShapeInfo, dimension, dimensionLength), SUMMARY_STATS_OPS); } template template void SummaryStatsReduce::execScalar(const bool biasCorrected, - void *vx, - Nd4jLong *xShapeInfo, - void *vextraParams, - void *vz, - Nd4jLong *zShapeInfo) { + const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + void *vz, const Nd4jLong *zShapeInfo) { auto z = reinterpret_cast(vz); z[0] = execScalar(biasCorrected, vx, xShapeInfo, vextraParams); } template template - Z SummaryStatsReduce::execScalar(const bool biasCorrected, void *vx, Nd4jLong *xShapeInfo, void *vextraParams) { + Z SummaryStatsReduce::execScalar(const bool biasCorrected, const void *vx, const Nd4jLong *xShapeInfo, void *vextraParams) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto extraParams = reinterpret_cast(vextraParams); SummaryStatsData startingIndex; @@ -105,15 +97,12 @@ namespace functions { template template void SummaryStatsReduce::exec(const bool biasCorrected, - void *vx, - Nd4jLong *xShapeInfo, - void *vextraParams, - void *vz, - Nd4jLong *zShapeInfo, - int *dimension, - int dimensionLength) { + const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); auto resultLength = 
shape::length(zShapeInfo); diff --git a/libnd4j/include/loops/cpu/transform/transform_any.cpp b/libnd4j/include/loops/cpu/transform/transform_any.cpp index 3fc9af1b3..6a8c07094 100644 --- a/libnd4j/include/loops/cpu/transform/transform_any.cpp +++ b/libnd4j/include/loops/cpu/transform/transform_any.cpp @@ -30,25 +30,23 @@ namespace functions { namespace transform { template - void TransformAny::exec( - int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *z, - Nd4jLong *zShapeInfo, - void *extraParams, - uint64_t threadId, uint64_t numThreads) { + void TransformAny::exec(int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + void *extraParams, + uint64_t threadId, uint64_t numThreads) { DISPATCH_BY_OPNUM_TT(exec, PARAMS(x, xShapeInfo, z, zShapeInfo, extraParams, threadId, numThreads), TRANSFORM_ANY_OPS); } ///////////////////////////////////////////////////////////////////// template template -void _CUDA_H TransformAny::exec(void *vx, Nd4jLong *xShapeInfo, - void *vz,Nd4jLong *zShapeInfo, - void *vextraParams, uint64_t threadId, uint64_t numThreads) { +void _CUDA_H TransformAny::exec(const void *vx, const Nd4jLong *xShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + void *vextraParams, + uint64_t threadId, uint64_t numThreads) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); diff --git a/libnd4j/include/loops/cpu/transform/transform_bool.cpp b/libnd4j/include/loops/cpu/transform/transform_bool.cpp index 7302ef970..5e88a15c3 100644 --- a/libnd4j/include/loops/cpu/transform/transform_bool.cpp +++ b/libnd4j/include/loops/cpu/transform/transform_bool.cpp @@ -30,27 +30,22 @@ namespace functions { namespace transform { template - void TransformBool::exec( - int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *z, - Nd4jLong *zShapeInfo, - void *extraParams, - uint64_t threadId, uint64_t numThreads) { + void 
TransformBool::exec(int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + void *extraParams, + uint64_t threadId, uint64_t numThreads) { DISPATCH_BY_OPNUM_TT(exec, PARAMS(x, xShapeInfo, z, zShapeInfo, extraParams, threadId, numThreads), TRANSFORM_BOOL_OPS); } template template - void _CUDA_H TransformBool::exec( - void *vx, - Nd4jLong *xShapeInfo, - void *vz, - Nd4jLong *zShapeInfo, - void *vextraParams, uint64_t threadId, uint64_t numThreads) { + void _CUDA_H TransformBool::exec(const void *vx, const Nd4jLong *xShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + void *vextraParams, + uint64_t threadId, uint64_t numThreads) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); diff --git a/libnd4j/include/loops/cpu/transform/transform_float.cpp b/libnd4j/include/loops/cpu/transform/transform_float.cpp index 833b263f1..fd37391c2 100644 --- a/libnd4j/include/loops/cpu/transform/transform_float.cpp +++ b/libnd4j/include/loops/cpu/transform/transform_float.cpp @@ -29,27 +29,22 @@ using namespace simdOps; namespace functions { namespace transform { template - void TransformFloat::exec( - int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *z, - Nd4jLong *zShapeInfo, - void *extraParams, - uint64_t threadId, uint64_t numThreads) { + void TransformFloat::exec(int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + void *extraParams, + uint64_t threadId, uint64_t numThreads) { DISPATCH_BY_OPNUM_TT(exec, PARAMS(x, xShapeInfo, z, zShapeInfo, extraParams, threadId, numThreads), TRANSFORM_FLOAT_OPS); } template template - void _CUDA_H TransformFloat::exec( - void *vx, - Nd4jLong *xShapeInfo, - void *vz, - Nd4jLong *zShapeInfo, - void *vextraParams, uint64_t threadId, uint64_t numThreads) { + void _CUDA_H TransformFloat::exec(const void *vx, const Nd4jLong *xShapeInfo, + void *vz, const Nd4jLong 
*zShapeInfo, + void *vextraParams, + uint64_t threadId, uint64_t numThreads) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); diff --git a/libnd4j/include/loops/cpu/transform/transform_same.cpp b/libnd4j/include/loops/cpu/transform/transform_same.cpp index bc9d2e525..d2793d9c0 100644 --- a/libnd4j/include/loops/cpu/transform/transform_same.cpp +++ b/libnd4j/include/loops/cpu/transform/transform_same.cpp @@ -30,24 +30,22 @@ namespace functions { namespace transform { template - void TransformSame::exec( - int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *z, - Nd4jLong *zShapeInfo, - void *extraParams, uint64_t threadId, uint64_t numThreads) { + void TransformSame::exec(int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + void *extraParams, + uint64_t threadId, uint64_t numThreads) { DISPATCH_BY_OPNUM_T(exec, PARAMS(x, xShapeInfo, z, zShapeInfo, extraParams, threadId, numThreads), TRANSFORM_SAME_OPS); } template template - void _CUDA_H TransformSame::exec(void *vx, Nd4jLong *xShapeInfo, - void *vz, Nd4jLong *zShapeInfo, + void _CUDA_H TransformSame::exec(const void *vx, const Nd4jLong *xShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, void *vextraParams, uint64_t threadId, uint64_t numThreads) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); diff --git a/libnd4j/include/loops/cpu/transform/transform_strict.cpp b/libnd4j/include/loops/cpu/transform/transform_strict.cpp index 2ef3b808e..54a24d0e3 100644 --- a/libnd4j/include/loops/cpu/transform/transform_strict.cpp +++ b/libnd4j/include/loops/cpu/transform/transform_strict.cpp @@ -30,26 +30,23 @@ namespace functions { namespace transform { template - void TransformStrict::exec( - int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *z, - Nd4jLong *zShapeInfo, - void 
*extraParams, uint64_t threadId, uint64_t numThreads) { + void TransformStrict::exec(int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *z, + const Nd4jLong *zShapeInfo, + void *extraParams, + uint64_t threadId, uint64_t numThreads) { DISPATCH_BY_OPNUM_T(exec, PARAMS(x, xShapeInfo, z, zShapeInfo, extraParams, threadId, numThreads), TRANSFORM_STRICT_OPS); } template template - void _CUDA_H TransformStrict::exec( - void *vx, - Nd4jLong *xShapeInfo, - void *vz, - Nd4jLong *zShapeInfo, - void *vextraParams, uint64_t threadId, uint64_t numThreads) { + void _CUDA_H TransformStrict::exec(const void *vx, const Nd4jLong *xShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + void *vextraParams, + uint64_t threadId, uint64_t numThreads) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); diff --git a/libnd4j/include/loops/cuda/broadcasting.chpp b/libnd4j/include/loops/cuda/broadcasting.chpp index 848522a35..4b5c7833f 100644 --- a/libnd4j/include/loops/cuda/broadcasting.chpp +++ b/libnd4j/include/loops/cuda/broadcasting.chpp @@ -34,22 +34,22 @@ using namespace simdOps; template static __global__ void broadcastSimple( - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, + void const* x, + Nd4jLong const* xShapeInfo, + void const* y, + Nd4jLong const* yShapeInfo, void *z, - Nd4jLong *zShapeInfo, + Nd4jLong const* zShapeInfo, int *dimension, - int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { functions::broadcast::Broadcast::template transformCuda(x,xShapeInfo,y,yShapeInfo,z,zShapeInfo,dimension,dimensionLength,tadOnlyShapeInfo,tadOffsets,tadOnlyShapeInfoZ,tadOffsetsZ); } template -static __global__ void broadcastSimple(const void 
*x, const Nd4jLong *xShapeInfo, - const void *y, const Nd4jLong *yShapeInfo, - void *z, const Nd4jLong *zShapeInfo ) { +static __global__ void broadcastSimple(const void const* x, const Nd4jLong const* xShapeInfo, + const void const* y, const Nd4jLong const* yShapeInfo, + void *z, const Nd4jLong const* zShapeInfo ) { functions::broadcast::Broadcast::template transformCuda(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo); } @@ -57,14 +57,14 @@ static __global__ void broadcastSimple(const void *x, const Nd4jLong *xShapeInfo template static __global__ void broadcastInverseSimple( - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, + void const* x, + Nd4jLong const* xShapeInfo, + void const* y, + Nd4jLong const* yShapeInfo, void *z, - Nd4jLong *zShapeInfo, + Nd4jLong const* zShapeInfo, int *dimension, - int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { functions::broadcast::Broadcast::template transformInverseCuda(x,xShapeInfo,y,yShapeInfo,z,zShapeInfo,dimension,dimensionLength,tadOnlyShapeInfo,tadOffsets,tadOnlyShapeInfoZ,tadOffsetsZ); } @@ -73,17 +73,17 @@ static __global__ void broadcastInverseSimple( namespace functions { namespace broadcast { - static Nd4jLong __device__ __noinline__ getIndexOffset(Nd4jLong index, Nd4jLong *shapeInfo) { + static Nd4jLong __device__ __noinline__ getIndexOffset(Nd4jLong index, const Nd4jLong *shapeInfo) { return shape::getIndexOffset(index, shapeInfo); } - static Nd4jLong __device__ __noinline__ length(Nd4jLong *shapeInfo) { + static Nd4jLong __device__ __noinline__ length(const Nd4jLong *shapeInfo) { return shape::length(shapeInfo); } template template - __host__ void Broadcast::intermediateBroadcast(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong 
*yShapeInfo, void *z, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + __host__ void Broadcast::intermediateBroadcast(dim3 launchDims, cudaStream_t *stream, void const* x, Nd4jLong const* xShapeInfo, void const* y, Nd4jLong const* yShapeInfo, void* z, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { broadcastSimple<<>>(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, tadOffsetsZ); } @@ -94,14 +94,14 @@ namespace functions { } template - __host__ void Broadcast::execBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *z, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + __host__ void Broadcast::execBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void const* x, Nd4jLong const* xShapeInfo, void const* y, Nd4jLong const* yShapeInfo, void *z, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { DISPATCH_BY_OPNUM_TTT(intermediateBroadcast, PARAMS(launchDims, stream, x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, tadOffsetsZ), OPS_A(BROADCAST_OPS)) DEBUG_KERNEL(stream, opNum); } template - __host__ void Broadcast::execBroadcast(dim3 launchDims, cudaStream_t *stream, const int opNum, const void *x, const Nd4jLong *xShapeInfo, const void *y, const Nd4jLong *yShapeInfo, void *z, const Nd4jLong *zShapeInfo) { + __host__ void 
Broadcast::execBroadcast(dim3 launchDims, cudaStream_t *stream, const int opNum, const void *x, const Nd4jLong *xShapeInfo, const void *y, const Nd4jLong *yShapeInfo, void *z, const Nd4jLong const* zShapeInfo) { DISPATCH_BY_OPNUM_TTT(intermediateBroadcast, PARAMS(launchDims, stream, x, xShapeInfo, y, yShapeInfo, z, zShapeInfo), OPS_A(BROADCAST_OPS)) DEBUG_KERNEL(stream, opNum); @@ -109,12 +109,12 @@ namespace functions { template template - __host__ void Broadcast::intermediateInverseBroadcast(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *z, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + __host__ void Broadcast::intermediateInverseBroadcast(dim3 launchDims, cudaStream_t *stream, void const* x, Nd4jLong const* xShapeInfo, void const* y, Nd4jLong const* yShapeInfo, void *z, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { broadcastInverseSimple<<>>(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, tadOffsetsZ); } template - __host__ void Broadcast::execInverseBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *z, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + __host__ void Broadcast::execInverseBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void const* x, Nd4jLong const* xShapeInfo, void const* y, Nd4jLong const* yShapeInfo, void *z, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* 
tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { DISPATCH_BY_OPNUM_TTT(intermediateInverseBroadcast, PARAMS(launchDims, stream, x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, tadOffsetsZ), OPS_A(BROADCAST_OPS)) DEBUG_KERNEL(stream, opNum); @@ -123,19 +123,19 @@ namespace functions { template template __device__ void Broadcast::transformInverseCuda( - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, + void* vz, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { if (tadOnlyShapeInfoZ == nullptr) { tadOnlyShapeInfoZ = tadOnlyShapeInfo; tadOffsetsZ = tadOffsets; } - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); //decompose in to several sub tads after @@ -189,19 +189,19 @@ namespace functions { template template __device__ void Broadcast::transformCuda( - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { if (tadOnlyShapeInfoZ == nullptr) { tadOnlyShapeInfoZ = tadOnlyShapeInfo; tadOffsetsZ = tadOffsets; } - auto x = reinterpret_cast(vx); - auto y = 
reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); //decompose in to several sub tads after diff --git a/libnd4j/include/loops/cuda/broadcasting_bool.cu b/libnd4j/include/loops/cuda/broadcasting_bool.cu index 1c7bc358e..bed00a20f 100644 --- a/libnd4j/include/loops/cuda/broadcasting_bool.cu +++ b/libnd4j/include/loops/cuda/broadcasting_bool.cu @@ -34,24 +34,24 @@ using namespace simdOps; ////////////////////////////////////////////////////////////////////////// template static __global__ void broadcastBoolSimple( - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, + void const* x, + Nd4jLong const* xShapeInfo, + void const* y, + Nd4jLong const* yShapeInfo, void *z, - Nd4jLong *zShapeInfo, + Nd4jLong const* zShapeInfo, void *extraParams, int *dimension, - int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { functions::broadcast::BroadcastBool::template transformCuda(x,xShapeInfo,y,yShapeInfo,z,zShapeInfo, extraParams, dimension,dimensionLength,tadOnlyShapeInfo,tadOffsets,tadOnlyShapeInfoZ,tadOffsetsZ); } ////////////////////////////////////////////////////////////////////////// template -static __global__ void broadcastBoolSimple(const void *x, const Nd4jLong *xShapeInfo, - const void *y, const Nd4jLong *yShapeInfo, - void *z, const Nd4jLong *zShapeInfo, +static __global__ void broadcastBoolSimple(const void const* x, const Nd4jLong const* xShapeInfo, + const void const* y, const Nd4jLong const* yShapeInfo, + void *z, const Nd4jLong const* zShapeInfo, void *extraParams) { functions::broadcast::BroadcastBool::template transformCuda(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, extraParams); @@ -59,15 +59,15 @@ static __global__ void broadcastBoolSimple(const void 
*x, const Nd4jLong *xShape ////////////////////////////////////////////////////////////////////////// template static __global__ void broadcastBoolInverseSimple( - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, + void const* x, + Nd4jLong const* xShapeInfo, + void const* y, + Nd4jLong const* yShapeInfo, void *z, - Nd4jLong *zShapeInfo, + Nd4jLong const* zShapeInfo, void *extraParams, int *dimension, - int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { functions::broadcast::BroadcastBool::template transformInverseCuda(x,xShapeInfo,y,yShapeInfo,z,zShapeInfo,extraParams,dimension,dimensionLength,tadOnlyShapeInfo,tadOffsets,tadOnlyShapeInfoZ,tadOffsetsZ); } @@ -78,7 +78,7 @@ namespace broadcast { ////////////////////////////////////////////////////////////////////////// template template -__host__ void BroadcastBool::intermediateBroadcast(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *z, Nd4jLong *zShapeInfo, void *extraParams, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { +__host__ void BroadcastBool::intermediateBroadcast(dim3 launchDims, cudaStream_t *stream, void const* x, Nd4jLong const* xShapeInfo, void const* y, Nd4jLong const* yShapeInfo, void* z, Nd4jLong const* zShapeInfo, void *extraParams, int *dimension, int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { broadcastBoolSimple<<>>(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, extraParams, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, tadOffsetsZ); sd::DebugHelper::checkErrorCode(stream, 
"intermediateBroadcastBool(...) failed"); } @@ -98,7 +98,7 @@ __host__ void BroadcastBool::intermediateBroadcast(dim3 launchDims, cudaStr ////////////////////////////////////////////////////////////////////////// template -__host__ void BroadcastBool::execBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *z, Nd4jLong *zShapeInfo, void *extraParams, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { +__host__ void BroadcastBool::execBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void const* x, Nd4jLong const* xShapeInfo, void const* y, Nd4jLong const* yShapeInfo, void *z, Nd4jLong const* zShapeInfo, void *extraParams, int *dimension, int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { DISPATCH_BY_OPNUM_TT(intermediateBroadcast, PARAMS(launchDims, stream, x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, extraParams, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, tadOffsetsZ), OPS_A(BROADCAST_BOOL_OPS)) DEBUG_KERNEL(stream, opNum); @@ -119,14 +119,14 @@ __host__ void BroadcastBool::execBroadcast(dim3 launchDims, cudaStream_t *s ////////////////////////////////////////////////////////////////////////// template template - __host__ void BroadcastBool::intermediateInverseBroadcast(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *z, Nd4jLong *zShapeInfo, void *extraParams, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + __host__ void BroadcastBool::intermediateInverseBroadcast(dim3 launchDims, cudaStream_t *stream, void const* x, Nd4jLong const* xShapeInfo, void const* y, Nd4jLong const* yShapeInfo, void *z, Nd4jLong const* 
zShapeInfo, void *extraParams, int *dimension, int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { broadcastBoolInverseSimple<<>>(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, extraParams, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, tadOffsetsZ); sd::DebugHelper::checkErrorCode(stream, "intermediateBroadcastBool(...) failed"); } ////////////////////////////////////////////////////////////////////////// template - __host__ void BroadcastBool::execInverseBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *z, Nd4jLong *zShapeInfo, void *extraParams, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + __host__ void BroadcastBool::execInverseBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void const* x, Nd4jLong const* xShapeInfo, void const* y, Nd4jLong const* yShapeInfo, void *z, Nd4jLong const* zShapeInfo, void *extraParams, int *dimension, int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { DISPATCH_BY_OPNUM_TT(intermediateInverseBroadcast, PARAMS(launchDims, stream, x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, extraParams, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, tadOffsetsZ), OPS_A(BROADCAST_BOOL_OPS)) DEBUG_KERNEL(stream, opNum); @@ -136,20 +136,20 @@ __host__ void BroadcastBool::execBroadcast(dim3 launchDims, cudaStream_t *s template template __device__ void BroadcastBool::transformInverseCuda( - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, void 
*vextraParams, int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { if (tadOnlyShapeInfoZ == nullptr) { tadOnlyShapeInfoZ = tadOnlyShapeInfo; tadOffsetsZ = tadOffsets; } - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -198,20 +198,20 @@ __host__ void BroadcastBool::execBroadcast(dim3 launchDims, cudaStream_t *s template template __device__ void BroadcastBool::transformCuda( - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, void *vextraParams, int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { if (tadOnlyShapeInfoZ == nullptr) { tadOnlyShapeInfoZ = tadOnlyShapeInfo; tadOffsetsZ = tadOffsets; } - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -235,7 +235,7 @@ __host__ void BroadcastBool::execBroadcast(dim3 launchDims, cudaStream_t *s __syncthreads(); __shared__ Z *rZ; - __shared__ X *rX; + __shared__ X const* rX; for (int r = blockIdx.x; r < numTads; r += gridDim.x) { diff --git a/libnd4j/include/loops/cuda/broadcasting_int.cu b/libnd4j/include/loops/cuda/broadcasting_int.cu index 998ac9ae8..37cbf3eba 100644 --- 
a/libnd4j/include/loops/cuda/broadcasting_int.cu +++ b/libnd4j/include/loops/cuda/broadcasting_int.cu @@ -34,23 +34,23 @@ using namespace simdOps; ////////////////////////////////////////////////////////////////////////// template static __global__ void broadcastIntSimple( - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, + void const* x, + Nd4jLong const* xShapeInfo, + void const* y, + Nd4jLong const* yShapeInfo, void *z, - Nd4jLong *zShapeInfo, + Nd4jLong const* zShapeInfo, int *dimension, - int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { functions::broadcast::BroadcastInt::template transformCuda(x,xShapeInfo,y,yShapeInfo,z,zShapeInfo,dimension,dimensionLength,tadOnlyShapeInfo,tadOffsets,tadOnlyShapeInfoZ,tadOffsetsZ); } ////////////////////////////////////////////////////////////////////////// template -static __global__ void broadcastIntSimple(const void *x, const Nd4jLong *xShapeInfo, - const void *y, const Nd4jLong *yShapeInfo, - void *z, const Nd4jLong *zShapeInfo) { +static __global__ void broadcastIntSimple(const void *x, const Nd4jLong const* xShapeInfo, + const void *y, const Nd4jLong const* yShapeInfo, + void *z, const Nd4jLong const* zShapeInfo) { functions::broadcast::BroadcastInt::template transformCuda(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo); } @@ -58,14 +58,14 @@ static __global__ void broadcastIntSimple(const void *x, const Nd4jLong *xShapeI ////////////////////////////////////////////////////////////////////////// template static __global__ void broadcastBoolInverseSimple( - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, + void const* x, + Nd4jLong const* xShapeInfo, + void const* y, + Nd4jLong const* yShapeInfo, void *z, - Nd4jLong *zShapeInfo, + Nd4jLong const* zShapeInfo, 
int *dimension, - int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { functions::broadcast::BroadcastInt::template transformInverseCuda(x,xShapeInfo,y,yShapeInfo,z,zShapeInfo,dimension,dimensionLength,tadOnlyShapeInfo,tadOffsets,tadOnlyShapeInfoZ,tadOffsetsZ); } @@ -75,7 +75,7 @@ namespace broadcast { ////////////////////////////////////////////////////////////////////////// template template -__host__ void BroadcastInt::intermediateBroadcast(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *z, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { +__host__ void BroadcastInt::intermediateBroadcast(dim3 launchDims, cudaStream_t *stream, void const* x, Nd4jLong const* xShapeInfo, void const* y, Nd4jLong const* yShapeInfo, void *z, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { broadcastIntSimple<<>>(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, tadOffsetsZ); } @@ -92,16 +92,16 @@ __host__ void BroadcastInt::intermediateBroadcast(dim3 launchDims, cudaStream ////////////////////////////////////////////////////////////////////////// template -__host__ void BroadcastInt::execBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *z, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { 
+__host__ void BroadcastInt::execBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void const* x, Nd4jLong const* xShapeInfo, void const* y, Nd4jLong const* yShapeInfo, void *z, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { DISPATCH_BY_OPNUM_T(intermediateBroadcast, PARAMS(launchDims, stream, x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, tadOffsetsZ), OPS_A(BROADCAST_INT_OPS)) } ////////////////////////////////////////////////////////////////////////// template __host__ void BroadcastInt::execBroadcast(dim3 launchDims, cudaStream_t *stream, const int opNum, - const void *x, const Nd4jLong *xShapeInfo, - const void *y, const Nd4jLong *yShapeInfo, - void *z, const Nd4jLong *zShapeInfo) { + const void *x, const Nd4jLong const* xShapeInfo, + const void *y, const Nd4jLong const* yShapeInfo, + void *z, const Nd4jLong const* zShapeInfo) { DISPATCH_BY_OPNUM_T(intermediateBroadcast, PARAMS(launchDims, stream, x, xShapeInfo, y, yShapeInfo, z, zShapeInfo), OPS_A(BROADCAST_INT_OPS)) } @@ -109,13 +109,13 @@ __host__ void BroadcastInt::execBroadcast(dim3 launchDims, cudaStream_t *stre ////////////////////////////////////////////////////////////////////////// template template - __host__ void BroadcastInt::intermediateInverseBroadcast(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *z, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + __host__ void BroadcastInt::intermediateInverseBroadcast(dim3 launchDims, cudaStream_t *stream, void const* x, Nd4jLong const* xShapeInfo, void const* y, Nd4jLong const* yShapeInfo, void *z, Nd4jLong const* zShapeInfo, int *dimension, int 
dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { broadcastBoolInverseSimple<<>>(x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, tadOffsetsZ); } ////////////////////////////////////////////////////////////////////////// template - __host__ void BroadcastInt::execInverseBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *z, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + __host__ void BroadcastInt::execInverseBroadcast(dim3 launchDims, cudaStream_t *stream, int opNum, void const* x, Nd4jLong const* xShapeInfo, void const* y, Nd4jLong const* yShapeInfo, void *z, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { DISPATCH_BY_OPNUM_T(intermediateInverseBroadcast, PARAMS(launchDims, stream, x, xShapeInfo, y, yShapeInfo, z, zShapeInfo, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, tadOffsetsZ), OPS_A(BROADCAST_INT_OPS)) } @@ -123,19 +123,19 @@ __host__ void BroadcastInt::execBroadcast(dim3 launchDims, cudaStream_t *stre template template __device__ void BroadcastInt::transformInverseCuda( - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* 
tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { if (tadOnlyShapeInfoZ == nullptr) { tadOnlyShapeInfoZ = tadOnlyShapeInfo; tadOffsetsZ = tadOffsets; } - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); //decompose in to several sub tads after @@ -183,19 +183,19 @@ __host__ void BroadcastInt::execBroadcast(dim3 launchDims, cudaStream_t *stre template template __device__ void BroadcastInt::transformCuda( - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadOnlyShapeInfoZ, Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadOnlyShapeInfoZ, Nd4jLong const* tadOffsetsZ) { if (tadOnlyShapeInfoZ == nullptr) { tadOnlyShapeInfoZ = tadOnlyShapeInfo; tadOffsetsZ = tadOffsets; } - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); //decompose in to several sub tads after @@ -218,7 +218,7 @@ __host__ void BroadcastInt::execBroadcast(dim3 launchDims, cudaStream_t *stre __syncthreads(); __shared__ X *rZ; - __shared__ X *rX; + __shared__ X const* rX; for (int r = blockIdx.x; r < numTads; r += gridDim.x) { @@ -250,9 +250,9 @@ __host__ void BroadcastInt::execBroadcast(dim3 launchDims, cudaStream_t *stre ////////////////////////////////////////////////////////////////////////// template template -__device__ void BroadcastInt::transformCuda(const void *vx, const Nd4jLong *xShapeInfo, - const void *vy, const Nd4jLong *yShapeInfo, - void *vz, const Nd4jLong *zShapeInfo) { +__device__ void BroadcastInt::transformCuda(const void *vx, 
const Nd4jLong const* xShapeInfo, + const void *vy, const Nd4jLong const* yShapeInfo, + void *vz, const Nd4jLong const* zShapeInfo) { const X* x = reinterpret_cast(vx); const X* y = reinterpret_cast(vy); diff --git a/libnd4j/include/loops/cuda/indexreduce.cu b/libnd4j/include/loops/cuda/indexreduce.cu index 6383458c9..e6a52b16a 100644 --- a/libnd4j/include/loops/cuda/indexreduce.cu +++ b/libnd4j/include/loops/cuda/indexreduce.cu @@ -31,14 +31,14 @@ using namespace simdOps; template static __global__ void simpleIndexReduceGeneric(const int op, - void *dx, - Nd4jLong *xShapeInfo, int xRank, + void const* dx, + Nd4jLong const* xShapeInfo, int xRank, void *extraParams, void *result, - Nd4jLong *zShapeInfo, int zRank, + Nd4jLong const* zShapeInfo, int zRank, int *dimension, int dimensionLength, - int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) { + int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets) { functions::indexreduce::IndexReduce::transform(op,dx,xShapeInfo,extraParams,result,zShapeInfo,dimension,dimensionLength,postProcessOrNot,allocationBuffer,reductionBuffer,tadOnlyShapeInfo,tadOffsets); } @@ -49,15 +49,15 @@ namespace functions { template _CUDA_H void IndexReduce::executeIndexReduceScalar(dim3 launchDims, cudaStream_t *stream, const int opNum, - void *dx, Nd4jLong *xShapeInfo, + void const* dx, Nd4jLong const* xShapeInfo, int xRank, void *extraParams, - void *result, Nd4jLong *zShapeInfo, + void *result, Nd4jLong const* zShapeInfo, int zRank, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets) { simpleIndexReduceGeneric<<>>(opNum, dx, xShapeInfo, xRank, @@ -70,7 +70,7 @@ namespace functions { } template - _CUDA_H void 
IndexReduce::executeIndexReduce(dim3 launchDims, cudaStream_t *stream, const int opNum, void *dx, Nd4jLong *xShapeInfo, int xRank, void *extraParams, void *result, Nd4jLong *zShapeInfo, int zRank, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) { + _CUDA_H void IndexReduce::executeIndexReduce(dim3 launchDims, cudaStream_t *stream, const int opNum, void const* dx, Nd4jLong const* xShapeInfo, int xRank, void *extraParams, void *result, Nd4jLong const* zShapeInfo, int zRank, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets) { simpleIndexReduceGeneric<<>>( opNum, dx, @@ -154,35 +154,35 @@ namespace functions { template __device__ void IndexReduce::transform( const int opNum, - void *x, - Nd4jLong *xShapeInfo, + void const* x, + Nd4jLong const* xShapeInfo, void *extraParams, void *result, - Nd4jLong *zShapeInfo, + Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset) { + Nd4jLong const* tadShapeInfo, + Nd4jLong const* tadOffset) { DISPATCH_BY_OPNUM_TT(transform, PARAMS(x, xShapeInfo, extraParams, result, zShapeInfo, dimension, dimensionLength, postProcessOrNot, allocationBuffer, reductionBuffer, tadShapeInfo, tadOffset), INDEX_REDUCE_OPS); } template template - __device__ void IndexReduce::transform(void *vdx, Nd4jLong *xShapeInfo, + __device__ void IndexReduce::transform(void const* vdx, Nd4jLong const* xShapeInfo, void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, + void* vz, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *vreductionBuffer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets){ + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* 
tadOffsets){ /**int * Gpu information for the problem */ - auto dx = reinterpret_cast(vdx); + auto dx = reinterpret_cast(vdx); auto z = reinterpret_cast(vz); auto extraParams = static_cast(vextraParams); auto reductionBuffer = static_cast(vreductionBuffer); diff --git a/libnd4j/include/loops/cuda/pairwise.chpp b/libnd4j/include/loops/cuda/pairwise.chpp index d3252d862..ee2c01695 100644 --- a/libnd4j/include/loops/cuda/pairwise.chpp +++ b/libnd4j/include/loops/cuda/pairwise.chpp @@ -28,13 +28,13 @@ using namespace simdOps; //////////////////////////////////////////////////////////////////////////////// template -__global__ static void pairwiseSimpleShaped(void* vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo, +__global__ static void pairwiseSimpleShaped(void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, void *vextraParams) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -91,9 +91,9 @@ namespace pairwise_transforms { template template void __host__ PairWiseTransform::intermediateShaped(dim3& launchDims, cudaStream_t *stream, - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, void *vextraParams){ pairwiseSimpleShaped<<>>(vx, xShapeInfo, vy, yShapeInfo, vz, zShapeInfo, vextraParams); @@ -101,7 +101,7 @@ void __host__ PairWiseTransform::intermediateShaped(dim3& launchDims, cud //////////////////////////////////////////////////////////////////////////////// template -void __host__ PairWiseTransform::executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum, void *vx, Nd4jLong *xShapeInfo, 
void *vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, void *vextraParams) { +void __host__ PairWiseTransform::executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum, void const* vx, Nd4jLong const* xShapeInfo, void const* vy, Nd4jLong const* yShapeInfo, void *vz, Nd4jLong const* zShapeInfo, void* vextraParams) { DISPATCH_BY_OPNUM_TTT(intermediateShaped, PARAMS(launchDims, stream, vx, xShapeInfo, vy, yShapeInfo, vz, zShapeInfo, vextraParams), PAIRWISE_TRANSFORM_OPS); } diff --git a/libnd4j/include/loops/cuda/pairwise_bool.cu b/libnd4j/include/loops/cuda/pairwise_bool.cu index f697de814..29cc90f2c 100644 --- a/libnd4j/include/loops/cuda/pairwise_bool.cu +++ b/libnd4j/include/loops/cuda/pairwise_bool.cu @@ -28,13 +28,13 @@ using namespace simdOps; //////////////////////////////////////////////////////////////////////////////// template -__global__ static void pairwiseSimpleShaped(void* vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo, +__global__ static void pairwiseSimpleShaped(void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, void *vextraParams) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -92,9 +92,9 @@ namespace pairwise_transforms { template template void _CUDA_H PairWiseBoolTransform::intermediateShaped(dim3& launchDims, cudaStream_t *stream, - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, void *vextraParams){ pairwiseSimpleShaped<<>>(vx, xShapeInfo, vy, yShapeInfo, vz, zShapeInfo, vextraParams); @@ -103,7 +103,7 @@ void _CUDA_H 
PairWiseBoolTransform::intermediateShaped(dim3& launchDims, cu //////////////////////////////////////////////////////////////////////////////// template -void PairWiseBoolTransform::executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, void *vextraParams) { +void PairWiseBoolTransform::executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum, void const* vx, Nd4jLong const* xShapeInfo, void const* vy, Nd4jLong const* yShapeInfo, void *vz, Nd4jLong const* zShapeInfo, void *vextraParams) { auto xType = sd::DataTypeUtils::fromT(); auto yType = sd::DataTypeUtils::fromT(); diff --git a/libnd4j/include/loops/cuda/pairwise_int.cu b/libnd4j/include/loops/cuda/pairwise_int.cu index 44447605e..740995cee 100644 --- a/libnd4j/include/loops/cuda/pairwise_int.cu +++ b/libnd4j/include/loops/cuda/pairwise_int.cu @@ -28,13 +28,13 @@ using namespace simdOps; //////////////////////////////////////////////////////////////////////////////// template -__global__ static void pairwiseSimpleShaped(void* vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo, +__global__ static void pairwiseSimpleShaped(void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, void *vextraParams) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -92,9 +92,9 @@ namespace pairwise_transforms { template template void _CUDA_H PairWiseIntTransform::intermediateShaped(dim3& launchDims, cudaStream_t *stream, - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, - void *vz, Nd4jLong *zShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, + void *vz, Nd4jLong 
const* zShapeInfo, void *vextraParams){ pairwiseSimpleShaped<<>>(vx, xShapeInfo, vy, yShapeInfo, vz, zShapeInfo, vextraParams); @@ -103,7 +103,7 @@ void _CUDA_H PairWiseIntTransform::intermediateShaped(dim3& launchDims, cudaS //////////////////////////////////////////////////////////////////////////////// template -void PairWiseIntTransform::executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, void *vextraParams) { +void PairWiseIntTransform::executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum, void const* vx, Nd4jLong const* xShapeInfo, void const* vy, Nd4jLong const* yShapeInfo, void *vz, Nd4jLong const* zShapeInfo, void *vextraParams) { auto xType = sd::DataTypeUtils::fromT(); DISPATCH_BY_OPNUM_T(intermediateShaped, PARAMS(launchDims, stream, vx, xShapeInfo, vy, yShapeInfo, vz, zShapeInfo, vextraParams), PAIRWISE_INT_OPS); diff --git a/libnd4j/include/loops/cuda/random.cu b/libnd4j/include/loops/cuda/random.cu index c7550b926..755763293 100644 --- a/libnd4j/include/loops/cuda/random.cu +++ b/libnd4j/include/loops/cuda/random.cu @@ -32,7 +32,7 @@ template static inline __device__ void randomSingleGeneric( Nd4jPointer state, void *z, - Nd4jLong *zShapeBuffer, + Nd4jLong const* zShapeBuffer, void *extraArguments) { @@ -46,10 +46,10 @@ static inline __device__ void randomSingleGeneric( template static inline __device__ void randomDoubleGeneric( Nd4jPointer state, - void *x, - Nd4jLong *xShapeBuffer, + void const* x, + Nd4jLong const* xShapeBuffer, void *z, - Nd4jLong *zShapeBuffer, + Nd4jLong const* zShapeBuffer, void *extraArguments) { @@ -66,12 +66,12 @@ static inline __device__ void randomDoubleGeneric( template static inline __device__ void randomTripleGeneric( Nd4jPointer state, - void *x, - Nd4jLong *xShapeBuffer, - void *y, - Nd4jLong *yShapeBuffer, + void const* x, + Nd4jLong const* xShapeBuffer, + void const* y, + Nd4jLong const* 
yShapeBuffer, void *z, - Nd4jLong *zShapeBuffer, + Nd4jLong const* zShapeBuffer, void *extraArguments) { @@ -89,20 +89,20 @@ static inline __device__ void randomTripleGeneric( #ifndef __CLION_IDE__ // here we generate kernels for target operations -DISPATCH_KERNEL_SIMPLE(randomSingle_, randomSingleGeneric, float, INPUT(Nd4jPointer state, void *z, Nd4jLong *zShapeBuffer, void *extraArguments), PARAMS(state, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) -DISPATCH_KERNEL_SIMPLE(randomSingle_, randomSingleGeneric, double, INPUT(Nd4jPointer state, void *z, Nd4jLong *zShapeBuffer, void *extraArguments), PARAMS(state, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) -DISPATCH_KERNEL_SIMPLE(randomSingle_, randomSingleGeneric, float16, INPUT(Nd4jPointer state, void *z, Nd4jLong *zShapeBuffer, void *extraArguments), PARAMS(state, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) -DISPATCH_KERNEL_SIMPLE(randomSingle_, randomSingleGeneric, bfloat16, INPUT(Nd4jPointer state, void *z, Nd4jLong *zShapeBuffer, void *extraArguments), PARAMS(state, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) +DISPATCH_KERNEL_SIMPLE(randomSingle_, randomSingleGeneric, float, INPUT(Nd4jPointer state, void *z, Nd4jLong const* zShapeBuffer, void *extraArguments), PARAMS(state, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) +DISPATCH_KERNEL_SIMPLE(randomSingle_, randomSingleGeneric, double, INPUT(Nd4jPointer state, void *z, Nd4jLong const* zShapeBuffer, void *extraArguments), PARAMS(state, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) +DISPATCH_KERNEL_SIMPLE(randomSingle_, randomSingleGeneric, float16, INPUT(Nd4jPointer state, void *z, Nd4jLong const* zShapeBuffer, void *extraArguments), PARAMS(state, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) +DISPATCH_KERNEL_SIMPLE(randomSingle_, randomSingleGeneric, bfloat16, INPUT(Nd4jPointer state, void *z, Nd4jLong const* zShapeBuffer, void *extraArguments), PARAMS(state, z, zShapeBuffer, extraArguments), 
OPS_A(RANDOM_OPS)) -DISPATCH_KERNEL_SIMPLE(randomDouble_, randomDoubleGeneric, float, INPUT(Nd4jPointer state, void *x, Nd4jLong *xShapeBuffer, void *z, Nd4jLong *zShapeBuffer, void *extraArguments), PARAMS(state, x, xShapeBuffer, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) -DISPATCH_KERNEL_SIMPLE(randomDouble_, randomDoubleGeneric, double, INPUT(Nd4jPointer state, void *x, Nd4jLong *xShapeBuffer, void *z, Nd4jLong *zShapeBuffer, void *extraArguments), PARAMS(state, x, xShapeBuffer, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) -DISPATCH_KERNEL_SIMPLE(randomDouble_, randomDoubleGeneric, float16, INPUT(Nd4jPointer state, void *x, Nd4jLong *xShapeBuffer, void *z, Nd4jLong *zShapeBuffer, void *extraArguments), PARAMS(state, x, xShapeBuffer, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) -DISPATCH_KERNEL_SIMPLE(randomDouble_, randomDoubleGeneric, bfloat16, INPUT(Nd4jPointer state, void *x, Nd4jLong *xShapeBuffer, void *z, Nd4jLong *zShapeBuffer, void *extraArguments), PARAMS(state, x, xShapeBuffer, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) +DISPATCH_KERNEL_SIMPLE(randomDouble_, randomDoubleGeneric, float, INPUT(Nd4jPointer state, void const* x, Nd4jLong const* xShapeBuffer, void *z, Nd4jLong const* zShapeBuffer, void *extraArguments), PARAMS(state, x, xShapeBuffer, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) +DISPATCH_KERNEL_SIMPLE(randomDouble_, randomDoubleGeneric, double, INPUT(Nd4jPointer state, void const* x, Nd4jLong const* xShapeBuffer, void *z, Nd4jLong const* zShapeBuffer, void *extraArguments), PARAMS(state, x, xShapeBuffer, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) +DISPATCH_KERNEL_SIMPLE(randomDouble_, randomDoubleGeneric, float16, INPUT(Nd4jPointer state, void const* x, Nd4jLong const* xShapeBuffer, void *z, Nd4jLong const* zShapeBuffer, void *extraArguments), PARAMS(state, x, xShapeBuffer, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) +DISPATCH_KERNEL_SIMPLE(randomDouble_, randomDoubleGeneric, 
bfloat16, INPUT(Nd4jPointer state, void const* x, Nd4jLong const* xShapeBuffer, void *z, Nd4jLong const* zShapeBuffer, void *extraArguments), PARAMS(state, x, xShapeBuffer, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) -DISPATCH_KERNEL_SIMPLE(randomTriple_, randomTripleGeneric, float, INPUT(Nd4jPointer state, void *x, Nd4jLong *xShapeBuffer, void *y, Nd4jLong *yShapeBuffer, void *z, Nd4jLong *zShapeBuffer, void *extraArguments), PARAMS(state, x, xShapeBuffer, y, yShapeBuffer, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) -DISPATCH_KERNEL_SIMPLE(randomTriple_, randomTripleGeneric, double, INPUT(Nd4jPointer state, void *x, Nd4jLong *xShapeBuffer, void *y, Nd4jLong *yShapeBuffer, void *z, Nd4jLong *zShapeBuffer, void *extraArguments), PARAMS(state, x, xShapeBuffer, y, yShapeBuffer, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) -DISPATCH_KERNEL_SIMPLE(randomTriple_, randomTripleGeneric, float16, INPUT(Nd4jPointer state, void *x, Nd4jLong *xShapeBuffer, void *y, Nd4jLong *yShapeBuffer, void *z, Nd4jLong *zShapeBuffer, void *extraArguments), PARAMS(state, x, xShapeBuffer, y, yShapeBuffer, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) -DISPATCH_KERNEL_SIMPLE(randomTriple_, randomTripleGeneric, bfloat16, INPUT(Nd4jPointer state, void *x, Nd4jLong *xShapeBuffer, void *y, Nd4jLong *yShapeBuffer, void *z, Nd4jLong *zShapeBuffer, void *extraArguments), PARAMS(state, x, xShapeBuffer, y, yShapeBuffer, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) +DISPATCH_KERNEL_SIMPLE(randomTriple_, randomTripleGeneric, float, INPUT(Nd4jPointer state, void const* x, Nd4jLong const* xShapeBuffer, void const* y, Nd4jLong const* yShapeBuffer, void *z, Nd4jLong const* zShapeBuffer, void *extraArguments), PARAMS(state, x, xShapeBuffer, y, yShapeBuffer, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) +DISPATCH_KERNEL_SIMPLE(randomTriple_, randomTripleGeneric, double, INPUT(Nd4jPointer state, void const* x, Nd4jLong const* xShapeBuffer, void const* y, Nd4jLong 
const* yShapeBuffer, void *z, Nd4jLong const* zShapeBuffer, void *extraArguments), PARAMS(state, x, xShapeBuffer, y, yShapeBuffer, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) +DISPATCH_KERNEL_SIMPLE(randomTriple_, randomTripleGeneric, float16, INPUT(Nd4jPointer state, void const* x, Nd4jLong const* xShapeBuffer, void const* y, Nd4jLong const* yShapeBuffer, void *z, Nd4jLong const* zShapeBuffer, void *extraArguments), PARAMS(state, x, xShapeBuffer, y, yShapeBuffer, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) +DISPATCH_KERNEL_SIMPLE(randomTriple_, randomTripleGeneric, bfloat16, INPUT(Nd4jPointer state, void const* x, Nd4jLong const* xShapeBuffer, void const* y, Nd4jLong const* yShapeBuffer, void *z, Nd4jLong const* zShapeBuffer, void *extraArguments), PARAMS(state, x, xShapeBuffer, y, yShapeBuffer, z, zShapeBuffer, extraArguments), OPS_A(RANDOM_OPS)) #endif @@ -110,10 +110,10 @@ namespace functions { namespace random { template template - void _CUDA_D RandomFunction::execTransformCuda(Nd4jPointer state, void *vx, Nd4jLong *xShapeBuffer, void *vy, Nd4jLong *yShapeBuffer, void *vz, Nd4jLong *zShapeBuffer, void *vextraArguments) { + void _CUDA_D RandomFunction::execTransformCuda(Nd4jPointer state, void const* vx, Nd4jLong const* xShapeBuffer, void const* vy, Nd4jLong const* yShapeBuffer, void *vz, Nd4jLong const* zShapeBuffer, void *vextraArguments) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); @@ -180,9 +180,9 @@ namespace functions { template template - void _CUDA_D RandomFunction::execTransformCuda(Nd4jPointer state, void *vx, Nd4jLong *xShapeBuffer, void *vz, Nd4jLong *zShapeBuffer, void *vextraArguments) { + void _CUDA_D RandomFunction::execTransformCuda(Nd4jPointer state, void const* vx, Nd4jLong const* xShapeBuffer, void* vz, Nd4jLong const* zShapeBuffer, void 
*vextraArguments) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); @@ -238,7 +238,7 @@ namespace functions { template template - void _CUDA_D RandomFunction::execTransformCuda(Nd4jPointer state, void *vz, Nd4jLong *zShapeBuffer, void *vextraArguments) { + void _CUDA_D RandomFunction::execTransformCuda(Nd4jPointer state, void *vz, Nd4jLong const* zShapeBuffer, void *vextraArguments) { auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); @@ -283,7 +283,7 @@ namespace functions { } template <> - _CUDA_H void RandomFunction::executeCudaSingle(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void *vz, Nd4jLong *zShapeBuffer, void *vextraArguments) { + _CUDA_H void RandomFunction::executeCudaSingle(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void *vz, Nd4jLong const* zShapeBuffer, void *vextraArguments) { auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); @@ -295,7 +295,7 @@ namespace functions { } template <> - _CUDA_H void RandomFunction::executeCudaSingle(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void *vz, Nd4jLong *zShapeBuffer, void *vextraArguments) { + _CUDA_H void RandomFunction::executeCudaSingle(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void *vz, Nd4jLong const* zShapeBuffer, void *vextraArguments) { auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); @@ -307,7 +307,7 @@ namespace functions { } template <> - _CUDA_H void RandomFunction::executeCudaSingle(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void *vz, Nd4jLong *zShapeBuffer, void *vextraArguments) { + _CUDA_H void RandomFunction::executeCudaSingle(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void *vz, Nd4jLong const* 
zShapeBuffer, void *vextraArguments) { auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); @@ -319,7 +319,7 @@ namespace functions { } template <> - _CUDA_H void RandomFunction::executeCudaSingle(dim3& launchDims, cudaStream_t *stream, int opNum, Nd4jPointer stateHost, void *vz, Nd4jLong *zShapeBuffer, void *vextraArguments) { + _CUDA_H void RandomFunction::executeCudaSingle(dim3& launchDims, cudaStream_t *stream, int opNum, Nd4jPointer stateHost, void *vz, Nd4jLong const* zShapeBuffer, void *vextraArguments) { auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); @@ -331,9 +331,9 @@ namespace functions { } template <> - _CUDA_H void RandomFunction::executeCudaDouble(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void *vx, Nd4jLong *xShapeBuffer, void *vz, Nd4jLong *zShapeBuffer, void *vextraArguments) { + _CUDA_H void RandomFunction::executeCudaDouble(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void const* vx, Nd4jLong const* xShapeBuffer, void *vz, Nd4jLong const* zShapeBuffer, void *vextraArguments) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); @@ -345,9 +345,9 @@ namespace functions { template <> - _CUDA_H void RandomFunction::executeCudaDouble(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void *vx, Nd4jLong *xShapeBuffer, void *vz, Nd4jLong *zShapeBuffer, void *vextraArguments) { + _CUDA_H void RandomFunction::executeCudaDouble(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void const* vx, Nd4jLong const* xShapeBuffer, void *vz, Nd4jLong const* zShapeBuffer, void *vextraArguments) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); @@ -358,9 +358,9 @@ namespace functions 
{ } template <> - _CUDA_H void RandomFunction::executeCudaDouble(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void *vx, Nd4jLong *xShapeBuffer, void *vz, Nd4jLong *zShapeBuffer, void *vextraArguments) { + _CUDA_H void RandomFunction::executeCudaDouble(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void const* vx, Nd4jLong const* xShapeBuffer, void *vz, Nd4jLong const* zShapeBuffer, void *vextraArguments) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); @@ -371,9 +371,9 @@ namespace functions { } template <> - _CUDA_H void RandomFunction::executeCudaDouble(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void *vx, Nd4jLong *xShapeBuffer, void *vz, Nd4jLong *zShapeBuffer, void *vextraArguments) { + _CUDA_H void RandomFunction::executeCudaDouble(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void const* vx, Nd4jLong const* xShapeBuffer, void *vz, Nd4jLong const* zShapeBuffer, void *vextraArguments) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); @@ -384,11 +384,10 @@ namespace functions { } template <> - _CUDA_H void RandomFunction::executeCudaTriple(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void *vx, Nd4jLong *xShapeBuffer, void *vy, Nd4jLong *yShapeBuffer, void *vz, Nd4jLong *zShapeBuffer, void *vextraArguments) { + _CUDA_H void RandomFunction::executeCudaTriple(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void const* vx, Nd4jLong const* xShapeBuffer, void const* vy, Nd4jLong const* yShapeBuffer, void *vz, Nd4jLong const* zShapeBuffer, void *vextraArguments) { - - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = 
reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); @@ -399,10 +398,10 @@ namespace functions { } template <> - _CUDA_H void RandomFunction::executeCudaTriple(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void *vx, Nd4jLong *xShapeBuffer, void *vy, Nd4jLong *yShapeBuffer, void *vz, Nd4jLong *zShapeBuffer, void *vextraArguments) { + _CUDA_H void RandomFunction::executeCudaTriple(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void const* vx, Nd4jLong const* xShapeBuffer, void const* vy, Nd4jLong const* yShapeBuffer, void *vz, Nd4jLong const* zShapeBuffer, void *vextraArguments) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); @@ -413,10 +412,10 @@ namespace functions { } template <> - _CUDA_H void RandomFunction::executeCudaTriple(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void *vx, Nd4jLong *xShapeBuffer, void *vy, Nd4jLong *yShapeBuffer, void *vz, Nd4jLong *zShapeBuffer, void *vextraArguments) { + _CUDA_H void RandomFunction::executeCudaTriple(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void const* vx, Nd4jLong const* xShapeBuffer, void const* vy, Nd4jLong const* yShapeBuffer, void *vz, Nd4jLong const* zShapeBuffer, void *vextraArguments) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); @@ -429,10 +428,10 @@ namespace functions { template <> - _CUDA_H void RandomFunction::executeCudaTriple(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void *vx, Nd4jLong *xShapeBuffer, void *vy, Nd4jLong *yShapeBuffer, void *vz, Nd4jLong 
*zShapeBuffer, void *vextraArguments) { + _CUDA_H void RandomFunction::executeCudaTriple(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void const* vx, Nd4jLong const* xShapeBuffer, void const* vy, Nd4jLong const* yShapeBuffer, void *vz, Nd4jLong const* zShapeBuffer, void *vextraArguments) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto extraArguments = reinterpret_cast(vextraArguments); diff --git a/libnd4j/include/loops/cuda/reduce/reduce_bool.cu b/libnd4j/include/loops/cuda/reduce/reduce_bool.cu index 3aa2626a2..b70f0f38f 100644 --- a/libnd4j/include/loops/cuda/reduce/reduce_bool.cu +++ b/libnd4j/include/loops/cuda/reduce/reduce_bool.cu @@ -33,23 +33,24 @@ using namespace simdOps; //////////////////////////////////////////////////////////////////////// template -__global__ void simpleReduce(void *x, Nd4jLong *xShapeInfo, +__global__ void simpleReduce(const void *x, const Nd4jLong *xShapeInfo, void *extraParams, - void *z, Nd4jLong *zShapeInfo, + void *z, const Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) { + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets) { functions::reduce::ReduceBoolFunction::template transformCudaXD(x, xShapeInfo, extraParams, z, zShapeInfo, dimension, dimensionLength, reductionBuffer, tadOnlyShapeInfo, tadOffsets); } //////////////////////////////////////////////////////////////////////// template -__global__ void simpleScalar(void *x, Nd4jLong *xShapeInfo, +__global__ void simpleScalar(const void *x, const Nd4jLong *xShapeInfo, void *extraParams, - void *z, Nd4jLong *zShapeInfo, + void *z, const Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, - void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo) { + void *reductionBuffer, + const Nd4jLong *tadOnlyShapeInfo) { 
functions::reduce::ReduceBoolFunction::template execScalarCuda(x, xShapeInfo, extraParams, z, zShapeInfo, reductionBuffer, tadOnlyShapeInfo); } @@ -94,14 +95,14 @@ __device__ void ReduceBoolFunction::aggregatePartials(void *vsPartials, Nd4 //////////////////////////////////////////////////////////////////////// template template -__device__ void ReduceBoolFunction::transformCudaXD( void *vx, Nd4jLong *xShapeInfo, +__device__ void ReduceBoolFunction::transformCudaXD(const void *vx, const Nd4jLong *xShapeInfo, void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, void *vreductionBuffer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) { + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); auto reductionBuffer = reinterpret_cast(vreductionBuffer); @@ -147,13 +148,13 @@ __device__ void ReduceBoolFunction::transformCudaXD( void *vx, Nd4jLong *xS //////////////////////////////////////////////////////////////////////// template template -__device__ void ReduceBoolFunction::execScalarCuda(void *vx, Nd4jLong *xShapeInfo, +__device__ void ReduceBoolFunction::execScalarCuda(const void *vx, const Nd4jLong *xShapeInfo, void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, void *vreductionBuffer, - Nd4jLong *tadOnlyShapeInfo) { + const Nd4jLong *tadOnlyShapeInfo) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); auto reductionBuffer = reinterpret_cast(vreductionBuffer); @@ -235,7 +236,13 @@ __device__ void ReduceBoolFunction::execScalarCuda(void *vx, Nd4jLong *xSha //////////////////////////////////////////////////////////////////////// template template -__host__ void 
ReduceBoolFunction::intermediateXD(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { +__host__ void ReduceBoolFunction::intermediateXD(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShapeInfo, const Nd4jLong *hXShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, const Nd4jLong *hZShapeInfo, + int *dimension, int dimensionLength, + void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { nd4j_printf("Step A%i\n", -1); @@ -244,7 +251,7 @@ __host__ void ReduceBoolFunction::intermediateXD(dim3 launchDims, cudaStrea if(shape::isEmpty(hZShapeInfo)) return; - const auto startingVal = static_cast(OpType::startingValue(reinterpret_cast(x))); + const auto startingVal = static_cast(OpType::startingValue(reinterpret_cast(x))); auto res = cudaMemcpyAsync(sd::LaunchContext::defaultContext()->getScalarPointer(), &startingVal, sizeof(Z), cudaMemcpyHostToDevice, *stream); if (res != 0) @@ -265,14 +272,20 @@ __host__ void ReduceBoolFunction::intermediateXD(dim3 launchDims, cudaStrea //////////////////////////////////////////////////////////////////////// template template -__host__ void ReduceBoolFunction::intermediateScalar(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo) { +__host__ void ReduceBoolFunction::intermediateScalar(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShapeInfo, const Nd4jLong *hXShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, const Nd4jLong *hZShapeInfo, + int *dimension, int dimensionLength, + void *reductionBuffer, 
+ const Nd4jLong *tadOnlyShapeInfo) { if (shape::isEmpty(hXShapeInfo)) { if (shape::isEmpty(hZShapeInfo)) return; - const auto startingVal = static_cast(OpType::startingValue(reinterpret_cast(x))); + const auto startingVal = static_cast(OpType::startingValue(reinterpret_cast(x))); auto res = cudaMemcpyAsync(z, &startingVal, sizeof(Z), cudaMemcpyHostToDevice, *stream); if (res != 0) @@ -289,7 +302,14 @@ __host__ void ReduceBoolFunction::intermediateScalar(dim3 launchDims, cudaS //////////////////////////////////////////////////////////////////////// template -_CUDA_H void ReduceBoolFunction::execReduceScalar(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo) { +_CUDA_H void ReduceBoolFunction::execReduceScalar(dim3 launchDims, cudaStream_t *stream, + const int opNum, + const void *x, const Nd4jLong *xShapeInfo, const Nd4jLong *hXShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, const Nd4jLong *hZShapeInfo, + int *dimension, int dimensionLength, + void *reductionBuffer, + const Nd4jLong *tadOnlyShapeInfo) { DISPATCH_BY_OPNUM_TT(intermediateScalar, PARAMS(launchDims, stream, x, xShapeInfo, hXShapeInfo, extraParams, z, zShapeInfo, hZShapeInfo, dimension, dimensionLength, reductionBuffer, tadOnlyShapeInfo), OPS_A(REDUCE_BOOL_OPS)); sd::DebugHelper::checkErrorCode(stream, "execReduceScalarFloat(...) 
failed"); @@ -297,7 +317,14 @@ _CUDA_H void ReduceBoolFunction::execReduceScalar(dim3 launchDims, cudaStre //////////////////////////////////////////////////////////////////////// template -_CUDA_H void ReduceBoolFunction::execReduceXD(dim3 launchDims, cudaStream_t *stream, int opNum, int rank, void *x, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { +_CUDA_H void ReduceBoolFunction::execReduceXD(dim3 launchDims, cudaStream_t *stream, + const int opNum, + const int rank, const void *x, const Nd4jLong *xShapeInfo, const Nd4jLong *hXShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, const Nd4jLong *hZShapeInfo, + int *dimension, int dimensionLength, + void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { DISPATCH_BY_OPNUM_TT(intermediateXD, PARAMS(launchDims, stream, x, xShapeInfo, hXShapeInfo, extraParams, z, zShapeInfo, hZShapeInfo, dimension, dimensionLength, reductionPointer, tadShapeInfo, tadOffsets), OPS_A(REDUCE_BOOL_OPS)); DEBUG_KERNEL(stream, opNum); diff --git a/libnd4j/include/loops/cuda/reduce/reduce_float.chpp b/libnd4j/include/loops/cuda/reduce/reduce_float.chpp index e1b95ae55..71f5d03da 100644 --- a/libnd4j/include/loops/cuda/reduce/reduce_float.chpp +++ b/libnd4j/include/loops/cuda/reduce/reduce_float.chpp @@ -35,23 +35,24 @@ using namespace simdOps; //////////////////////////////////////////////////////////////////////// template -__global__ void simpleReduce(void *x, Nd4jLong *xShapeInfo, +__global__ void simpleReduce(const void *x, const Nd4jLong *xShapeInfo, void *extraParams, - void *z, Nd4jLong *zShapeInfo, + void *z, const Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) { + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong 
*tadOffsets) { functions::reduce::ReduceFloatFunction::template transformCudaXD(x, xShapeInfo, extraParams, z, zShapeInfo, dimension, dimensionLength, reductionBuffer, tadOnlyShapeInfo, tadOffsets); } //////////////////////////////////////////////////////////////////////// template -__global__ void simpleScalar(void *x, Nd4jLong *xShapeInfo, +__global__ void simpleScalar(const void *x, const Nd4jLong *xShapeInfo, void *extraParams, - void *z, Nd4jLong *zShapeInfo, + void *z, const Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, - void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo) { + void *reductionBuffer, + const Nd4jLong *tadOnlyShapeInfo) { functions::reduce::ReduceFloatFunction::template execScalarCuda(x, xShapeInfo, extraParams, z, zShapeInfo, reductionBuffer, tadOnlyShapeInfo); } @@ -95,14 +96,14 @@ __device__ void ReduceFloatFunction::aggregatePartials(void *vsPartials, Nd //////////////////////////////////////////////////////////////////////// template template -__device__ void ReduceFloatFunction::transformCudaXD( void *vx, Nd4jLong *xShapeInfo, +__device__ void ReduceFloatFunction::transformCudaXD(const void *vx, const Nd4jLong *xShapeInfo, void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, void *vreductionBuffer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) { + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); auto reductionBuffer = reinterpret_cast(vreductionBuffer); @@ -146,13 +147,13 @@ __device__ void ReduceFloatFunction::transformCudaXD( void *vx, Nd4jLong *x //////////////////////////////////////////////////////////////////////// template template -__device__ void ReduceFloatFunction::execScalarCuda(void *vx, Nd4jLong *xShapeInfo, - void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, - void 
*vreductionBuffer, - Nd4jLong *tadOnlyShapeInfo) { +__device__ void ReduceFloatFunction::execScalarCuda(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + void *vz, const Nd4jLong *zShapeInfo, + void *vreductionBuffer, + const Nd4jLong *tadOnlyShapeInfo) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); auto reductionBuffer = reinterpret_cast(vreductionBuffer); @@ -234,14 +235,20 @@ __device__ void ReduceFloatFunction::execScalarCuda(void *vx, Nd4jLong *xSh //////////////////////////////////////////////////////////////////////// template template -__host__ void ReduceFloatFunction::intermediateXD(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShape, Nd4jLong *hXShapeInfo, void *extraParams, void *z, Nd4jLong *zShape, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { +__host__ void ReduceFloatFunction::intermediateXD(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShape, const Nd4jLong *hXShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShape, const Nd4jLong *hZShapeInfo, + int *dimension, int dimensionLength, + void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { if(shape::isEmpty(hXShapeInfo)) { if(shape::isEmpty(hZShapeInfo)) return; - const auto startingVal = std::is_same>::value ? sd::DataTypeUtils::nanOrZero() : static_cast(OpType::startingValue(reinterpret_cast(x))); + const auto startingVal = std::is_same>::value ? 
sd::DataTypeUtils::nanOrZero() : static_cast(OpType::startingValue(reinterpret_cast(x))); auto res = cudaMemcpyAsync(sd::LaunchContext::defaultContext()->getScalarPointer(), &startingVal, sizeof(Z), cudaMemcpyHostToDevice, *stream); if (res != 0) throw sd::cuda_exception::build("ReduceFloatFunction::intermediateXD: failed to copy temporary scalar", res); @@ -259,27 +266,40 @@ __host__ void ReduceFloatFunction::intermediateXD(dim3 launchDims, cudaStre //////////////////////////////////////////////////////////////////////// template template -__host__ void ReduceFloatFunction::intermediateScalar(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo) { +__host__ void ReduceFloatFunction::intermediateScalar(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShapeInfo, const Nd4jLong *hXShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, const Nd4jLong *hZShapeInfo, + int *dimension, int dimensionLength, + void *reductionBuffer, + const Nd4jLong *tadOnlyShapeInfo) { if (shape::isEmpty(hXShapeInfo)) { if (shape::isEmpty(hZShapeInfo)) return; - const auto startingVal = std::is_same>::value ? sd::DataTypeUtils::nanOrZero() : static_cast(OpType::startingValue(reinterpret_cast(x))); + const auto startingVal = std::is_same>::value ? 
sd::DataTypeUtils::nanOrZero() : static_cast(OpType::startingValue(reinterpret_cast(x))); auto res = cudaMemcpyAsync(z, &startingVal, sizeof(Z), cudaMemcpyHostToDevice, *stream); if (res != 0) throw sd::cuda_exception::build("ReduceFloatFunction::intermediateScalar: failed to copy resulting scalar", res); } else { - simpleScalar << < launchDims.x, launchDims.y, launchDims.z, *stream>>>(x, xShapeInfo, extraParams, z, zShapeInfo, dimension, dimensionLength, reductionBuffer, tadOnlyShapeInfo); + simpleScalar <<>>(x, xShapeInfo, extraParams, z, zShapeInfo, dimension, dimensionLength, reductionBuffer, tadOnlyShapeInfo); } } //////////////////////////////////////////////////////////////////////// template -_CUDA_H void ReduceFloatFunction::execReduceScalar(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo) { +_CUDA_H void ReduceFloatFunction::execReduceScalar(dim3 launchDims, cudaStream_t *stream, + const int opNum, + const void *x, const Nd4jLong *xShapeInfo, const Nd4jLong *hXShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, const Nd4jLong *hZShapeInfo, + int *dimension, int dimensionLength, + void *reductionBuffer, + const Nd4jLong *tadOnlyShapeInfo) { DISPATCH_BY_OPNUM_TT(intermediateScalar, PARAMS(launchDims, stream, x, xShapeInfo, hXShapeInfo, extraParams, z, zShapeInfo, hZShapeInfo, dimension, dimensionLength, reductionBuffer, tadOnlyShapeInfo), OPS_A(REDUCE_FLOAT_OPS)); sd::DebugHelper::checkErrorCode(stream, "execReduceScalarFloat(...) 
failed"); @@ -287,7 +307,14 @@ _CUDA_H void ReduceFloatFunction::execReduceScalar(dim3 launchDims, cudaStr //////////////////////////////////////////////////////////////////////// template -_CUDA_H void ReduceFloatFunction::execReduceXD(dim3 launchDims, cudaStream_t *stream, int opNum, int rank, void *x, Nd4jLong *xShape, Nd4jLong *hXShapeInfo, void *extraParams, void *z, Nd4jLong *zShape, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { +_CUDA_H void ReduceFloatFunction::execReduceXD(dim3 launchDims, cudaStream_t *stream, + const int opNum, + const int rank, const void *x, const Nd4jLong *xShape, const Nd4jLong *hXShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShape, const Nd4jLong *hZShapeInfo, + int *dimension, int dimensionLength, + void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { DISPATCH_BY_OPNUM_TT(intermediateXD, PARAMS(launchDims, stream, x, xShape, hXShapeInfo, extraParams, z, zShape, hZShapeInfo, dimension, dimensionLength, reductionPointer, tadShapeInfo, tadOffsets), OPS_A(REDUCE_FLOAT_OPS)); DEBUG_KERNEL(stream, opNum); diff --git a/libnd4j/include/loops/cuda/reduce/reduce_long.cu b/libnd4j/include/loops/cuda/reduce/reduce_long.cu index e55ecd11c..1beac5330 100644 --- a/libnd4j/include/loops/cuda/reduce/reduce_long.cu +++ b/libnd4j/include/loops/cuda/reduce/reduce_long.cu @@ -33,46 +33,48 @@ using namespace simdOps; //////////////////////////////////////////////////////////////////////// template -__device__ void reduceSimpleGeneric(void *x, Nd4jLong *xShapeInfo, +__device__ void reduceSimpleGeneric(const void *x, const Nd4jLong *xShapeInfo, void *extraParams, - void *z, Nd4jLong *zShapeInfo, + void *z, const Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) { + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets) { 
functions::reduce::ReduceLongFunction::template transformCudaXD(x, xShapeInfo, extraParams, z, zShapeInfo, dimension, dimensionLength, reductionBuffer, tadOnlyShapeInfo, tadOffsets); } //////////////////////////////////////////////////////////////////////// template -__device__ void reduceScalarGeneric(void *x, Nd4jLong *xShapeInfo, +__device__ void reduceScalarGeneric(const void *x, const Nd4jLong *xShapeInfo, void *extraParams, - void *z, Nd4jLong *zShapeInfo, + void *z, const Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, - void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo) { + void *reductionBuffer, + const Nd4jLong *tadOnlyShapeInfo) { functions::reduce::ReduceLongFunction::template execScalarCuda(x, xShapeInfo, extraParams, z, zShapeInfo, reductionBuffer, tadOnlyShapeInfo); } //////////////////////////////////////////////////////////////////////// template -__global__ void simpleReduce(void *x, Nd4jLong *xShapeInfo, +__global__ void simpleReduce(const void *x, const Nd4jLong *xShapeInfo, void *extraParams, - void *z, Nd4jLong *zShapeInfo, + void *z, const Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) { + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets) { reduceSimpleGeneric(x, xShapeInfo, extraParams, z, zShapeInfo, dimension, dimensionLength, reductionBuffer, tadOnlyShapeInfo, tadOffsets); } //////////////////////////////////////////////////////////////////////// template -__global__ void simpleScalar(void *x, Nd4jLong *xShapeInfo, +__global__ void simpleScalar(const void *x, const Nd4jLong *xShapeInfo, void *extraParams, - void *z, Nd4jLong *zShapeInfo, + void *z, const Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, - void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo) { + void *reductionBuffer, + const Nd4jLong *tadOnlyShapeInfo) { reduceScalarGeneric(x, xShapeInfo, extraParams, z, zShapeInfo, dimension, dimensionLength, 
reductionBuffer, tadOnlyShapeInfo); } @@ -116,14 +118,14 @@ __device__ void ReduceLongFunction::aggregatePartials(void *vsPartials, Nd4 //////////////////////////////////////////////////////////////////////// template template -__device__ void ReduceLongFunction::transformCudaXD( void *vx, Nd4jLong *xShapeInfo, +__device__ void ReduceLongFunction::transformCudaXD(const void *vx, const Nd4jLong *xShapeInfo, void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, void *vreductionBuffer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) { + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); auto reductionBuffer = reinterpret_cast(vreductionBuffer); @@ -167,13 +169,13 @@ __device__ void ReduceLongFunction::transformCudaXD( void *vx, Nd4jLong *xS //////////////////////////////////////////////////////////////////////// template template -__device__ void ReduceLongFunction::execScalarCuda(void *vx, Nd4jLong *xShapeInfo, +__device__ void ReduceLongFunction::execScalarCuda(const void *vx, const Nd4jLong *xShapeInfo, void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, void *vreductionBuffer, - Nd4jLong *tadOnlyShapeInfo) { + const Nd4jLong *tadOnlyShapeInfo) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); auto reductionBuffer = reinterpret_cast(vreductionBuffer); @@ -254,14 +256,20 @@ __device__ void ReduceLongFunction::execScalarCuda(void *vx, Nd4jLong *xSha //////////////////////////////////////////////////////////////////////// template template -__host__ void ReduceLongFunction::intermediateXD(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void 
*extraParams, void *z, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { +__host__ void ReduceLongFunction::intermediateXD(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShapeInfo, const Nd4jLong *hXShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, const Nd4jLong *hZShapeInfo, + int *dimension, int dimensionLength, + void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { if(shape::isEmpty(hXShapeInfo)) { if(shape::isEmpty(hZShapeInfo)) return; - const auto startingVal = static_cast(OpType::startingValue(reinterpret_cast(x))); + const auto startingVal = static_cast(OpType::startingValue(reinterpret_cast(x))); auto res = cudaMemcpyAsync(sd::LaunchContext::defaultContext()->getScalarPointer(), &startingVal, sizeof(Z), cudaMemcpyHostToDevice, *stream); if (res != 0) @@ -280,14 +288,20 @@ __host__ void ReduceLongFunction::intermediateXD(dim3 launchDims, cudaStrea //////////////////////////////////////////////////////////////////////// template template -__host__ void ReduceLongFunction::intermediateScalar(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo) { +__host__ void ReduceLongFunction::intermediateScalar(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShapeInfo, const Nd4jLong *hXShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, const Nd4jLong *hZShapeInfo, + int *dimension, int dimensionLength, + void *reductionBuffer, + const Nd4jLong *tadOnlyShapeInfo) { if (shape::isEmpty(hXShapeInfo)) { if (shape::isEmpty(hZShapeInfo)) return; - const auto startingVal = static_cast(OpType::startingValue(reinterpret_cast(x))); + const auto startingVal = 
static_cast(OpType::startingValue(reinterpret_cast(x))); auto res = cudaMemcpyAsync(z, &startingVal, sizeof(Z), cudaMemcpyHostToDevice, *stream); if (res != 0) @@ -300,7 +314,14 @@ __host__ void ReduceLongFunction::intermediateScalar(dim3 launchDims, cudaS //////////////////////////////////////////////////////////////////////// template -_CUDA_H void ReduceLongFunction::execReduceScalar(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, Nd4jLong* hXShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, Nd4jLong* hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo) { +_CUDA_H void ReduceLongFunction::execReduceScalar(dim3 launchDims, cudaStream_t *stream, + const int opNum, + const void *x, const Nd4jLong *xShapeInfo, const Nd4jLong* hXShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, const Nd4jLong* hZShapeInfo, + int *dimension, int dimensionLength, + void *reductionBuffer, + const Nd4jLong *tadOnlyShapeInfo) { DISPATCH_BY_OPNUM_TT(intermediateScalar, PARAMS(launchDims, stream, x, xShapeInfo, hXShapeInfo, extraParams, z, zShapeInfo, hZShapeInfo, dimension, dimensionLength, reductionBuffer, tadOnlyShapeInfo), OPS_A(REDUCE_LONG_OPS)); sd::DebugHelper::checkErrorCode(stream, "execReduceScalarFloat(...) 
failed"); @@ -308,7 +329,14 @@ _CUDA_H void ReduceLongFunction::execReduceScalar(dim3 launchDims, cudaStre //////////////////////////////////////////////////////////////////////// template -_CUDA_H void ReduceLongFunction::execReduceXD(dim3 launchDims, cudaStream_t *stream, int opNum, int rank, void *x, Nd4jLong *xShapeInfo, Nd4jLong* hXShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, Nd4jLong* hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { +_CUDA_H void ReduceLongFunction::execReduceXD(dim3 launchDims, cudaStream_t *stream, + const int opNum, + int rank, const void *x, const Nd4jLong *xShapeInfo, const Nd4jLong* hXShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, const Nd4jLong* hZShapeInfo, + int *dimension, int dimensionLength, + void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { DISPATCH_BY_OPNUM_TT(intermediateXD, PARAMS(launchDims, stream, x, xShapeInfo, hXShapeInfo, extraParams, z, zShapeInfo, hZShapeInfo, dimension, dimensionLength, reductionPointer, tadShapeInfo, tadOffsets), OPS_A(REDUCE_LONG_OPS)); DEBUG_KERNEL(stream, opNum); diff --git a/libnd4j/include/loops/cuda/reduce/reduce_same.cu b/libnd4j/include/loops/cuda/reduce/reduce_same.cu index c3c74c806..c1947314e 100644 --- a/libnd4j/include/loops/cuda/reduce/reduce_same.cu +++ b/libnd4j/include/loops/cuda/reduce/reduce_same.cu @@ -34,23 +34,23 @@ using namespace simdOps; //////////////////////////////////////////////////////////////////////// template -__global__ void simpleReduce(void *x, Nd4jLong *xShapeInfo, +__global__ void simpleReduce(void const* x, Nd4jLong const* xShapeInfo, void *extraParams, - void *z, Nd4jLong *zShapeInfo, + void *z, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets) { 
functions::reduce::ReduceSameFunction::template transformCudaXD(x, xShapeInfo, extraParams, z, zShapeInfo, dimension, dimensionLength, reductionBuffer, tadOnlyShapeInfo, tadOffsets); } //////////////////////////////////////////////////////////////////////// template -__global__ void simpleScalar(void *x, Nd4jLong *xShapeInfo, +__global__ void simpleScalar(void const* x, Nd4jLong const* xShapeInfo, void *extraParams, - void *z, Nd4jLong *zShapeInfo, + void *z, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, - void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo) { + void *reductionBuffer, Nd4jLong const* tadOnlyShapeInfo) { functions::reduce::ReduceSameFunction::template execScalarCuda(x, xShapeInfo, extraParams, z, zShapeInfo, reductionBuffer, tadOnlyShapeInfo); } @@ -95,14 +95,14 @@ __device__ void ReduceSameFunction::aggregatePartials(void *vsPartials, Nd4jL //////////////////////////////////////////////////////////////////////// template template -__device__ void ReduceSameFunction::transformCudaXD( void *vx, Nd4jLong *xShapeInfo, +__device__ void ReduceSameFunction::transformCudaXD( void const* vx, Nd4jLong const* xShapeInfo, void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, void *vreductionBuffer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); auto reductionBuffer = reinterpret_cast(vreductionBuffer); @@ -155,23 +155,23 @@ __device__ void ReduceSameFunction::transformCudaXD( void *vx, Nd4jLong *xSha //////////////////////////////////////////////////////////////////////// template -__device__ void ReduceSameFunction::execScalarCudaLegacy(int opNum, void *vx, Nd4jLong *xShapeInfo, +__device__ void ReduceSameFunction::execScalarCudaLegacy(int opNum, 
void const* vx, Nd4jLong const* xShapeInfo, void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, void *vreductionBuffer, - Nd4jLong *tadOnlyShapeInfo) { + Nd4jLong const* tadOnlyShapeInfo) { DISPATCH_BY_OPNUM_T(execScalarCuda, PARAMS(vx, xShapeInfo, vextraParams, vz, zShapeInfo, vreductionBuffer, tadOnlyShapeInfo), REDUCE_SAME_OPS); } //////////////////////////////////////////////////////////////////////// template template -__device__ void ReduceSameFunction::execScalarCuda(void *vx, Nd4jLong *xShapeInfo, +__device__ void ReduceSameFunction::execScalarCuda(void const* vx, Nd4jLong const* xShapeInfo, void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, + void * vz, Nd4jLong const* zShapeInfo, void *vreductionBuffer, - Nd4jLong *tadOnlyShapeInfo) { - auto x = reinterpret_cast(vx); + Nd4jLong const* tadOnlyShapeInfo) { + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); auto reductionBuffer = reinterpret_cast(vreductionBuffer); @@ -251,14 +251,14 @@ __device__ void ReduceSameFunction::execScalarCuda(void *vx, Nd4jLong *xShape //////////////////////////////////////////////////////////////////////// template template -__host__ void ReduceSameFunction::intermediateXD(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { +__host__ void ReduceSameFunction::intermediateXD(dim3 launchDims, cudaStream_t *stream, void const* x, Nd4jLong const* xShapeInfo, Nd4jLong const* hXShapeInfo, void *extraParams, void *z, Nd4jLong const* zShapeInfo, Nd4jLong const* hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets) { if(shape::isEmpty(hXShapeInfo)) { if(shape::isEmpty(hZShapeInfo)) 
return; - const auto startingVal = static_cast(OpType::startingValue(reinterpret_cast(x))); + const auto startingVal = static_cast(OpType::startingValue(reinterpret_cast(x))); auto res = cudaMemcpyAsync(sd::LaunchContext::defaultContext()->getScalarPointer(), &startingVal, sizeof(X), cudaMemcpyHostToDevice, *stream); if (res != 0) @@ -277,14 +277,14 @@ __host__ void ReduceSameFunction::intermediateXD(dim3 launchDims, cudaStream_ //////////////////////////////////////////////////////////////////////// template template -__host__ void ReduceSameFunction::intermediateScalar(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo) { +__host__ void ReduceSameFunction::intermediateScalar(dim3 launchDims, cudaStream_t *stream, void const* x, Nd4jLong const* xShapeInfo, Nd4jLong const* hXShapeInfo, void *extraParams, void *z, Nd4jLong const* zShapeInfo, Nd4jLong const* hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong const* tadOnlyShapeInfo) { if (shape::isEmpty(hXShapeInfo)) { if (shape::isEmpty(hZShapeInfo)) return; - const auto startingVal = static_cast(OpType::startingValue(reinterpret_cast(x))); + const auto startingVal = static_cast(OpType::startingValue(reinterpret_cast(x))); auto res = cudaMemcpyAsync(z, &startingVal, sizeof(X), cudaMemcpyHostToDevice, *stream); if (res != 0) @@ -297,7 +297,7 @@ __host__ void ReduceSameFunction::intermediateScalar(dim3 launchDims, cudaStr //////////////////////////////////////////////////////////////////////// template -_CUDA_H void ReduceSameFunction::execReduceScalar(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, Nd4jLong* hXShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, Nd4jLong* hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, 
Nd4jLong *tadOnlyShapeInfo) { +_CUDA_H void ReduceSameFunction::execReduceScalar(dim3 launchDims, cudaStream_t *stream, int opNum, void const* x, Nd4jLong const* xShapeInfo, Nd4jLong const* hXShapeInfo, void *extraParams, void *z, Nd4jLong const* zShapeInfo, Nd4jLong const* hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong const* tadOnlyShapeInfo) { DISPATCH_BY_OPNUM_T(intermediateScalar, PARAMS(launchDims, stream, x, xShapeInfo, hXShapeInfo, extraParams, z, zShapeInfo, hZShapeInfo, dimension, dimensionLength, reductionBuffer, tadOnlyShapeInfo), REDUCE_SAME_OPS); sd::DebugHelper::checkErrorCode(stream, "execReduceScalarSame(...) failed"); @@ -305,7 +305,7 @@ _CUDA_H void ReduceSameFunction::execReduceScalar(dim3 launchDims, cudaStream //////////////////////////////////////////////////////////////////////// template -_CUDA_H void ReduceSameFunction::execReduceXD(dim3 launchDims, cudaStream_t *stream, int opNum, int rank, void *x, Nd4jLong *xShapeInfo, Nd4jLong* hXShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, Nd4jLong* hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { +_CUDA_H void ReduceSameFunction::execReduceXD(dim3 launchDims, cudaStream_t *stream, int opNum, int rank, void const* x, Nd4jLong const* xShapeInfo, Nd4jLong const* hXShapeInfo, void *extraParams, void *z, Nd4jLong const* zShapeInfo, Nd4jLong const* hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets) { DISPATCH_BY_OPNUM_T(intermediateXD, PARAMS(launchDims, stream, x, xShapeInfo, hXShapeInfo, extraParams, z, zShapeInfo, hZShapeInfo, dimension, dimensionLength, reductionPointer, tadShapeInfo, tadOffsets), REDUCE_SAME_OPS); DEBUG_KERNEL(stream, opNum); diff --git a/libnd4j/include/loops/cuda/reduce3.chpp b/libnd4j/include/loops/cuda/reduce3.chpp index 2fa16e9ac..2a301b817 100644 --- 
a/libnd4j/include/loops/cuda/reduce3.chpp +++ b/libnd4j/include/loops/cuda/reduce3.chpp @@ -32,28 +32,28 @@ namespace reduce3 { //////////////////////////////////////////////////////////////////////// template __global__ void execScalarGeneric(const int opNum, - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, void *extraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, int* allocationPointer, void *reductionBuffer, - Nd4jLong *tadOnlyShapeInfo) { + Nd4jLong const* tadOnlyShapeInfo) { Reduce3::execScalarCuda(opNum, vx, xShapeInfo, vy, yShapeInfo, extraParams, vz, zShapeInfo, allocationPointer, reductionBuffer, tadOnlyShapeInfo); } template __global__ void execAllGeneric(const int opNum, - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, void *extraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationPointer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* yTadOnlyShapeInfo, Nd4jLong const* yTadOffsets) { Reduce3::execAllCuda(opNum, vx, xShapeInfo, vy, yShapeInfo, extraParams, vz, zShapeInfo, dimension, dimensionLength, postProcessOrNot, allocationPointer, tadOnlyShapeInfo, tadOffsets, yTadOnlyShapeInfo, yTadOffsets); } @@ -62,15 +62,15 @@ __global__ void execAllGeneric(const int opNum, //////////////////////////////////////////////////////////////////////// template __global__ void execGeneric(const int opNum, - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, void 
*extraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationPointer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* yTadOnlyShapeInfo, Nd4jLong const* yTadOffsets) { Reduce3::execCuda(opNum, vx, xShapeInfo, vy, yShapeInfo, extraParams, vz, zShapeInfo, dimension, dimensionLength, postProcessOrNot, allocationPointer, tadOnlyShapeInfo, tadOffsets, yTadOnlyShapeInfo, yTadOffsets); } @@ -111,14 +111,14 @@ __device__ void Reduce3::aggregatePartials(void* vsPartials, Nd4jLong tid, ////////////////////////////////////////////////////////////////////////// template template -__device__ void Reduce3::execScalarCuda( void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, +__device__ void Reduce3::execScalarCuda( void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, void *extraParams, - void *vz, Nd4jLong *zShapeInfo, - int *allocationPointer, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo) { + void* vz, Nd4jLong const* zShapeInfo, + int *allocationPointer, void *reductionBuffer, Nd4jLong const* tadOnlyShapeInfo) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); __shared__ Z extraZ[3]; @@ -235,18 +235,18 @@ __device__ void Reduce3::execScalarCuda( void *vx, Nd4jLong *xShapeInfo, ////////////////////////////////////////////////////////////////////////// template template -__device__ void Reduce3::transformAll( void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, +__device__ void Reduce3::transformAll( void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, void *extraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, Nd4jLong 
const* zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationPointer, - Nd4jLong *xTadShapeInfo, Nd4jLong *xOffsets, - Nd4jLong *yTadShapeInfo,Nd4jLong *yOffsets) { + Nd4jLong const* xTadShapeInfo, Nd4jLong const* xOffsets, + Nd4jLong const* yTadShapeInfo, Nd4jLong const* yOffsets) { - auto dx = reinterpret_cast(vx); - auto dy = reinterpret_cast(vy); + auto dx = reinterpret_cast(vx); + auto dy = reinterpret_cast(vy); auto z = reinterpret_cast(vz); // initialize partials first @@ -287,7 +287,7 @@ __device__ void Reduce3::transformAll( void *vx, Nd4jLong *xShapeInfo, for (int r = blockIdx.x; r < xTads; r += blockDim.x * gridDim.x) { - X *x = dx + xOffsets[r]; + auto x = dx + xOffsets[r]; if (threadIdx.x < xTadLength && threadIdx.x < maxBlock) { auto x0 = shape::getIndexOffset(threadIdx.x, xTadShapeInfo); @@ -297,7 +297,7 @@ __device__ void Reduce3::transformAll( void *vx, Nd4jLong *xShapeInfo, for (int g = 0; g < yTads; g++) { - X *y = dy + yOffsets[g]; + auto y = dy + yOffsets[g]; int ri = (r * yTads) + g; sPartials[threadIdx.x] = startingVal; @@ -339,15 +339,15 @@ __device__ void Reduce3::transformAll( void *vx, Nd4jLong *xShapeInfo, ////////////////////////////////////////////////////////////////////////// template template -__device__ void Reduce3::transform(void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, +__device__ void Reduce3::transform(void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, void *extraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationPointer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* yTadOnlyShapeInfo, Nd4jLong const* yTadOffsets) { // FIXME if(shape::isScalar(zShapeInfo)) @@ -357,8 +357,8 @@ __device__ void 
Reduce3::transform(void *vx, Nd4jLong *xShapeInfo, yTadOnlyShapeInfo = yShapeInfo; // execReduce3TAD case } - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); Z startingVal = OpType::startingValue(x); @@ -454,15 +454,15 @@ __device__ void Reduce3::transform(void *vx, Nd4jLong *xShapeInfo, ////////////////////////////////////////////////////////////////////////// template __device__ void Reduce3::execCuda(const int opNum, - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, void *extraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationPointer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* yTadOnlyShapeInfo, Nd4jLong const* yTadOffsets) { DISPATCH_BY_OPNUM_TT(transform, PARAMS(vx, xShapeInfo, vy, yShapeInfo, extraParams, vz, zShapeInfo, dimension, dimensionLength, postProcessOrNot, allocationPointer, tadOnlyShapeInfo, tadOffsets, yTadOnlyShapeInfo, yTadOffsets), REDUCE3_OPS); } @@ -472,15 +472,15 @@ __device__ void Reduce3::execCuda(const int opNum, ////////////////////////////////////////////////////////////////////////// template __device__ void Reduce3::execAllCuda( const int opNum, - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, void *extraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationPointer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *yTadOnlyShapeInfo, Nd4jLong 
*yTadOffsets) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* yTadOnlyShapeInfo, Nd4jLong const* yTadOffsets) { DISPATCH_BY_OPNUM_TT(transformAll, PARAMS(vx, xShapeInfo, vy, yShapeInfo, extraParams, vz, zShapeInfo, dimension, dimensionLength, postProcessOrNot, allocationPointer, tadOnlyShapeInfo, tadOffsets, yTadOnlyShapeInfo, yTadOffsets), REDUCE3_OPS); } @@ -489,12 +489,12 @@ __device__ void Reduce3::execAllCuda( const int opNum, ////////////////////////////////////////////////////////////////////////// template __device__ void Reduce3::execScalarCuda(const int opNum, - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, void *extraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, int * allocationPointer, void *reductionBuffer, - Nd4jLong *tadOnlyShapeInfo) { + Nd4jLong const* tadOnlyShapeInfo) { DISPATCH_BY_OPNUM_TT(execScalarCuda, PARAMS(vx, xShapeInfo, vy, yShapeInfo, extraParams, vz, zShapeInfo, allocationPointer, reductionBuffer, tadOnlyShapeInfo), REDUCE3_OPS); } @@ -504,15 +504,15 @@ __device__ void Reduce3::execScalarCuda(const int opNum, template __host__ void Reduce3::exec(dim3 launchDims, cudaStream_t *stream, int opNum, - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, void *extraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationPointer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* yTadOnlyShapeInfo, Nd4jLong const* yTadOffsets) { execGeneric<<>>(opNum, vx, xShapeInfo, vy, yShapeInfo, extraParams, vz, zShapeInfo, dimension, 
dimensionLength, postProcessOrNot, allocationPointer, tadOnlyShapeInfo, tadOffsets, yTadOnlyShapeInfo, yTadOffsets); sd::DebugHelper::checkErrorCode(stream, "reduce3exec(...) failed"); @@ -522,15 +522,15 @@ __host__ void Reduce3::exec(dim3 launchDims, cudaStream_t *stream, template __host__ void Reduce3::execAll(dim3 launchDims, cudaStream_t *stream, int opNum, - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, void *extraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationPointer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* yTadOnlyShapeInfo, Nd4jLong const* yTadOffsets) { execAllGeneric<<>>(opNum, vx, xShapeInfo, vy, yShapeInfo, extraParams, vz, zShapeInfo, dimension, dimensionLength, postProcessOrNot, allocationPointer, tadOnlyShapeInfo, tadOffsets, yTadOnlyShapeInfo, yTadOffsets); sd::DebugHelper::checkErrorCode(stream, "execAllGeneric(...) failed"); @@ -540,13 +540,13 @@ __host__ void Reduce3::exec(dim3 launchDims, cudaStream_t *stream, template __host__ void Reduce3::execScalar(dim3 launchDims, cudaStream_t *stream, int opNum, - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, + void const* vx, Nd4jLong const* xShapeInfo, + void const* vy, Nd4jLong const* yShapeInfo, void *extraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, int* allocationPointer, void *reductionBuffer, - Nd4jLong *tadOnlyShapeInfo) { + Nd4jLong const* tadOnlyShapeInfo) { execScalarGeneric<<>>(opNum, vx, xShapeInfo, vy, yShapeInfo, extraParams, vz, zShapeInfo, allocationPointer, reductionBuffer, tadOnlyShapeInfo); sd::DebugHelper::checkErrorCode(stream, "execScalarGeneric(...) 
failed"); diff --git a/libnd4j/include/loops/cuda/scalar.chpp b/libnd4j/include/loops/cuda/scalar.chpp index ec1b42334..b412e4957 100644 --- a/libnd4j/include/loops/cuda/scalar.chpp +++ b/libnd4j/include/loops/cuda/scalar.chpp @@ -32,10 +32,10 @@ using namespace simdOps; //////////////////////////////////////////////////////////////////////////////// template -__global__ static void scalarSimpleShaped(void* vx, void *vscalar, Nd4jLong *xShapeInfo, void *vparams, void *vz, Nd4jLong *zShapeInfo, int *allocationBuffer) { +__global__ static void scalarSimpleShaped(void const* vx, void const* vscalar, Nd4jLong const* xShapeInfo, void *vparams, void *vz, Nd4jLong const* zShapeInfo, int *allocationBuffer) { - auto scalar = reinterpret_cast(vscalar)[0]; - auto x = reinterpret_cast(vx); + auto scalar = reinterpret_cast(vscalar)[0]; + auto x = reinterpret_cast(vx); auto params = reinterpret_cast(vparams); auto z = reinterpret_cast(vz); @@ -69,18 +69,18 @@ __global__ static void scalarSimpleShaped(void* vx, void *vscalar, Nd4jLong *xSh //////////////////////////////////////////////////////////////////////////////// template -__global__ static void scalarAlongDimension(void *vx, Nd4jLong *xShapeInfo, - void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, - void *vscalars, +__global__ static void scalarAlongDimension(void const* vx, Nd4jLong const* xShapeInfo, + void* vextraParams, + void* vz, Nd4jLong const* zShapeInfo, + void const* vscalars, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto extraParams = reinterpret_cast(vextraParams); auto z = reinterpret_cast(vz); - auto scalars = reinterpret_cast(vscalars); + auto scalars = reinterpret_cast(vscalars); if (tadShapeInfoZ == nullptr) { 
tadShapeInfoZ = tadShapeInfo; @@ -98,7 +98,7 @@ __global__ static void scalarAlongDimension(void *vx, Nd4jLong *xShapeInfo, // main loop, rolling over tads for (int r = blockIdx.x; r < numTads; r += gridDim.x) { Z *oZ = z + tadOffsetsZ[r]; - X *oX = x + tadOffsets[r]; + auto oX = x + tadOffsets[r]; auto s = scalars[r]; @@ -109,7 +109,7 @@ __global__ static void scalarAlongDimension(void *vx, Nd4jLong *xShapeInfo, // main loop, rolling over tads for (int r = blockIdx.x; r < numTads; r += gridDim.x) { Z *oZ = z + tadOffsetsZ[r]; - X *oX = x + tadOffsets[r]; + auto oX = x + tadOffsets[r]; auto s = scalars[r]; @@ -126,7 +126,7 @@ namespace scalar { //////////////////////////////////////////////////////////////////////////////// template template -void _CUDA_H ScalarTransform::intermediateShaped(dim3& launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, Nd4jLong *hxShapeInfo, void *vz, Nd4jLong *zShapeInfo, Nd4jLong *hzShapeInfo, void* vscalar, void *vextraParams, int *allocPointer){ +void _CUDA_H ScalarTransform::intermediateShaped(dim3& launchDims, cudaStream_t *stream, void const* vx, Nd4jLong const* xShapeInfo, Nd4jLong const* hxShapeInfo, void *vz, Nd4jLong const* zShapeInfo, Nd4jLong const* hzShapeInfo, void const* vscalar, void *vextraParams, int *allocPointer){ auto xEws = shape::elementWiseStride(hxShapeInfo); auto xOrder = shape::order(hxShapeInfo); @@ -143,14 +143,14 @@ void _CUDA_H ScalarTransform::intermediateShaped(dim3& launchDims, cudaSt //////////////////////////////////////////////////////////////////////////////// template template -void _CUDA_H ScalarTransform::intermediateAlongDimension(dim3& launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, void *z, Nd4jLong *zShapeInfo, void *scalars, void *extraParams, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { +void _CUDA_H ScalarTransform::intermediateAlongDimension(dim3& launchDims, 
cudaStream_t *stream, void const* x, Nd4jLong const* xShapeInfo, void *z, Nd4jLong const* zShapeInfo, void const* scalars, void *extraParams, int *dimension, int dimensionLength, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ) { scalarAlongDimension<<>>(x, xShapeInfo, extraParams, z, zShapeInfo, scalars, dimension, dimensionLength, tadShapeInfo, tadOffsets, tadShapeInfoZ, tadOffsetsZ); sd::DebugHelper::checkErrorCode(stream, "scalarAlongDimA(...) failed"); } //////////////////////////////////////////////////////////////////////////////// template -void ScalarTransform::executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum, void *vx, Nd4jLong *xShapeInfo, Nd4jLong *hxShapeInfo, void *vz, Nd4jLong *zShapeInfo, Nd4jLong *hzShapeInfo, void* vscalar, void *vextraParams) { +void ScalarTransform::executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum, void const* vx, Nd4jLong const* xShapeInfo, Nd4jLong const* hxShapeInfo, void *vz, Nd4jLong const* zShapeInfo, Nd4jLong const* hzShapeInfo, void const* vscalar, void *vextraParams) { if (sd::Environment::getInstance()->isDebugAndVerbose()) printf("H14 opNum:[%i]\n", opNum); @@ -160,11 +160,10 @@ void ScalarTransform::executeCudaShaped(dim3& launchDims, cudaStream_t *s //////////////////////////////////////////////////////////////////////////////// template -void ScalarTransform::executeCudaAlongDimension(dim3& launchDims, cudaStream_t *stream, int opNum, void *vx, Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, void *vscalars, void *vextraParams, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { +void ScalarTransform::executeCudaAlongDimension(dim3& launchDims, cudaStream_t *stream, int opNum, void const* vx, Nd4jLong const* xShapeInfo, void *vz, Nd4jLong const* zShapeInfo, void const* vscalars, void *vextraParams, int *dimension, int 
dimensionLength, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ) { DISPATCH_BY_OPNUM_TTT(intermediateAlongDimension, PARAMS(launchDims, stream, vx, xShapeInfo, vz, zShapeInfo, vscalars, vextraParams, dimension, dimensionLength, tadShapeInfo, tadOffsets, tadShapeInfoZ, tadOffsetsZ), SCALAR_OPS); } - } } diff --git a/libnd4j/include/loops/cuda/scalar_bool.cu b/libnd4j/include/loops/cuda/scalar_bool.cu index 1c8929ef3..e23560778 100644 --- a/libnd4j/include/loops/cuda/scalar_bool.cu +++ b/libnd4j/include/loops/cuda/scalar_bool.cu @@ -29,13 +29,13 @@ using namespace simdOps; //////////////////////////////////////////////////////////////////////// template -__global__ void scalarAlongDimension(void *x, Nd4jLong *xShapeInfo, +__global__ void scalarAlongDimension(void const* x, Nd4jLong const* xShapeInfo, void *extraParams, - void *z, Nd4jLong *zShapeInfo, - void *scalars, + void *z, Nd4jLong const* zShapeInfo, + void const* scalars, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ) { functions::scalar::ScalarBoolTransform::template transformCuda(x, xShapeInfo, extraParams, z, zShapeInfo, scalars, dimension, dimensionLength, tadShapeInfo, tadOffsets, tadShapeInfoZ, tadOffsetsZ); } @@ -43,7 +43,7 @@ __global__ void scalarAlongDimension(void *x, Nd4jLong *xShapeInfo, //////////////////////////////////////////////////////////////////////// template -__global__ void scalarSimpleShaped(void* x, void *y, Nd4jLong *xShapeInfo, void *params, void *z, Nd4jLong *zShapeInfo, int *allocationBuffer) { +__global__ void scalarSimpleShaped(void const* x, void const* y, Nd4jLong const* xShapeInfo, void *params, void *z, Nd4jLong const* zShapeInfo, int *allocationBuffer) { 
functions::scalar::ScalarBoolTransform::template transformCuda(y, x, xShapeInfo, params, z, zShapeInfo, allocationBuffer); } @@ -60,13 +60,13 @@ namespace scalar { //////////////////////////////////////////////////////////////////////// template template -__device__ void ScalarBoolTransform::transformCuda(void* vscalar, - void *vy, Nd4jLong *yShapeInfo, +__device__ void ScalarBoolTransform::transformCuda(void const* vscalar, + void const* vy, Nd4jLong const* yShapeInfo, void *vparams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, int *allocationBuffer) { - auto scalar = reinterpret_cast(vscalar)[0]; - auto y = reinterpret_cast(vy); + auto scalar = reinterpret_cast(vscalar)[0]; + auto y = reinterpret_cast(vy); auto params = reinterpret_cast(vparams); auto z = reinterpret_cast(vz); @@ -101,14 +101,14 @@ __device__ void ScalarBoolTransform::transformCuda(void* vscalar, template template __device__ void ScalarBoolTransform::transformCuda(Nd4jLong len, - void* vx, - void *vy, Nd4jLong yEWS, + void const* vx, + void const* vy, Nd4jLong yEWS, void *vparams, void *vz, Nd4jLong zEWS, int *allocationBuffer) { - auto x = reinterpret_cast(vx)[0]; - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx)[0]; + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto params = reinterpret_cast(vparams); @@ -130,15 +130,15 @@ __device__ void ScalarBoolTransform::transformCuda(Nd4jLong len, //////////////////////////////////////////////////////////////////////// template template -__device__ void ScalarBoolTransform::transformCuda(void *vx, Nd4jLong *xShapeInfo, +__device__ void ScalarBoolTransform::transformCuda(void const* vx, Nd4jLong const* xShapeInfo, void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, - void *vscalars, + void *vz, Nd4jLong const* zShapeInfo, + void const* vscalars, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { - auto x = 
reinterpret_cast(vx); - auto scalars = reinterpret_cast(vscalars); + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ) { + auto x = reinterpret_cast(vx); + auto scalars = reinterpret_cast(vscalars); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -158,7 +158,7 @@ __device__ void ScalarBoolTransform::transformCuda(void *vx, Nd4jLong *xS // main loop, rolling over tads for (int r = blockIdx.x; r < numTads; r += gridDim.x) { Z *oZ = z + tadOffsetsZ[r]; - X *oX = x + tadOffsets[r]; + auto oX = x + tadOffsets[r]; auto s = scalars[r]; @@ -169,7 +169,7 @@ __device__ void ScalarBoolTransform::transformCuda(void *vx, Nd4jLong *xS // main loop, rolling over tads for (int r = blockIdx.x; r < numTads; r += gridDim.x) { Z *oZ = z + tadOffsetsZ[r]; - X *oX = x + tadOffsets[r]; + auto oX = x + tadOffsets[r]; auto s = scalars[r]; @@ -184,13 +184,13 @@ __device__ void ScalarBoolTransform::transformCuda(void *vx, Nd4jLong *xS template template _CUDA_H void ScalarBoolTransform::intermediateAlongDimension(dim3& launchDims, cudaStream_t *stream, - void *x, Nd4jLong *xShapeInfo, - void *z, Nd4jLong *zShapeInfo, - void *scalars, + void const* x, Nd4jLong const* xShapeInfo, + void *z, Nd4jLong const* zShapeInfo, + void const* scalars, void *extraParams, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ) { scalarAlongDimension<<>>(x, xShapeInfo, extraParams, z, zShapeInfo, scalars, dimension, dimensionLength, tadShapeInfo, tadOffsets, tadShapeInfoZ, tadOffsetsZ); sd::DebugHelper::checkErrorCode(stream, "scalarAlongDim(...) 
failed"); @@ -200,9 +200,9 @@ _CUDA_H void ScalarBoolTransform::intermediateAlongDimension(dim3& launchD template template void _CUDA_H ScalarBoolTransform::intermediateShaped(dim3& launchDims, cudaStream_t *stream, - void *vx, Nd4jLong *xShapeInfo, - void *vz, Nd4jLong *zShapeInfo, - void* vscalar, + void const* vx, Nd4jLong const* xShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, + void const* vscalar, void *vextraParams, int *allocPointer){ scalarSimpleShaped<<>>(vx, vscalar, xShapeInfo, vextraParams, vz, zShapeInfo, allocPointer); @@ -213,20 +213,20 @@ void _CUDA_H ScalarBoolTransform::intermediateShaped(dim3& launchDims, cuda template void ScalarBoolTransform::executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum, - void *vx, Nd4jLong *xShapeInfo, - void *vz, Nd4jLong *zShapeInfo, - void* vscalar, - void *vextraParams) { + void const* vx, Nd4jLong const* xShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, + void const* vscalar, + void const* vextraParams) { if (sd::Environment::getInstance()->isDebugAndVerbose()) printf("H14 opNum:[%i]\n", opNum); - DISPATCH_BY_OPNUM_TT(intermediateShaped, PARAMS(launchDims, stream, vx, xShapeInfo, vz, zShapeInfo, vscalar, vextraParams, nullptr), SCALAR_BOOL_OPS); + DISPATCH_BY_OPNUM_TT(intermediateShaped, PARAMS(launchDims, stream, vx, xShapeInfo, vz, zShapeInfo, vscalar, const_cast(vextraParams), nullptr), SCALAR_BOOL_OPS); } //////////////////////////////////////////////////////////////////////// template -void ScalarBoolTransform::executeCudaAlongDimension(dim3& launchDims, cudaStream_t *stream, int opNum, void *vx, Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, void *vscalars, void *vextraParams, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { +void ScalarBoolTransform::executeCudaAlongDimension(dim3& launchDims, cudaStream_t *stream, int opNum, void const* vx, Nd4jLong const* xShapeInfo, void *vz, Nd4jLong const* 
zShapeInfo, void const* vscalars, void *vextraParams, int *dimension, int dimensionLength, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ) { DISPATCH_BY_OPNUM_TT(intermediateAlongDimension, PARAMS(launchDims, stream, vx, xShapeInfo, vz, zShapeInfo, vscalars, vextraParams, dimension, dimensionLength, tadShapeInfo, tadOffsets, tadShapeInfoZ, tadOffsetsZ), SCALAR_BOOL_OPS); } diff --git a/libnd4j/include/loops/cuda/scalar_int.cu b/libnd4j/include/loops/cuda/scalar_int.cu index bb761c76c..2ca0ade26 100644 --- a/libnd4j/include/loops/cuda/scalar_int.cu +++ b/libnd4j/include/loops/cuda/scalar_int.cu @@ -29,13 +29,13 @@ using namespace simdOps; //////////////////////////////////////////////////////////////////////// template -__global__ void scalarAlongDimension(void *x, Nd4jLong *xShapeInfo, +__global__ void scalarAlongDimension(void const* x, Nd4jLong const* xShapeInfo, void *extraParams, - void *z, Nd4jLong *zShapeInfo, - void *scalars, + void *z, Nd4jLong const* zShapeInfo, + void const* scalars, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ) { functions::scalar::ScalarIntTransform::template transformCuda(x, xShapeInfo, extraParams, z, zShapeInfo, scalars, dimension, dimensionLength, tadShapeInfo, tadOffsets, tadShapeInfoZ, tadOffsetsZ); } @@ -43,7 +43,7 @@ __global__ void scalarAlongDimension(void *x, Nd4jLong *xShapeInfo, //////////////////////////////////////////////////////////////////////// template -__global__ void scalarSimpleShaped(void* x, void *y, Nd4jLong *xShapeInfo, void *params, void *z, Nd4jLong *zShapeInfo, int *allocationBuffer) { +__global__ void scalarSimpleShaped(void const* x, void const* y, Nd4jLong const* xShapeInfo, void *params, void *z, Nd4jLong const* zShapeInfo, 
int *allocationBuffer) { functions::scalar::ScalarIntTransform::template transformCuda(y, x, xShapeInfo, params, z, zShapeInfo, allocationBuffer); } @@ -60,13 +60,13 @@ namespace scalar { //////////////////////////////////////////////////////////////////////// template template -__device__ void ScalarIntTransform::transformCuda(void* vscalar, - void *vy, Nd4jLong *yShapeInfo, +__device__ void ScalarIntTransform::transformCuda(void const* vscalar, + void const* vy, Nd4jLong const* yShapeInfo, void *vparams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, int *allocationBuffer) { - auto scalar = reinterpret_cast(vscalar)[0]; - auto y = reinterpret_cast(vy); + auto scalar = reinterpret_cast(vscalar)[0]; + auto y = reinterpret_cast(vy); auto params = reinterpret_cast(vparams); auto z = reinterpret_cast(vz); @@ -101,14 +101,14 @@ __device__ void ScalarIntTransform::transformCuda(void* vscalar, template template __device__ void ScalarIntTransform::transformCuda(Nd4jLong len, - void* vx, - void *vy, Nd4jLong yEWS, + void const* vx, + void const* vy, Nd4jLong yEWS, void *vparams, void *vz, Nd4jLong zEWS, int *allocationBuffer) { - auto x = reinterpret_cast(vx)[0]; - auto y = reinterpret_cast(vy); + auto x = reinterpret_cast(vx)[0]; + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); auto params = reinterpret_cast(vparams); @@ -130,15 +130,15 @@ __device__ void ScalarIntTransform::transformCuda(Nd4jLong len, //////////////////////////////////////////////////////////////////////// template template -__device__ void ScalarIntTransform::transformCuda(void *vx, Nd4jLong *xShapeInfo, +__device__ void ScalarIntTransform::transformCuda(void const* vx, Nd4jLong const* xShapeInfo, void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, - void *vscalars, + void *vz, Nd4jLong const* zShapeInfo, + void const* vscalars, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong 
*tadOffsetsZ) { - auto x = reinterpret_cast(vx); - auto scalars = reinterpret_cast(vscalars); + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ) { + auto x = reinterpret_cast(vx); + auto scalars = reinterpret_cast(vscalars); auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); @@ -158,7 +158,7 @@ __device__ void ScalarIntTransform::transformCuda(void *vx, Nd4jLong *xShape // main loop, rolling over tads for (int r = blockIdx.x; r < numTads; r += gridDim.x) { X *oZ = z + tadOffsetsZ[r]; - X *oX = x + tadOffsets[r]; + auto oX = x + tadOffsets[r]; auto s = scalars[r]; @@ -169,7 +169,7 @@ __device__ void ScalarIntTransform::transformCuda(void *vx, Nd4jLong *xShape // main loop, rolling over tads for (int r = blockIdx.x; r < numTads; r += gridDim.x) { X *oZ = z + tadOffsetsZ[r]; - X *oX = x + tadOffsets[r]; + auto oX = x + tadOffsets[r]; auto s = scalars[r]; @@ -184,13 +184,13 @@ __device__ void ScalarIntTransform::transformCuda(void *vx, Nd4jLong *xShape template template _CUDA_H void ScalarIntTransform::intermediateAlongDimension(dim3& launchDims, cudaStream_t *stream, - void *x, Nd4jLong *xShapeInfo, - void *z, Nd4jLong *zShapeInfo, - void *scalars, + void const* x, Nd4jLong const* xShapeInfo, + void *z, Nd4jLong const* zShapeInfo, + void const* scalars, void *extraParams, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ) { scalarAlongDimension<<>>(x, xShapeInfo, extraParams, z, zShapeInfo, scalars, dimension, dimensionLength, tadShapeInfo, tadOffsets, tadShapeInfoZ, tadOffsetsZ); } @@ -199,9 +199,9 @@ _CUDA_H void ScalarIntTransform::intermediateAlongDimension(dim3& launchDims, template template void _CUDA_H ScalarIntTransform::intermediateShaped(dim3& 
launchDims, cudaStream_t *stream, - void *vx, Nd4jLong *xShapeInfo, - void *vz, Nd4jLong *zShapeInfo, - void* vscalar, + void const* vx, Nd4jLong const* xShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, + void const* vscalar, void *vextraParams, int *allocPointer){ scalarSimpleShaped<<>>(vx, vscalar, xShapeInfo, vextraParams, vz, zShapeInfo, allocPointer); @@ -211,10 +211,10 @@ void _CUDA_H ScalarIntTransform::intermediateShaped(dim3& launchDims, cudaStr template void ScalarIntTransform::executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum, - void *vx, Nd4jLong *xShapeInfo, - void *vz, Nd4jLong *zShapeInfo, - void* vscalar, - void *vextraParams) { + void const* vx, Nd4jLong const* xShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, + void const* vscalar, + void* vextraParams) { if (sd::Environment::getInstance()->isDebugAndVerbose()) printf("H14 opNum:[%i]\n", opNum); @@ -224,7 +224,7 @@ void ScalarIntTransform::executeCudaShaped(dim3& launchDims, cudaStream_t *st //////////////////////////////////////////////////////////////////////// template -void ScalarIntTransform::executeCudaAlongDimension(dim3& launchDims, cudaStream_t *stream, int opNum, void *vx, Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, void *vscalars, void *vextraParams, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { +void ScalarIntTransform::executeCudaAlongDimension(dim3& launchDims, cudaStream_t *stream, int opNum, void const* vx, Nd4jLong const* xShapeInfo, void *vz, Nd4jLong const* zShapeInfo, void const* vscalars, void *vextraParams, int *dimension, int dimensionLength, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* tadShapeInfoZ, Nd4jLong const* tadOffsetsZ) { DISPATCH_BY_OPNUM_T(intermediateAlongDimension, PARAMS(launchDims, stream, vx, xShapeInfo, vz, zShapeInfo, vscalars, vextraParams, dimension, dimensionLength, tadShapeInfo, tadOffsets, tadShapeInfoZ, 
tadOffsetsZ), SCALAR_INT_OPS); } diff --git a/libnd4j/include/loops/cuda/specials/bitonicArbitraryStep.cu b/libnd4j/include/loops/cuda/specials/bitonicArbitraryStep.cu index 13ad1d5b4..999a09942 100644 --- a/libnd4j/include/loops/cuda/specials/bitonicArbitraryStep.cu +++ b/libnd4j/include/loops/cuda/specials/bitonicArbitraryStep.cu @@ -23,7 +23,7 @@ ////////////////////////////////////////////////////////////////////////// template -__global__ void bitonicArbitraryStepKernelKey(void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, int window, int length, int reverse, bool descending) { +__global__ void bitonicArbitraryStepKernelKey(void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, int window, int length, int reverse, bool descending) { auto x = static_cast(vx); auto y = static_cast(vy); @@ -101,7 +101,7 @@ __global__ void bitonicArbitraryStepKernelKey(void *vx, Nd4jLong *xShapeInfo, vo ////////////////////////////////////////////////////////////////////////// template -__global__ void execBitonicArbitraryStepKernel(void *vx, Nd4jLong *xShapeInfo, int window, int length, int reverse, bool descending) { +__global__ void execBitonicArbitraryStepKernel(void *vx, Nd4jLong const* xShapeInfo, int window, int length, int reverse, bool descending) { auto x = static_cast(vx); int tid = threadIdx.x + blockDim.x * blockIdx.x; @@ -177,14 +177,14 @@ __global__ void execBitonicArbitraryStepKernel(void *vx, Nd4jLong *xShapeInfo, i ////////////////////////////////////////////////////////////////////////// template -__host__ void bitonicArbitraryStepGeneric(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, int window, int length, int reverse, bool descending) { +__host__ void bitonicArbitraryStepGeneric(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, int window, int length, int reverse, bool descending) { execBitonicArbitraryStepKernel<<>>(vx, xShapeInfo, window, length, reverse, descending); } 
template -__host__ void bitonicArbitraryStepGenericKey(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, int window, int length, int reverse, bool descending) { +__host__ void bitonicArbitraryStepGenericKey(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, int window, int length, int reverse, bool descending) { bitonicArbitraryStepKernelKey<<>>(vx, xShapeInfo, vy, yShapeInfo, window, length, reverse, descending); } -BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT bitonicArbitraryStepGeneric, (dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, int window, int length, int reverse, bool descending), LIBND4J_TYPES); -BUILD_DOUBLE_TEMPLATE(template void ND4J_EXPORT bitonicArbitraryStepGenericKey, (dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, int window, int length, int reverse, bool descending), LIBND4J_TYPES, LIBND4J_TYPES); +BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT bitonicArbitraryStepGeneric, (dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, int window, int length, int reverse, bool descending), LIBND4J_TYPES); +BUILD_DOUBLE_TEMPLATE(template void ND4J_EXPORT bitonicArbitraryStepGenericKey, (dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, int window, int length, int reverse, bool descending), LIBND4J_TYPES, LIBND4J_TYPES); diff --git a/libnd4j/include/loops/cuda/specials/bitonicSortStep.cu b/libnd4j/include/loops/cuda/specials/bitonicSortStep.cu index 6bd1e8a33..679e44d1f 100644 --- a/libnd4j/include/loops/cuda/specials/bitonicSortStep.cu +++ b/libnd4j/include/loops/cuda/specials/bitonicSortStep.cu @@ -24,7 +24,7 @@ ////////////////////////////////////////////////////////////////////////// template -__global__ void bitonicSortStepKernelKey(void *vx, Nd4jLong 
*xShapeInfo, void *vy, Nd4jLong *yShapeInfo, int j, int k, int length, bool descending) { +__global__ void bitonicSortStepKernelKey(void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, int j, int k, int length, bool descending) { auto x = static_cast(vx); auto y = static_cast(vy); @@ -79,7 +79,7 @@ __global__ void bitonicSortStepKernelKey(void *vx, Nd4jLong *xShapeInfo, void *v ////////////////////////////////////////////////////////////////////////// template -__global__ void bitonicSortStepKernel(void *vx, Nd4jLong *xShapeInfo, int j, int k, int length, bool descending) { +__global__ void bitonicSortStepKernel(void *vx, Nd4jLong const* xShapeInfo, int j, int k, int length, bool descending) { auto x = static_cast(vx); @@ -125,16 +125,16 @@ __global__ void bitonicSortStepKernel(void *vx, Nd4jLong *xShapeInfo, int j, int ////////////////////////////////////////////////////////////////////////// template -__host__ void bitonicSortStepGeneric(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, int j, int k, int length, bool descending) { +__host__ void bitonicSortStepGeneric(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, int j, int k, int length, bool descending) { bitonicSortStepKernel<<>>(vx, xShapeInfo, j, k, length, descending); } ////////////////////////////////////////////////////////////////////////// template -__host__ void bitonicSortStepGenericKey(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, int j, int k, int length, bool descending) { +__host__ void bitonicSortStepGenericKey(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, int j, int k, int length, bool descending) { bitonicSortStepKernelKey<<>>(vx, xShapeInfo, vy, yShapeInfo, j, k, length, descending); } -BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT bitonicSortStepGeneric, (dim3 &launchDims, cudaStream_t 
*stream, void *vx, Nd4jLong *xShapeInfo, int j, int k, int length, bool descending), LIBND4J_TYPES); -BUILD_DOUBLE_TEMPLATE(template void ND4J_EXPORT bitonicSortStepGenericKey, (dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, int j, int k, int length, bool descending), LIBND4J_TYPES, LIBND4J_TYPES); +BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT bitonicSortStepGeneric, (dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, int j, int k, int length, bool descending), LIBND4J_TYPES); +BUILD_DOUBLE_TEMPLATE(template void ND4J_EXPORT bitonicSortStepGenericKey, (dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, int j, int k, int length, bool descending), LIBND4J_TYPES, LIBND4J_TYPES); diff --git a/libnd4j/include/loops/cuda/specials/fillDimensionalIsMax.cu b/libnd4j/include/loops/cuda/specials/fillDimensionalIsMax.cu index 813de162d..409f84cc6 100644 --- a/libnd4j/include/loops/cuda/specials/fillDimensionalIsMax.cu +++ b/libnd4j/include/loops/cuda/specials/fillDimensionalIsMax.cu @@ -26,13 +26,13 @@ namespace sd { //////////////////////////////////////////////////////////////////////// template - __device__ void fillDimensionalIsMax(void *vdX, - void *vdZ, Nd4jLong *zShapeInfo, - Nd4jLong *tadOnlyShapeInfo, + __device__ void fillDimensionalIsMax(const void *vdX, + void *vdZ, const Nd4jLong *zShapeInfo, + const Nd4jLong *tadOnlyShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadOffsets) { + const Nd4jLong *tadOffsets) { - auto dX = reinterpret_cast(vdX); + auto dX = reinterpret_cast(vdX); auto dZ = reinterpret_cast(vdZ); __shared__ int tadLength; @@ -69,11 +69,11 @@ namespace sd { //////////////////////////////////////////////////////////////////////// template - __global__ void execfillDimensionalIsMax(void *dX, - void *dZ, Nd4jLong *zShapeInfo, - Nd4jLong *tadOnlyShapeInfo, + __global__ void 
execfillDimensionalIsMax(const void *dX, + void *dZ, const Nd4jLong *zShapeInfo, + const Nd4jLong *tadOnlyShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadOffsets) { + const Nd4jLong *tadOffsets) { fillDimensionalIsMax(dX, dZ, zShapeInfo, tadOnlyShapeInfo, dimension, dimensionLength, tadOffsets); } @@ -81,14 +81,14 @@ namespace sd { //////////////////////////////////////////////////////////////////////// template __host__ void fillDimensionalIsMaxGeneric(dim3 &launchDims, cudaStream_t *stream, - void *dX, - void *dZ, Nd4jLong *zShapeInfo, - Nd4jLong *tadOnlyShapeInfo, + const void *dX, + void *dZ, const Nd4jLong *zShapeInfo, + const Nd4jLong *tadOnlyShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadOffsets) { + const Nd4jLong *tadOffsets) { execfillDimensionalIsMax<<>>(dX, dZ, zShapeInfo, tadOnlyShapeInfo, dimension, dimensionLength, tadOffsets); sd::DebugHelper::checkErrorCode(stream, "fillDimensionalIsMax(...) failed"); } - BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT fillDimensionalIsMaxGeneric, (dim3& launchDims, cudaStream_t *stream, void *dX, void *dZ, Nd4jLong *zShapeInfo, Nd4jLong *tadOnlyShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadOffsets), LIBND4J_TYPES); + BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT fillDimensionalIsMaxGeneric, (dim3& launchDims, cudaStream_t *stream, const void *dX, void *dZ, const Nd4jLong *zShapeInfo, const Nd4jLong *tadOnlyShapeInfo, int *dimension, int dimensionLength, const Nd4jLong *tadOffsets), LIBND4J_TYPES); } \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/specials/fillIsMax.cu b/libnd4j/include/loops/cuda/specials/fillIsMax.cu index 1a994a13c..00997b022 100644 --- a/libnd4j/include/loops/cuda/specials/fillIsMax.cu +++ b/libnd4j/include/loops/cuda/specials/fillIsMax.cu @@ -25,7 +25,7 @@ namespace sd { //////////////////////////////////////////////////////////////////////// template - __global__ void execFillIsMax(void *vdZ, Nd4jLong *xShapeInfo, Nd4jLong 
length, long idx) { + __global__ void execFillIsMax(void *vdZ, const Nd4jLong *xShapeInfo, Nd4jLong length, long idx) { auto dz = reinterpret_cast(vdZ); int tid = blockIdx.x * blockDim.x + threadIdx.x; @@ -35,11 +35,11 @@ namespace sd { //////////////////////////////////////////////////////////////////////// template - __host__ void fillIsMaxGeneric(dim3 &launchDims, cudaStream_t *stream, void *dx, Nd4jLong *xShapeInfo, Nd4jLong length, long idx) { + __host__ void fillIsMaxGeneric(dim3 &launchDims, cudaStream_t *stream, void *dx, const Nd4jLong *xShapeInfo, Nd4jLong length, long idx) { execFillIsMax<<>>(dx, xShapeInfo, length, idx); sd::DebugHelper::checkErrorCode(stream, "fillIsMax(...) failed"); } - BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT fillIsMaxGeneric, (dim3& launchDims, cudaStream_t *stream, void* dz, Nd4jLong *zShapeInfo, Nd4jLong length, long idx), LIBND4J_TYPES); + BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT fillIsMaxGeneric, (dim3& launchDims, cudaStream_t *stream, void* dz, const Nd4jLong *zShapeInfo, Nd4jLong length, long idx), LIBND4J_TYPES); } \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/specials/oesTad.cu b/libnd4j/include/loops/cuda/specials/oesTad.cu index 9f41ffbb9..6f08e23ad 100644 --- a/libnd4j/include/loops/cuda/specials/oesTad.cu +++ b/libnd4j/include/loops/cuda/specials/oesTad.cu @@ -22,10 +22,10 @@ ////////////////////////////////////////////////////////////////////////// template -__global__ void execOesTadKernelKey(void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, +__global__ void execOesTadKernelKey(void *vx, Nd4jLong const* xShapeInfo, + void *vy, Nd4jLong const* yShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, bool descending) { auto x = static_cast(vx); @@ -94,9 +94,9 @@ __global__ void execOesTadKernelKey(void *vx, Nd4jLong *xShapeInfo, 
////////////////////////////////////////////////////////////////////////// template -__global__ void execOesTadKernel(void *vx, Nd4jLong *xShapeInfo, +__global__ void execOesTadKernel(void *vx, Nd4jLong const* xShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, bool descending) { auto x = static_cast(vx); @@ -182,9 +182,9 @@ __global__ void execOesTadKernel(void *vx, Nd4jLong *xShapeInfo, ////////////////////////////////////////////////////////////////////////// template __host__ void oesTadGeneric(dim3 &launchDims, cudaStream_t *stream, - void *vx, Nd4jLong *xShapeInfo, + void *vx, Nd4jLong const* xShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, bool descending) { execOesTadKernel<<>>(vx, xShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffsets, descending); @@ -192,14 +192,14 @@ __host__ void oesTadGeneric(dim3 &launchDims, cudaStream_t *stream, template __host__ void oesTadGenericKey(dim3 &launchDims, cudaStream_t *stream, - void *vx, Nd4jLong *xShapeInfo, - void *vy, Nd4jLong *yShapeInfo, + void *vx, Nd4jLong const* xShapeInfo, + void *vy, Nd4jLong const* yShapeInfo, int *dimension, int dimensionLength, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, bool descending) { execOesTadKernelKey<<>>(vx, xShapeInfo, vy, yShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffsets, descending); } -BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT oesTadGeneric, (dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool descending), LIBND4J_TYPES); -BUILD_DOUBLE_TEMPLATE(template void ND4J_EXPORT oesTadGenericKey, (dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, void 
*vy, Nd4jLong *yShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool descending), LIBND4J_TYPES, LIBND4J_TYPES); +BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT oesTadGeneric, (dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, int *dimension, int dimensionLength, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, bool descending), LIBND4J_TYPES); +BUILD_DOUBLE_TEMPLATE(template void ND4J_EXPORT oesTadGenericKey, (dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, int *dimension, int dimensionLength, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, bool descending), LIBND4J_TYPES, LIBND4J_TYPES); diff --git a/libnd4j/include/loops/cuda/specials/pullRowsKernel.cu b/libnd4j/include/loops/cuda/specials/pullRowsKernel.cu index 7ef6a46db..69d103e67 100644 --- a/libnd4j/include/loops/cuda/specials/pullRowsKernel.cu +++ b/libnd4j/include/loops/cuda/specials/pullRowsKernel.cu @@ -29,8 +29,8 @@ namespace sd { void *vz, Nd4jLong len, Nd4jLong *indexes, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *zTadShapeInfo, Nd4jLong *zTadOffsets) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* zTadShapeInfo, Nd4jLong const* zTadOffsets) { auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); @@ -67,8 +67,8 @@ namespace sd { void *vz, Nd4jLong len, Nd4jLong *indexes, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *zTadShapeInfo, Nd4jLong *zTadOffsets) { + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* zTadShapeInfo, Nd4jLong const* zTadOffsets) { pullRowsKernel(vx, vz, len, indexes, tadShapeInfo, tadOffsets, zTadShapeInfo, zTadOffsets); } @@ -80,13 +80,13 @@ namespace sd { void *vz, Nd4jLong len, Nd4jLong *indexes, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *zTadShapeInfo, Nd4jLong *zTadOffsets) { + Nd4jLong const* 
tadShapeInfo, Nd4jLong const* tadOffsets, + Nd4jLong const* zTadShapeInfo, Nd4jLong const* zTadOffsets) { execPullRowsKernel<<>>(vx, vz, len, indexes, tadShapeInfo, tadOffsets, zTadShapeInfo, zTadOffsets); sd::DebugHelper::checkErrorCode(stream, "pullRows(...) failed"); } - BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT pullRowsKernelGeneric, (dim3 & launchDims, cudaStream_t * stream, void * vx, void * vz, Nd4jLong len, Nd4jLong * indexes, Nd4jLong * tadShapeInfo, Nd4jLong * tadOffsets, Nd4jLong *zTadShapeInfo, Nd4jLong * zTadOffsets), LIBND4J_TYPES); + BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT pullRowsKernelGeneric, (dim3 & launchDims, cudaStream_t * stream, void * vx, void * vz, Nd4jLong len, Nd4jLong * indexes, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* zTadShapeInfo, Nd4jLong const* zTadOffsets), LIBND4J_TYPES); } diff --git a/libnd4j/include/loops/cuda/specials/swapUnsafeKernel.cu b/libnd4j/include/loops/cuda/specials/swapUnsafeKernel.cu index 796ea85c0..334584fab 100644 --- a/libnd4j/include/loops/cuda/specials/swapUnsafeKernel.cu +++ b/libnd4j/include/loops/cuda/specials/swapUnsafeKernel.cu @@ -27,7 +27,7 @@ namespace sd { // input - theSecondBuffer/Shape from input NDArray // output - theFirstBuffer/Shape from input NDArray template - static __global__ void swapUnsafeKernel(void* theFirstBuffer, Nd4jLong* theFirstShape, void* theSecondBuffer, Nd4jLong* theSecondShape) { + static __global__ void swapUnsafeKernel(void* theFirstBuffer, Nd4jLong const* theFirstShape, void* theSecondBuffer, Nd4jLong const* theSecondShape) { auto tid = blockIdx.x * blockDim.x + threadIdx.x; int totalThreads = gridDim.x * blockDim.x; @@ -51,12 +51,12 @@ namespace sd { } } - BUILD_SINGLE_TEMPLATE(template __global__ void swapUnsafeKernel, (void* theFirstBuffer, Nd4jLong* theFirstShape, void* theSecondBuffer, Nd4jLong* theSecondShape), LIBND4J_TYPES); + BUILD_SINGLE_TEMPLATE(template __global__ void swapUnsafeKernel, (void* theFirstBuffer, 
Nd4jLong const* theFirstShape, void* theSecondBuffer, Nd4jLong const* theSecondShape), LIBND4J_TYPES); template - void templatedSwapUnsafe(void* theFirstBuffer, Nd4jLong* theFirstShape, void* theSecondBuffer, Nd4jLong* theSecondShape, cudaStream_t* theStream) { + void templatedSwapUnsafe(void* theFirstBuffer, Nd4jLong const* theFirstShape, void* theSecondBuffer, Nd4jLong const* theSecondShape, cudaStream_t* theStream) { swapUnsafeKernel<<<256, 512, 8192, *theStream>>>(theFirstBuffer, theFirstShape, theSecondBuffer, theSecondShape); } - BUILD_SINGLE_TEMPLATE(template void templatedSwapUnsafe, (void* theFirstBuffer, Nd4jLong* theFirstShape, void* theSecondBuffer, Nd4jLong* theSecondShape, cudaStream_t* theStream), LIBND4J_TYPES); + BUILD_SINGLE_TEMPLATE(template void templatedSwapUnsafe, (void* theFirstBuffer, Nd4jLong const* theFirstShape, void* theSecondBuffer, Nd4jLong const* theSecondShape, cudaStream_t* theStream), LIBND4J_TYPES); } \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/specials/tearKernel.cu b/libnd4j/include/loops/cuda/specials/tearKernel.cu index a6285b5a5..e1d70e6b5 100644 --- a/libnd4j/include/loops/cuda/specials/tearKernel.cu +++ b/libnd4j/include/loops/cuda/specials/tearKernel.cu @@ -26,8 +26,8 @@ namespace sd { //////////////////////////////////////////////////////////////////////// template __device__ void - tearKernel(void *vx, Nd4jLong *xShapeInfo, Nd4jPointer *targets, Nd4jLong *zShapeInfo, Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets) { + tearKernel(void *vx, Nd4jLong const* xShapeInfo, Nd4jPointer *targets, Nd4jLong const* zShapeInfo, Nd4jLong const* tadShapeInfo, + Nd4jLong const* tadOffsets) { @@ -39,8 +39,8 @@ namespace sd { // __shared__ int zRank; // __shared__ Nd4jLong *tadShape; // __shared__ Nd4jLong *tadStride; -// __shared__ Nd4jLong *zShape; -// __shared__ Nd4jLong *zStride; +// __shared__ Nd4jLong const* zShape; +// __shared__ Nd4jLong const* zStride; __shared__ T* x; if (threadIdx.x == 0) { tadLength = 
shape::length(tadShapeInfo); @@ -74,8 +74,8 @@ namespace sd { //////////////////////////////////////////////////////////////////////// template __global__ void - execTearKernel(void *vx, Nd4jLong *xShapeInfo, Nd4jPointer *targets, Nd4jLong *zShapeInfo, Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets) { + execTearKernel(void *vx, Nd4jLong const* xShapeInfo, Nd4jPointer *targets, Nd4jLong const* zShapeInfo, Nd4jLong const* tadShapeInfo, + Nd4jLong const* tadOffsets) { tearKernel(vx, xShapeInfo, targets, zShapeInfo, tadShapeInfo, tadOffsets); } @@ -83,13 +83,13 @@ namespace sd { //////////////////////////////////////////////////////////////////////// template __host__ void tearKernelGeneric(dim3 &launchDims, cudaStream_t *stream, - void *vx, Nd4jLong *xShapeInfo, - Nd4jPointer *targets, Nd4jLong *zShapeInfo, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + void *vx, Nd4jLong const* xShapeInfo, + Nd4jPointer *targets, Nd4jLong const* zShapeInfo, + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets) { execTearKernel<<>>(vx, xShapeInfo, targets, zShapeInfo, tadShapeInfo, tadOffsets); sd::DebugHelper::checkErrorCode(stream, "tear(...) 
failed"); } - BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT tearKernelGeneric, (dim3 & launchDims, cudaStream_t * stream, void * vx, Nd4jLong * xShapeInfo, Nd4jPointer *targets, Nd4jLong * zShapeInfo, Nd4jLong * tadShapeInfo, Nd4jLong * tadOffsets), LIBND4J_TYPES); + BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT tearKernelGeneric, (dim3 & launchDims, cudaStream_t * stream, void * vx, Nd4jLong const* xShapeInfo, Nd4jPointer *targets, Nd4jLong const* zShapeInfo, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets), LIBND4J_TYPES); } \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/specials/tileKernel.cu b/libnd4j/include/loops/cuda/specials/tileKernel.cu index d6076d6cb..3a2684579 100644 --- a/libnd4j/include/loops/cuda/specials/tileKernel.cu +++ b/libnd4j/include/loops/cuda/specials/tileKernel.cu @@ -21,11 +21,11 @@ #include namespace sd { - static Nd4jLong __device__ __noinline__ getIndexOffset_(Nd4jLong index, Nd4jLong *shapeInfo) { + static Nd4jLong __device__ __noinline__ getIndexOffset_(Nd4jLong index, Nd4jLong const* shapeInfo) { return shape::getIndexOffset(index, shapeInfo); } - static Nd4jLong __device__ __noinline__ subArrayOffset(Nd4jLong index, Nd4jLong *shapeInfoA, Nd4jLong *shapeInfoB) { + static Nd4jLong __device__ __noinline__ subArrayOffset(Nd4jLong index, Nd4jLong const* shapeInfoA, Nd4jLong const* shapeInfoB) { return shape::subArrayOffset(index, shapeInfoA, shapeInfoB); } @@ -37,7 +37,7 @@ namespace sd { // resultLength - length for output array template static __global__ void - tileKernel(void const *inputBuffer, Nd4jLong *inputShape, void *outputBuffer, Nd4jLong *outputShape, + tileKernel(void const *inputBuffer, Nd4jLong const* inputShape, void *outputBuffer, Nd4jLong const* outputShape, Nd4jLong resultLength) { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Original code to transform in cuda-based @@ -58,22 +58,22 @@ namespace sd { } - 
BUILD_SINGLE_TEMPLATE(template __global__ void tileKernel,(void const* inputBuffer, Nd4jLong* inputShape, void* outputBuffer, Nd4jLong* outputShape, Nd4jLong resultLength), LIBND4J_TYPES); + BUILD_SINGLE_TEMPLATE(template __global__ void tileKernel,(void const* inputBuffer, Nd4jLong const* inputShape, void* outputBuffer, Nd4jLong const* outputShape, Nd4jLong resultLength), LIBND4J_TYPES); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// template - void tileKernelH(void const *inputBuffer, Nd4jLong *inputShape, void *outputBuffer, Nd4jLong *outputShape, Nd4jLong resultLength, cudaStream_t *stream) { + void tileKernelH(void const *inputBuffer, Nd4jLong const* inputShape, void *outputBuffer, Nd4jLong const* outputShape, Nd4jLong resultLength, cudaStream_t *stream) { dim3 launchDims(256, 512, 8192); tileKernel << < launchDims.x, launchDims.y, launchDims.z, *stream>>>(inputBuffer, inputShape, outputBuffer, outputShape, resultLength); } - BUILD_SINGLE_TEMPLATE(template void tileKernelH, (void const* inputBuffer, Nd4jLong* inputShape, void* outputBuffer, Nd4jLong* outputShape, Nd4jLong resultLength, cudaStream_t *stream), LIBND4J_TYPES); + BUILD_SINGLE_TEMPLATE(template void tileKernelH, (void const* inputBuffer, Nd4jLong const* inputShape, void* outputBuffer, Nd4jLong const* outputShape, Nd4jLong resultLength, cudaStream_t *stream), LIBND4J_TYPES); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // enhancement for tileKernel to different input and output data types: X - output type, Y - input type template static __global__ void - tileKernelDouble(void const *inputBuffer, Nd4jLong *inputShape, void *outputBuffer, Nd4jLong *outputShape, Nd4jLong resultLength, Nd4jLong ews) { + tileKernelDouble(void const *inputBuffer, Nd4jLong const* inputShape, void *outputBuffer, Nd4jLong const* outputShape, Nd4jLong resultLength, Nd4jLong ews) 
{ char ordering = shape::order(outputShape); auto tid = blockIdx.x * blockDim.x + threadIdx.x; int totalThreads = gridDim.x * blockDim.x; @@ -99,13 +99,13 @@ namespace sd { } } - BUILD_SINGLE_TEMPLATE_TWICE(template __global__ void tileKernelDouble, (void const* inputBuffer, Nd4jLong* inputShape, void* outputBuffer, Nd4jLong* outputShape, Nd4jLong resultLength, Nd4jLong ews), LIBND4J_TYPES); + BUILD_SINGLE_TEMPLATE_TWICE(template __global__ void tileKernelDouble, (void const* inputBuffer, Nd4jLong const* inputShape, void* outputBuffer, Nd4jLong const* outputShape, Nd4jLong resultLength, Nd4jLong ews), LIBND4J_TYPES); template - void tileKernelHH(void const *inputBuffer, Nd4jLong *inputShape, void *outputBuffer, Nd4jLong *outputShape, Nd4jLong resultLength, Nd4jLong ews, cudaStream_t *stream) { + void tileKernelHH(void const *inputBuffer, Nd4jLong const* inputShape, void *outputBuffer, Nd4jLong const* outputShape, Nd4jLong resultLength, Nd4jLong ews, cudaStream_t *stream) { dim3 launchDims(256, 512, 8192); tileKernelDouble<<>>(inputBuffer, inputShape, outputBuffer, outputShape, resultLength, ews); } - BUILD_SINGLE_TEMPLATE_TWICE(template void tileKernelHH, (void const* inputBuffer, Nd4jLong* inputShape, void* outputBuffer, Nd4jLong* outputShape, Nd4jLong resultLength, Nd4jLong ews, cudaStream_t *stream),LIBND4J_TYPES); + BUILD_SINGLE_TEMPLATE_TWICE(template void tileKernelHH, (void const* inputBuffer, Nd4jLong const* inputShape, void* outputBuffer, Nd4jLong const* outputShape, Nd4jLong resultLength, Nd4jLong ews, cudaStream_t *stream),LIBND4J_TYPES); } \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/summarystatsreduce.cu b/libnd4j/include/loops/cuda/summarystatsreduce.cu index c858d8098..3d94b9097 100644 --- a/libnd4j/include/loops/cuda/summarystatsreduce.cu +++ b/libnd4j/include/loops/cuda/summarystatsreduce.cu @@ -39,7 +39,7 @@ namespace functions { namespace summarystats { template -void _CUDA_G summaryStatsReduceT(int op, void *dx, Nd4jLong 
*xShapeInfo, int xRank, void *extraParams, void *z, Nd4jLong *zShapeInfo, int zRank, int *dimension, int dimensionLength, int postProcessOrNot,bool biasCorrected,int *allocationBuffer, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) { +void _CUDA_G summaryStatsReduceT(int op, void const* dx, Nd4jLong const* xShapeInfo, int xRank, void *extraParams, void *z, Nd4jLong const* zShapeInfo, int zRank, int *dimension, int dimensionLength, int postProcessOrNot,bool biasCorrected,int *allocationBuffer, void *reductionBuffer, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets) { functions::summarystats::SummaryStatsReduce::transform(op,dx,xShapeInfo,extraParams,z,zShapeInfo,dimension,dimensionLength,biasCorrected,allocationBuffer,reductionBuffer,tadOnlyShapeInfo,tadOffsets); } @@ -103,15 +103,15 @@ void _CUDA_G summaryStatsReduceT(int op, void *dx, Nd4jLong *xShapeInfo, int xRa */ template template - _CUDA_D void SummaryStatsReduce::transform(void *vx, Nd4jLong *xShapeInfo, + _CUDA_D void SummaryStatsReduce::transform(void const* vx, Nd4jLong const* xShapeInfo, void *vextraParams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *vreductionBuffer, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) { + Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets) { - auto dx = static_cast(vx); + auto dx = static_cast(vx); auto z = static_cast(vz); auto extraParams = static_cast(vextraParams); auto reductionBuffer = static_cast(vreductionBuffer); @@ -331,15 +331,15 @@ void _CUDA_G summaryStatsReduceT(int op, void *dx, Nd4jLong *xShapeInfo, int xRa template - _CUDA_D void SummaryStatsReduce::transform(const int opNum, void *dx, Nd4jLong *xShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, 
Nd4jLong *tadOffsets) { + _CUDA_D void SummaryStatsReduce::transform(const int opNum, void const* dx, Nd4jLong const* xShapeInfo, void *extraParams, void *z, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets) { DISPATCH_BY_OPNUM_TT(transform, PARAMS(dx, xShapeInfo, extraParams, z, zShapeInfo, dimension, dimensionLength, postProcessOrNot, allocationBuffer, reductionBuffer, tadOnlyShapeInfo, tadOffsets), SUMMARY_STATS_OPS); }; template - _CUDA_H void SummaryStatsReduce::execSummaryStatsReduceScalar(dim3& launchDims, cudaStream_t *stream, int opNum, void *vx, Nd4jLong *xShapeInfo, Nd4jLong *hxShapeInfo, void *vextraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong *hzShapeInfo, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool biasCorrected, void *reductionBuffer) { + _CUDA_H void SummaryStatsReduce::execSummaryStatsReduceScalar(dim3& launchDims, cudaStream_t *stream, int opNum, void const* vx, Nd4jLong const* xShapeInfo, Nd4jLong const* hxShapeInfo, void *vextraParams, void *vz, Nd4jLong const* zShapeInfo, Nd4jLong const* hzShapeInfo, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, bool biasCorrected, void *reductionBuffer) { - auto x = static_cast(vx); + auto x = static_cast(vx); auto extraParams = static_cast(vextraParams); auto z = reinterpret_cast(vz); auto reductionPointerA = reinterpret_cast(reductionBuffer); @@ -363,9 +363,9 @@ void _CUDA_G summaryStatsReduceT(int op, void *dx, Nd4jLong *xShapeInfo, int xRa } template - _CUDA_H void SummaryStatsReduce::execSummaryStatsReduce(dim3& launchDims, cudaStream_t *stream, int opNum, void *vx, Nd4jLong *xShapeInfo, Nd4jLong *hxShapeInfo, void *vextraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong *hzShapeInfo, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool biasCorrected, void *reductionBuffer) { + _CUDA_H void SummaryStatsReduce::execSummaryStatsReduce(dim3& 
launchDims, cudaStream_t *stream, int opNum, void const* vx, Nd4jLong const* xShapeInfo, Nd4jLong const* hxShapeInfo, void *vextraParams, void *vz, Nd4jLong const* zShapeInfo, Nd4jLong const* hzShapeInfo, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, bool biasCorrected, void *reductionBuffer) { - auto x = static_cast(vx); + auto x = static_cast(vx); auto z = static_cast(vz); auto extraParams = static_cast(vextraParams); @@ -390,9 +390,9 @@ void _CUDA_G summaryStatsReduceT(int op, void *dx, Nd4jLong *xShapeInfo, int xRa template - _CUDA_H void SummaryStatsReduce::execSummaryStatsReduce(dim3& launchDims, cudaStream_t *stream, int opNum, void *vx, Nd4jLong *xShapeInfo, Nd4jLong *hxShapeInfo, void *vextraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong *hzShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool biasCorrected, void *reductionBuffer) { + _CUDA_H void SummaryStatsReduce::execSummaryStatsReduce(dim3& launchDims, cudaStream_t *stream, int opNum, void const* vx, Nd4jLong const* xShapeInfo, Nd4jLong const* hxShapeInfo, void *vextraParams, void *vz, Nd4jLong const* zShapeInfo, Nd4jLong const* hzShapeInfo, int *dimension, int dimensionLength, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, bool biasCorrected, void *reductionBuffer) { - auto x = static_cast(vx); + auto x = static_cast(vx); auto z = static_cast(vz); auto extraParams = static_cast(vextraParams); diff --git a/libnd4j/include/loops/cuda/transform/transform_any.cu b/libnd4j/include/loops/cuda/transform/transform_any.cu index d13b94599..8b00b28fe 100644 --- a/libnd4j/include/loops/cuda/transform/transform_any.cu +++ b/libnd4j/include/loops/cuda/transform/transform_any.cu @@ -30,12 +30,12 @@ using namespace simdOps; template -__global__ void transformAnySimple(void *x, Nd4jLong *xShapeInfo, int xRank, - void *params, - void *z, Nd4jLong *zShapeInfo, int zRank, - int *allocationPointer, - void *reductionPointer, - Nd4jLong *tadShapeInfo, 
Nd4jLong *tadOffsets) { +__global__ void transformAnySimple( + const void *x, const Nd4jLong *xShapeInfo, int xRank, + void *params, + void *z, const Nd4jLong *zShapeInfo, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { functions::transform::TransformAny::template transformCuda(x,xShapeInfo,params,z,zShapeInfo,allocationPointer,reductionPointer,tadShapeInfo, tadOffsets); } @@ -45,7 +45,14 @@ namespace functions { namespace transform { template - _CUDA_H void TransformAny::executeTransformShaped(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + _CUDA_H void TransformAny::executeTransformShaped( + dim3 launchDims, cudaStream_t *stream, + const int opNum, + const void *x, const Nd4jLong *xShape, int xRank, + void *extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { DISPATCH_BY_OPNUM_TT(intermediateShaped, PARAMS(launchDims, stream, x, xShape, xRank, extraParams, z, zShape, zRank, allocationPointer, reductionPointer, tadShapeInfo, tadOffsets), TRANSFORM_ANY_OPS); DEBUG_KERNEL(stream, opNum); @@ -54,13 +61,14 @@ namespace functions { template template - __device__ void TransformAny::transformCuda(void *vx, Nd4jLong *xShapeInfo, - void *vparams, - void *vz, Nd4jLong *zShapeInfo, - int *allocationPointer, void *vreductionPointer, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + __device__ void TransformAny::transformCuda( + const void *vx, const Nd4jLong *xShapeInfo, + void *vparams, + void *vz, const Nd4jLong *zShapeInfo, + int *allocationPointer, void *vreductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { - auto x = reinterpret_cast(vx); + auto x = 
reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto params = reinterpret_cast(vparams); auto reductionPointer = reinterpret_cast(vreductionPointer); @@ -109,9 +117,17 @@ namespace functions { template template - _CUDA_H void TransformAny::intermediateShaped(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + _CUDA_H void TransformAny::intermediateShaped( + dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShape, int xRank, + void *extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { + transformAnySimple<<>>(x, xShape, xRank, extraParams, z, zShape, zRank, allocationPointer, reductionPointer, tadShapeInfo, tadOffsets); - sd::DebugHelper::checkErrorCode(stream, "transformAny(...) failed"); + + sd::DebugHelper::checkErrorCode(stream, "transformAny(...) 
failed"); } BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT TransformAny, , LIBND4J_TYPES, LIBND4J_TYPES); diff --git a/libnd4j/include/loops/cuda/transform/transform_bool.cu b/libnd4j/include/loops/cuda/transform/transform_bool.cu index fec14a745..f9526d296 100644 --- a/libnd4j/include/loops/cuda/transform/transform_bool.cu +++ b/libnd4j/include/loops/cuda/transform/transform_bool.cu @@ -30,12 +30,12 @@ using namespace simdOps; template -__global__ void transformBoolSimple(void *x, Nd4jLong *xShapeInfo, int xRank, - void *params, - void *z, Nd4jLong *zShapeInfo, int zRank, - int *allocationPointer, - void *reductionPointer, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { +__global__ void transformBoolSimple( + const void *x, const Nd4jLong *xShapeInfo, int xRank, + void *params, + void *z, const Nd4jLong *zShapeInfo, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { functions::transform::TransformBool::template transformCuda(x,xShapeInfo,params,z,zShapeInfo,allocationPointer,reductionPointer,tadShapeInfo, tadOffsets); } @@ -45,7 +45,15 @@ namespace functions { namespace transform { template - _CUDA_H void TransformBool::executeTransformShaped(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + _CUDA_H void TransformBool::executeTransformShaped( + dim3 launchDims, cudaStream_t *stream, + const int opNum, + const void *x, const Nd4jLong *xShape, int xRank, + void *extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { + DISPATCH_BY_OPNUM_TT(intermediateShaped, PARAMS(launchDims, stream, x, xShape, xRank, extraParams, z, zShape, zRank, allocationPointer, reductionPointer, tadShapeInfo, 
tadOffsets), TRANSFORM_BOOL_OPS); DEBUG_KERNEL(stream, opNum); @@ -54,13 +62,14 @@ namespace functions { template template - __device__ void TransformBool::transformCuda(void *vx, Nd4jLong *xShapeInfo, - void *vparams, - void *vz, Nd4jLong *zShapeInfo, - int *allocationPointer, void *vreductionPointer, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + __device__ void TransformBool::transformCuda( + const void *vx, const Nd4jLong *xShapeInfo, + void *vparams, + void *vz, const Nd4jLong *zShapeInfo, + int *allocationPointer, void *vreductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { - auto x = static_cast(vx); + auto x = static_cast(vx); auto z = static_cast(vz); auto params = static_cast(vparams); auto reductionPointer = static_cast(vreductionPointer); @@ -115,7 +124,13 @@ namespace functions { template template - _CUDA_H void TransformBool::intermediateShaped(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + _CUDA_H void TransformBool::intermediateShaped( + dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShape, int xRank, + void *extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { transformBoolSimple<<>>(x, xShape, xRank, extraParams, z, zShape, zRank, allocationPointer, reductionPointer, tadShapeInfo, tadOffsets); sd::DebugHelper::checkErrorCode(stream, "transformBool(...) 
failed"); } diff --git a/libnd4j/include/loops/cuda/transform/transform_float.cu b/libnd4j/include/loops/cuda/transform/transform_float.cu index f631fd4d7..6b6889009 100644 --- a/libnd4j/include/loops/cuda/transform/transform_float.cu +++ b/libnd4j/include/loops/cuda/transform/transform_float.cu @@ -29,12 +29,12 @@ using namespace simdOps; template -__global__ void transformFloatSimple(void *x, Nd4jLong *xShapeInfo, int xRank, +__global__ void transformFloatSimple(const void *x, const Nd4jLong *xShapeInfo, int xRank, void *params, - void *z, Nd4jLong *zShapeInfo, int zRank, + void *z, const Nd4jLong *zShapeInfo, int zRank, int *allocationPointer, void *reductionPointer, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { functions::transform::TransformFloat::template transformCuda( x, xShapeInfo, @@ -49,7 +49,7 @@ namespace functions { namespace transform { template - _CUDA_H void TransformFloat::executeTransformShaped(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + _CUDA_H void TransformFloat::executeTransformShaped(dim3 launchDims, cudaStream_t *stream, int opNum, const void *x, const Nd4jLong *xShape, int xRank, void *extraParams, void *z, const Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { DISPATCH_BY_OPNUM_TT(intermediateShaped, PARAMS(launchDims, stream, x, xShape, xRank, extraParams, z, zShape, zRank, allocationPointer, reductionPointer, tadShapeInfo, tadOffsets), TRANSFORM_FLOAT_OPS); DEBUG_KERNEL(stream, opNum); @@ -58,16 +58,13 @@ namespace functions { template template - __device__ void TransformFloat::transformCuda( - void *vx, - Nd4jLong *xShapeInfo, - void *vparams, - void *vz, - Nd4jLong *zShapeInfo, - int 
*allocationPointer, void *vreductionPointer, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + __device__ void TransformFloat::transformCuda(const void *vx, const Nd4jLong *xShapeInfo, + void *vparams, + void *vz, const Nd4jLong *zShapeInfo, + int *allocationPointer, void *vreductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { - auto x = reinterpret_cast(vx); + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto params = reinterpret_cast(vparams); auto reductionPointer = reinterpret_cast(vreductionPointer); @@ -122,24 +119,27 @@ namespace functions { template __device__ void TransformFloat::transformCudaLegacy( - int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *params, - void *z, - Nd4jLong *zShapeInfo, - int *allocationPointer, - void *reductionPointer, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets) { + const int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *params, + void *z, const Nd4jLong *zShapeInfo, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { DISPATCH_BY_OPNUM_TT(transformCuda, PARAMS(x, xShapeInfo, params, z, zShapeInfo, allocationPointer, reductionPointer, tadShapeInfo, tadOffsets), TRANSFORM_FLOAT_OPS); } template template - _CUDA_H void TransformFloat::intermediateShaped(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + _CUDA_H void TransformFloat::intermediateShaped( + dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShape, int xRank, + void *extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { transformFloatSimple<<>>(x, xShape, xRank, extraParams, z, zShape, zRank, allocationPointer, reductionPointer, 
tadShapeInfo, tadOffsets); - sd::DebugHelper::checkErrorCode(stream, "transformFloat(...) failed"); + + sd::DebugHelper::checkErrorCode(stream, "transformFloat(...) failed"); } BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT TransformFloat, , LIBND4J_TYPES, FLOAT_TYPES); diff --git a/libnd4j/include/loops/cuda/transform/transform_same.cu b/libnd4j/include/loops/cuda/transform/transform_same.cu index 368a9b602..b03146da9 100644 --- a/libnd4j/include/loops/cuda/transform/transform_same.cu +++ b/libnd4j/include/loops/cuda/transform/transform_same.cu @@ -29,12 +29,12 @@ using namespace simdOps; template -__global__ void transformSameSimple(void *x, Nd4jLong *xShapeInfo, int xRank, +__global__ void transformSameSimple(const void *x, const Nd4jLong *xShapeInfo, int xRank, void *params, - void *z, Nd4jLong *zShapeInfo, int zRank, + void *z, const Nd4jLong *zShapeInfo, int zRank, int *allocationPointer, void *reductionPointer, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { functions::transform::TransformSame::template transformCuda(x,xShapeInfo,params,z,zShapeInfo,allocationPointer,reductionPointer, tadShapeInfo, tadOffsets); } @@ -44,7 +44,13 @@ namespace functions { namespace transform { template - _CUDA_H void TransformSame::executeTransformShaped(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + _CUDA_H void TransformSame::executeTransformShaped(dim3 launchDims, cudaStream_t *stream, + const int opNum, + const void *x, const Nd4jLong *xShape, int xRank, + void *extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { DISPATCH_BY_OPNUM_T(intermediateShaped, PARAMS(launchDims, stream, x, xShape, xRank, 
extraParams, z, zShape, zRank, allocationPointer, reductionPointer, tadShapeInfo, tadOffsets), TRANSFORM_SAME_OPS); DEBUG_KERNEL(stream, opNum); @@ -53,13 +59,13 @@ namespace functions { template template - __device__ void TransformSame::transformCuda(void *vx, Nd4jLong *xShapeInfo, + __device__ void TransformSame::transformCuda(const void *vx, const Nd4jLong *xShapeInfo, void *vparams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, int *allocationPointer, void *vreductionPointer, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { - auto x = static_cast(vx); + auto x = static_cast(vx); auto z = static_cast(vz); auto params = static_cast(vparams); auto reductionPointer = static_cast(vreductionPointer); @@ -113,7 +119,7 @@ namespace functions { template template - _CUDA_H void TransformSame::intermediateShaped(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + _CUDA_H void TransformSame::intermediateShaped(dim3 launchDims, cudaStream_t *stream, const void *x, const Nd4jLong *xShape, int xRank, void *extraParams, void *z, const Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { transformSameSimple<<>>(x, xShape, xRank, extraParams, z, zShape, zRank, allocationPointer, reductionPointer, tadShapeInfo, tadOffsets); sd::DebugHelper::checkErrorCode(stream, "transformSame(...) 
failed"); } diff --git a/libnd4j/include/loops/cuda/transform/transform_strict.cu b/libnd4j/include/loops/cuda/transform/transform_strict.cu index 155e5aa23..f36b50c29 100644 --- a/libnd4j/include/loops/cuda/transform/transform_strict.cu +++ b/libnd4j/include/loops/cuda/transform/transform_strict.cu @@ -29,12 +29,12 @@ using namespace simdOps; template -__global__ void transformStrictSimple(void *x, Nd4jLong *xShapeInfo, int xRank, - void *params, - void *z, Nd4jLong *zShapeInfo, int zRank, - int *allocationPointer, - void *reductionPointer, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { +__global__ void transformStrictSimple(const void *x, const Nd4jLong *xShapeInfo, int xRank, + void *params, + void *z, const Nd4jLong *zShapeInfo, int zRank, + int *allocationPointer, + void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { functions::transform::TransformStrict::template transformCuda(x,xShapeInfo,params,z,zShapeInfo,allocationPointer,reductionPointer,tadShapeInfo, tadOffsets); } @@ -44,7 +44,13 @@ namespace functions { namespace transform { template - _CUDA_H void TransformStrict::executeTransformShaped(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + _CUDA_H void TransformStrict::executeTransformShaped(dim3 launchDims, cudaStream_t *stream, + const int opNum, + const void *x, const Nd4jLong *xShape, int xRank, + void *extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { DISPATCH_BY_OPNUM_T(intermediateShaped, PARAMS(launchDims, stream, x, xShape, xRank, extraParams, z, zShape, zRank, allocationPointer, reductionPointer, tadShapeInfo, tadOffsets), TRANSFORM_STRICT_OPS); DEBUG_KERNEL(stream, opNum); @@ -53,13 +59,13 @@ 
namespace functions { template template - __device__ void TransformStrict::transformCuda(void *vx, Nd4jLong *xShapeInfo, + __device__ void TransformStrict::transformCuda(const void *vx, const Nd4jLong *xShapeInfo, void *vparams, - void *vz, Nd4jLong *zShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, int *allocationPointer, void *vreductionPointer, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { - auto x = static_cast(vx); + auto x = static_cast(vx); auto z = static_cast(vz); auto params = static_cast(vparams); auto reductionPointer = static_cast(vreductionPointer); @@ -114,7 +120,13 @@ namespace functions { template template - _CUDA_H void TransformStrict::intermediateShaped(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + _CUDA_H void TransformStrict::intermediateShaped(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShape, int xRank, + void *extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) { + transformStrictSimple<<>>(x, xShape, xRank, extraParams, z, zShape, zRank, allocationPointer, reductionPointer, tadShapeInfo, tadOffsets); sd::DebugHelper::checkErrorCode(stream, "transformStrict(...) failed"); } diff --git a/libnd4j/include/loops/cuda/type_conversions.cu b/libnd4j/include/loops/cuda/type_conversions.cu index 8c38561f4..3ad8e2089 100644 --- a/libnd4j/include/loops/cuda/type_conversions.cu +++ b/libnd4j/include/loops/cuda/type_conversions.cu @@ -217,26 +217,104 @@ namespace sd { } ////////////////////////////////////////////////////////////////////////// +/* + * PLEASE NOTE: This kernel doesn't allow loop for data. Basically: grid will be huge. 
+ */ template -__global__ static void execEncoderKernelP1(void *dx, Nd4jLong N, void *dz, float threshold) { +__global__ static void execEncoderKernelP1(const void *dx, Nd4jLong N, void *dz, float threshold) { + auto x = reinterpret_cast (dx); + auto z = reinterpret_cast (dz); - encoderKernelP1(dx, N, dz, threshold); + //basically, for phase One we want do calculation: how many eligible values we have, and which blocks will be holding data + Nd4jLong tid = blockIdx.x * blockDim.x + threadIdx.x; + + int pass = tid < N && sd::math::nd4j_abs(x[tid]) >= static_cast(threshold) ? 1 : 0; + int bp=__syncthreads_count(pass); + + if (threadIdx.x == 0) { + // saving out per-block passes + z[blockIdx.x+1] = bp; + + // saving out sum + atomicAdd(&z[0], bp); + } } ////////////////////////////////////////////////////////////////////////// template -__host__ void encoderKernelP1Generic(dim3 &launchDims, cudaStream_t *stream, void *dx, Nd4jLong N, void *dz, float threshold) { +__host__ void encoderKernelP1Generic(dim3 &launchDims, cudaStream_t *stream, const void *dx, Nd4jLong N, void *dz, float threshold) { execEncoderKernelP1<<>>(dx, N, dz, threshold); sd::DebugHelper::checkErrorCode(stream, "encoderP1(...) failed"); } -BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT encoderKernelP1Generic, (dim3 &launchDims, cudaStream_t *stream, void *dx, Nd4jLong N, void *dz, float threshold), LIBND4J_TYPES); +BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT encoderKernelP1Generic, (dim3 &launchDims, cudaStream_t *stream, const void *dx, Nd4jLong N, void *dz, float threshold), FLOAT_TYPES); ////////////////////////////////////////////////////////////////////////// +/* + * PLEASE NOTE: This kernel doesn't allow loop for data. Basically: grid will be huge. 
+ * + * Based on: https://github.com/knotman90/cuStreamComp <-- efficient CUDA stream compaction algorithm + */ template __global__ static void execEncoderKernelP3(void *dx, int *offsets, Nd4jLong N, void *dz) { + auto x = reinterpret_cast (dx); + auto z = reinterpret_cast (dz); - encoderKernelP3(dx, offsets, N, dz); + auto tid = blockIdx.x * blockDim.x + threadIdx.x; + extern __shared__ int warpTotals[]; + + // fetch block offset only once + __shared__ float threshold; + __shared__ FloatBits fb; + __shared__ int bo; + __shared__ int limit; + if (threadIdx.x == 0) { + limit = z[0]; + fb.i_ = z[2]; + threshold = fb.f_; + bo = offsets[blockIdx.x]; + } + __syncthreads(); + + // out-of-limit threads do not play here + auto value = tid < N ? x[tid] : (T) 0.f; + + // out-of-limit threads just declare they have no changes + auto pred = tid >= N ? 0 : sd::math::nd4j_abs(value) >= static_cast(threshold) ? 1 : 0; + auto w_i = threadIdx.x / warpSize; // warp index (or, warp number) - index of the Warp within TOTAL_WARPS + auto t_i = threadIdx.x % warpSize; // thread index within a warp + unsigned int t_m = INT_MAX >> (warpSize - t_i - 1); //thread mask (ERROR IN THE PAPER minus one is required) + + int b = __ballot_sync(t_m, pred); // balres = number whose ith bit isone if the ith's thread pred is true masked up to the current index in warp + auto t_u = __popc(b); // popc count the number of bit one. 
simply count the number predicated true BEFORE MY INDEX + + if (t_i == warpSize - 1) + warpTotals[w_i] = t_u + pred; + + __syncthreads(); + + + int w_i_u = 0; + for (int j = 0; j <= 5; j++) { + unsigned int b_j = __ballot_sync(t_m, warpTotals[t_i] & pow2i(j)); //# of the ones in the j'th digit of the warp offsets + w_i_u += (__popc(b_j) << j); + } + + // we just ignore all results coming from non-0 threads + if (w_i == 0 && t_i < blockDim.x / warpSize) + warpTotals[t_i] = w_i_u; + + __syncthreads(); + + + // pred is always false if we're out-of-limits + if (pred) { + int idx = t_u + warpTotals[w_i] + bo + 4; + if (idx < limit + 4) { + z[idx] = value > static_cast(0.0f) ? tid + 1 : -(tid + 1); + x[tid] = value > static_cast(0.0f) ? x[tid] - threshold : x[tid] + threshold; + } + } } ////////////////////////////////////////////////////////////////////////// @@ -245,30 +323,119 @@ __host__ void encoderKernelP3Generic(dim3 &launchDims, cudaStream_t *stream, voi execEncoderKernelP3<<>>(dx, offsets, N, dz); sd::DebugHelper::checkErrorCode(stream, "encoderP3(...) 
failed"); } -BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT encoderKernelP3Generic, (dim3 &launchDims, cudaStream_t *stream, void *dx, int *offsets, Nd4jLong N, void *dz), LIBND4J_TYPES); +BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT encoderKernelP3Generic, (dim3 &launchDims, cudaStream_t *stream, void *dx, int *offsets, Nd4jLong N, void *dz), FLOAT_TYPES); ////////////////////////////////////////////////////////////////////////// +/* + * This kernel handles decode from sparse threshold array, to dense array + * + * PLEASE NOTE: Z is expected to be memset to 0 +*/ template -__global__ static void execDecoderKernel(void *dx, Nd4jLong N, void *dz) { +__global__ static void execDecoderKernel(const void *dx, Nd4jLong N, void *dz) { + auto x = reinterpret_cast (dx); + auto z = reinterpret_cast (dz); - decoderKernel(dx, N, dz); + int tid = blockIdx.x * blockDim.x + threadIdx.x; + __shared__ float threshold; + __shared__ int limit; + + __shared__ FloatBits fb; + if (threadIdx.x == 0) { + limit = x[0]; + fb.i_ = x[2]; + threshold = fb.f_; + } + __syncthreads(); + + for (int e = tid; e < limit; e += blockDim.x * gridDim.x) { + int el = x[e+4]; + int ael = sd::math::nd4j_abs(el) - 1; + + // TODO: investigate, if += would work better here, as in "decoded accumulation" + z[ael] += el > 0 ? threshold : -threshold; + } } ////////////////////////////////////////////////////////////////////////// template -__host__ void decoderKernelGeneric(dim3 &launchDims, cudaStream_t *stream, void *dx, Nd4jLong N, void *dz) { +__host__ void decoderKernelGeneric(dim3 &launchDims, cudaStream_t *stream, const void *dx, Nd4jLong N, void *dz) { execDecoderKernel<<>>(dx, N, dz); sd::DebugHelper::checkErrorCode(stream, "execDecoder(...) 
failed"); } -BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT decoderKernelGeneric, (dim3 &launchDims, cudaStream_t *stream, void *dx, Nd4jLong N, void *dz), LIBND4J_TYPES); +BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT decoderKernelGeneric, (dim3 &launchDims, cudaStream_t *stream, const void *dx, Nd4jLong N, void *dz), FLOAT_TYPES); ////////////////////////////////////////////////////////////////////////// template __global__ static void execCudaEncodeBitmapKernel(void *vdx, Nd4jLong N, int *dz, int *scalar, int *reductionBuffer, float threshold) { + auto dx = reinterpret_cast(vdx); + int tid = blockIdx.x * blockDim.x + threadIdx.x; - cudaEncodeBitmapKernel(vdx, N, dz, scalar, reductionBuffer, threshold); + T off(0.0f); + __shared__ int counter; + __shared__ int *shmem; + __shared__ T *vals; + if (threadIdx.x == 0){ + extern __shared__ char mem[]; + shmem = reinterpret_cast(mem); + vals = reinterpret_cast(shmem + blockDim.x); + counter = 0; + } + __syncthreads(); + + Nd4jLong loopRemainder = N % (blockDim.x * gridDim.x); + Nd4jLong loopLimit = N + (blockDim.x * gridDim.x - loopRemainder); + + for (Nd4jLong i = tid; i < loopLimit; i += blockDim.x * gridDim.x) { + // all threads in block reading stuff + T val = i < N ? 
dx[i] : off; + T abs = sd::math::nd4j_abs(val); + + int byteId = i / 16 + 4; + int bitId = i % 16; + + shmem[threadIdx.x] = 0; + vals[threadIdx.x] = val; + + if (abs >= static_cast(threshold) && i < N) { + shmem[threadIdx.x] = 1 << (bitId); + atomicAdd(&counter, 1); + if (val < static_cast(0.0f)) { + shmem[threadIdx.x] |= 1 << (bitId + 16); + vals[threadIdx.x] += static_cast(threshold); + } else { + vals[threadIdx.x] -= static_cast(threshold); + } + } else if (abs >= static_cast(threshold) / static_cast(2.0f) && val < static_cast(0.0f) && i < N) { + atomicAdd(&counter, 1); + shmem[threadIdx.x] = 1 << (bitId + 16); + + vals[threadIdx.x] += static_cast(threshold) / static_cast(2.0f); + } + __syncthreads(); + + if (threadIdx.x % 16 == 0 && i < N) { + int byte = 0; + for (int e = 0; e < 16; e++) { + if (i + e >= N) + continue; + + byte |= shmem[threadIdx.x + e]; + } + dz[byteId] = byte; + } + __syncthreads(); + + if (i < N) + dx[i] = vals[threadIdx.x]; + } + __syncthreads(); + + if (threadIdx.x == 0) { + atomicAdd(scalar, counter); + } } ////////////////////////////////////////////////////////////////////////// @@ -278,24 +445,72 @@ __host__ void cudaEncodeBitmapGeneric(dim3 &launchDims, cudaStream_t *stream, vo execCudaEncodeBitmapKernel<<>>(vdx, N, dz, scalar, reductionBuffer, threshold); sd::DebugHelper::checkErrorCode(stream, "encodeBitmap(...) 
failed"); } -BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT cudaEncodeBitmapGeneric, (dim3 &launchDims, cudaStream_t *stream, void *vdx, Nd4jLong N, int *dz, int *scalar, int *reductionBuffer, float threshold), LIBND4J_TYPES); +BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT cudaEncodeBitmapGeneric, (dim3 &launchDims, cudaStream_t *stream, void *vdx, Nd4jLong N, int *dz, int *scalar, int *reductionBuffer, float threshold), FLOAT_TYPES); ////////////////////////////////////////////////////////////////////////// template -__global__ static void execCudaDecodeBitmapKernel(void *dx, Nd4jLong N, void *vdz) { +__global__ static void execCudaDecodeBitmapKernel(const void *dx, Nd4jLong N, void *vdz) { + auto dz = static_cast(vdz); - cudaDecodeBitmapKernel(dx, N, vdz); + int tid = blockIdx.x * blockDim.x + threadIdx.x; + __shared__ T *shmem; + __shared__ FloatBits fb; + __shared__ float threshold; + __shared__ const int *x; + if (threadIdx.x == 0){ + extern __shared__ char mem[]; + shmem = reinterpret_cast(mem); + x = reinterpret_cast(dx); + fb.i_ = x[2]; + threshold = fb.f_; + } + __syncthreads(); + + int lim = N / 16 + 5; + for (int i = tid; i < N; i += blockDim.x * gridDim.x) { + int byteId = i / 16 + 4; +// printf("I: [%i]; byteId: [%i]\n", i, byteId); + + shmem[threadIdx.x] = dz[i]; + __syncthreads(); + + if (threadIdx.x % 16 == 0) { + int byte = x[byteId]; + + for (int e = 0; e < 16; e++) { + if (i + e >= N) + continue; + + int bitId = (i + e) % 16; + + bool hasBit = (byte & 1 << (bitId) ) != 0; + bool hasSign = (byte & 1 << (bitId + 16) ) != 0; + + if (hasBit) { + if (hasSign) + shmem[threadIdx.x + bitId] -= threshold; + else + shmem[threadIdx.x + bitId] += threshold; + } else if (hasSign) { + shmem[threadIdx.x + bitId] -= threshold / 2; + } + } + } + __syncthreads(); + + dz[i] = shmem[threadIdx.x]; + } } ////////////////////////////////////////////////////////////////////////// template -__host__ void cudaDecodeBitmapGeneric(dim3 &launchDims, cudaStream_t *stream, 
void *dx, Nd4jLong N, void *vdz) { +__host__ void cudaDecodeBitmapGeneric(dim3 &launchDims, cudaStream_t *stream, const void *dx, Nd4jLong N, void *vdz) { execCudaDecodeBitmapKernel<<>>(dx, N, vdz); sd::DebugHelper::checkErrorCode(stream, "cudeDecodeBitmap(...) failed"); } -BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT cudaDecodeBitmapGeneric, (dim3 &launchDims, cudaStream_t *stream, void *dx, Nd4jLong N, void *vdz), LIBND4J_TYPES); +BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT cudaDecodeBitmapGeneric, (dim3 &launchDims, cudaStream_t *stream, const void *dx, Nd4jLong N, void *vdz), FLOAT_TYPES); template diff --git a/libnd4j/include/loops/impl/type_conversions.cpp b/libnd4j/include/loops/impl/type_conversions.cpp index 16914bd86..a2f302d25 100644 --- a/libnd4j/include/loops/impl/type_conversions.cpp +++ b/libnd4j/include/loops/impl/type_conversions.cpp @@ -106,8 +106,14 @@ namespace sd { auto l = static_cast(N); z[1] = l; +#ifdef _OPENMP int threads = OmpLaunchHelper::betterThreads(N); - int span = OmpLaunchHelper::betterSpan(N, threads); + auto span = OmpLaunchHelper::betterSpan(N, threads); +#else + int threads = 1; + auto span = N; +#endif + T tt = static_cast(threshold); T mtt = -tt; @@ -165,10 +171,10 @@ PRAGMA_OMP_ATOMIC_ARGS(write) } template - void TypeCast::convertFromThreshold(Nd4jPointer * extras, void *dx, Nd4jLong N, void *dz) { + void TypeCast::convertFromThreshold(Nd4jPointer * extras, const void *dx, Nd4jLong N, void *dz) { FloatBits fb; auto z = reinterpret_cast(dz); - auto x = reinterpret_cast(dx); + auto x = reinterpret_cast(dx); int limit = x[0]; fb.i_ = x[2]; float threshold = fb.f_; @@ -209,21 +215,23 @@ PRAGMA_OMP_ATOMIC_ARGS(write) samediff::Threads::parallel_for(func, 0, N); }; - template void TypeCast::convertFromThreshold(Nd4jPointer * extras, void *dx, Nd4jLong N, void *dz); - template void TypeCast::convertFromThreshold(Nd4jPointer * extras, void *dx, Nd4jLong N, void *dz); - template void TypeCast::convertFromThreshold(Nd4jPointer 
* extras, void *dx, Nd4jLong N, void *dz); + template void TypeCast::convertFromThreshold(Nd4jPointer * extras, const void *dx, Nd4jLong N, void *dz); + template void TypeCast::convertFromThreshold(Nd4jPointer * extras, const void *dx, Nd4jLong N, void *dz); + template void TypeCast::convertFromThreshold(Nd4jPointer * extras, const void *dx, Nd4jLong N, void *dz); + template void TypeCast::convertFromThreshold(Nd4jPointer * extras, const void *dx, Nd4jLong N, void *dz); + template void TypeCast::convertToThreshold(Nd4jPointer * extras, void *dx, Nd4jLong N, void *dz); template void TypeCast::convertToThreshold(Nd4jPointer * extras, void *dx, Nd4jLong N, void *dz); template void TypeCast::convertToThreshold(Nd4jPointer * extras, void *dx, Nd4jLong N, void *dz); - template void TypeCast::convertToThreshold(Nd4jPointer * extras, void *dx, Nd4jLong N, void *dz); + template void TypeCast::convertToThreshold(Nd4jPointer * extras, void *dx, Nd4jLong N, void *dz); + template void TypeCast::convertFromQuantized(Nd4jPointer * extras, void *dx, Nd4jLong N, void *dz); template void TypeCast::convertFromQuantized(Nd4jPointer * extras, void *dx, Nd4jLong N, void *dz); template void TypeCast::convertFromQuantized(Nd4jPointer * extras, void *dx, Nd4jLong N, void *dz); - template void TypeCast::convertFromQuantized(Nd4jPointer * extras, void *dx, Nd4jLong N, void *dz); + template void TypeCast::convertToQuantized(Nd4jPointer * extras, void *dx, Nd4jLong N, void *dz); template void TypeCast::convertToQuantized(Nd4jPointer * extras, void *dx, Nd4jLong N, void *dz); template void TypeCast::convertToQuantized(Nd4jPointer * extras, void *dx, Nd4jLong N, void *dz); - template void TypeCast::convertToQuantized(Nd4jPointer * extras, void *dx, Nd4jLong N, void *dz); #ifndef __CLION_IDE__ BUILD_DOUBLE_TEMPLATE(template void TypeCast::convertGeneric, (Nd4jPointer * extras, void *dx, Nd4jLong N, void *dz), LIBND4J_TYPES, LIBND4J_TYPES) diff --git a/libnd4j/include/loops/indexreduce.h 
b/libnd4j/include/loops/indexreduce.h index 677d83db9..2e8bc33d2 100755 --- a/libnd4j/include/loops/indexreduce.h +++ b/libnd4j/include/loops/indexreduce.h @@ -51,32 +51,74 @@ namespace functions { template class IndexReduce { public: -#ifdef __CUDACC__ +#ifdef __CUDABLAS__ - static __device__ void transform(const int opNum, void *x, Nd4jLong *xShapeInfo, void *extraParams, void *result, Nd4jLong *resultShapeInfo, int *dimension,int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffset); + static __device__ void transform(int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension,int dimensionLength, + int postProcessOrNot, + int *allocationBuffer, void *reductionBuffer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset); template - static __device__ void aggregatePartials(IndexValue **sPartialsRef, Nd4jLong tid, Nd4jLong numElements,void *extraParams); + static __device__ void aggregatePartials(IndexValue **sPartialsRef, Nd4jLong tid, Nd4jLong numElements, void *extraParams); template - static __device__ void transform(void *dx, Nd4jLong *xShapeInfo, void *extraParams, void *result, Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets); + static __device__ void transform(const void *dx, const Nd4jLong *xShapeInfo, + void *extraParams, + void *result, const Nd4jLong *resultShapeInfo, + int *dimension, int dimensionLength, + int postProcessOrNot, + int *allocationBuffer, void *reductionBuffer, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets); - static _CUDA_H void executeIndexReduceScalar(dim3 launchDims, cudaStream_t *stream, const int op, void *dx, Nd4jLong *xShapeInfo, int xRank, void *extraParams, void *result, Nd4jLong *resultShapeInfo, int zRank, int 
*dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets); + static _CUDA_H void executeIndexReduceScalar(dim3 launchDims, cudaStream_t *stream, + int op, + const void *dx, const Nd4jLong *xShapeInfo, + int xRank, + void *extraParams, + void *result, const Nd4jLong *resultShapeInfo, + int zRank, + int *dimension, int dimensionLength, + int postProcessOrNot, + int *allocationBuffer, void *reductionBuffer, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets); - static _CUDA_H void executeIndexReduce(dim3 launchDims, cudaStream_t *stream, const int op, void *dx, Nd4jLong *xShapeInfo, int xRank, void *extraParams, void *result, Nd4jLong *resultShapeInfo, int zRank, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets); + static _CUDA_H void executeIndexReduce(dim3 launchDims, cudaStream_t *stream, + int op, + const void *dx, const Nd4jLong *xShapeInfo, + int xRank, + void *extraParams, + void *result, const Nd4jLong *resultShapeInfo, + int zRank, + int *dimension, int dimensionLength, + int postProcessOrNot, + int *allocationBuffer, void *reductionBuffer, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets); #else - static Nd4jLong execScalar(const int opNum, void *x, Nd4jLong *xShapeInfo, void *extraParams); + static Nd4jLong execScalar(int opNum, const void *x, const Nd4jLong *xShapeInfo, void *extraParams); - static void exec(const int opNum, void *x, Nd4jLong *xShapeInfo, void *extraParams, void *result, Nd4jLong *resultShapeInfoBuffer, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffset); + static void exec(int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *result, const Nd4jLong *resultShapeInfoBuffer, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong 
*tadOffset); template - static _CUDA_H Nd4jLong execScalar(void *x, Nd4jLong *xShapeInfo, void *extraParams); + static _CUDA_H Nd4jLong execScalar(const void *x, const Nd4jLong *xShapeInfo, void *extraParams); template - static _CUDA_H void exec(void *x, Nd4jLong *xShapeInfo, void *extraParams, void *result, Nd4jLong *resultShapeInfoBuffer, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffset); + static _CUDA_H void exec(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *result, const Nd4jLong *resultShapeInfoBuffer, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset); #endif }; } diff --git a/libnd4j/include/loops/pairwise_bool.h b/libnd4j/include/loops/pairwise_bool.h index fee96df84..9cc8f220c 100644 --- a/libnd4j/include/loops/pairwise_bool.h +++ b/libnd4j/include/loops/pairwise_bool.h @@ -58,62 +58,52 @@ namespace functions { #ifdef __CUDACC__ template - static __host__ void intermediateShaped(dim3& launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, void *vextraParams); + static __host__ void intermediateShaped(dim3& launchDims, cudaStream_t *stream, + const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + void *vextraParams); - static __host__ void executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *z, Nd4jLong *zShapeInfo, void *extraParams); + static __host__ void executeCudaShaped(dim3& launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + void *extraParams); #else - static void exec( - const int opNum, - void *dx, - Nd4jLong *xShapeBuffer, - void *y, - Nd4jLong *yShapeBuffer, - void *result, - Nd4jLong 
*resultShapeBuffer, - void *extraParams, - const uint64_t start, - const uint64_t stop); + static void exec(int opNum, + const void *dx, const Nd4jLong *xShapeBuffer, + const void *y, const Nd4jLong *yShapeBuffer, + void *result, const Nd4jLong *resultShapeBuffer, + void *extraParams, + uint64_t start, uint64_t stop); - static void exec( - const int opNum, - void *dx, - Nd4jLong xStride, - void *y, - Nd4jLong yStride, - void *result, - Nd4jLong resultStride, - void *extraParams, - Nd4jLong n, - const uint64_t start, - const uint64_t stop); + static void exec(int opNum, + const void *dx, Nd4jLong xStride, + const void *y, Nd4jLong yStride, + void *result, Nd4jLong resultStride, + void *extraParams, + Nd4jLong n, + uint64_t start, uint64_t stop); template - static void exec( - void *vx, - Nd4jLong* xShapeBuffer, - void *vy, - Nd4jLong* yShapeBuffer, - void *vresult, - Nd4jLong* resultShapeBuffer, - void *vextraParams, - const uint64_t start, - const uint64_t stop); + static void exec(const void *vx, const Nd4jLong* xShapeBuffer, + const void *vy, const Nd4jLong* yShapeBuffer, + void *vresult, const Nd4jLong* resultShapeBuffer, + void *vextraParams, + uint64_t start, uint64_t stop); template - static void exec(void *vx, - Nd4jLong xStride, - void *vy, - Nd4jLong yStride, - void *vresult, - Nd4jLong resultStride, + static void exec(const void *vx, Nd4jLong xStride, + const void *vy, Nd4jLong yStride, + void *vresult, Nd4jLong resultStride, void *vextraParams, - const Nd4jLong n, - const uint64_t start, - const uint64_t stop); + Nd4jLong n, + uint64_t start, uint64_t stop); #endif }; } diff --git a/libnd4j/include/loops/pairwise_int.h b/libnd4j/include/loops/pairwise_int.h index 4144963c7..64deebc04 100644 --- a/libnd4j/include/loops/pairwise_int.h +++ b/libnd4j/include/loops/pairwise_int.h @@ -59,62 +59,52 @@ namespace functions { #ifdef __CUDACC__ template - static __host__ void intermediateShaped(dim3& launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, 
void *vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, void *vextraParams); + static __host__ void intermediateShaped(dim3& launchDims, cudaStream_t *stream, + const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + void *vextraParams); - static __host__ void executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *z, Nd4jLong *zShapeInfo, void *extraParams); + static __host__ void executeCudaShaped(dim3& launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + void *extraParams); #else - static void exec( - const int opNum, - void *dx, - Nd4jLong *xShapeBuffer, - void *y, - Nd4jLong *yShapeBuffer, - void *result, - Nd4jLong *resultShapeBuffer, - void *extraParams, - const uint64_t start, - const uint64_t stop); + static void exec(int opNum, + const void *dx, const Nd4jLong *xShapeBuffer, + const void *y, const Nd4jLong *yShapeBuffer, + void *result, const Nd4jLong *resultShapeBuffer, + void *extraParams, + uint64_t start, uint64_t stop); - static void exec( - const int opNum, - void *dx, - Nd4jLong xStride, - void *y, - Nd4jLong yStride, - void *result, - Nd4jLong resultStride, - void *extraParams, - Nd4jLong n, - const uint64_t start, - const uint64_t stop); + static void exec(int opNum, + const void *dx, Nd4jLong xStride, + const void *y, Nd4jLong yStride, + void *result, Nd4jLong resultStride, + void *extraParams, + Nd4jLong n, + uint64_t start, uint64_t stop); template - static void exec( - void *vx, - Nd4jLong* xShapeBuffer, - void *vy, - Nd4jLong* yShapeBuffer, - void *vresult, - Nd4jLong* resultShapeBuffer, - void *vextraParams, - const uint64_t start, - const uint64_t stop); + static void exec(const void *vx, const Nd4jLong* xShapeBuffer, + const void *vy, const Nd4jLong* 
yShapeBuffer, + void *vresult, const Nd4jLong* resultShapeBuffer, + void *vextraParams, + uint64_t start,uint64_t stop); template - static void exec(void *vx, - Nd4jLong xStride, - void *vy, - Nd4jLong yStride, - void *vresult, - Nd4jLong resultStride, + static void exec(const void *vx, Nd4jLong xStride, + const void *vy, Nd4jLong yStride, + void *vresult, Nd4jLong resultStride, void *vextraParams, - const Nd4jLong n, - const uint64_t start, - const uint64_t stop); + Nd4jLong n, + uint64_t start, uint64_t stop); #endif }; } diff --git a/libnd4j/include/loops/pairwise_transform.h b/libnd4j/include/loops/pairwise_transform.h index 8576481f5..b3b514df6 100755 --- a/libnd4j/include/loops/pairwise_transform.h +++ b/libnd4j/include/loops/pairwise_transform.h @@ -52,65 +52,55 @@ namespace functions { class PairWiseTransform { public: -#ifdef __CUDACC__ +#ifdef __CUDABLAS__ template - static __host__ void intermediateShaped(dim3& launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, void *vextraParams); + static __host__ void intermediateShaped(dim3& launchDims, cudaStream_t *stream, + const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + void *vextraParams); - static __host__ void executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *z, Nd4jLong *zShapeInfo, void *extraParams); + static __host__ void executeCudaShaped(dim3& launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + void *extraParams); #endif public: - static void exec( - const int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *y, - Nd4jLong *yShapeInfo, - void *z, - Nd4jLong *zShapeInfo, - void *extraParams, - uint64_t start, - uint64_t stop); + static 
void exec(int opNum, + const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + void *extraParams, + uint64_t start, uint64_t stop); - static void exec( - const int opNum, - void *x, - Nd4jLong xStride, - void *y, - Nd4jLong yStride, - void *z, - Nd4jLong resultStride, - void *extraParams, - Nd4jLong len, - uint64_t start, - uint64_t stop); + static void exec(int opNum, + const void *x, Nd4jLong xStride, + const void *y, Nd4jLong yStride, + void *z, Nd4jLong resultStride, + void *extraParams, + Nd4jLong len, + uint64_t start, uint64_t stop); template - static void exec( - void *vx, - Nd4jLong* xShapeInfo, - void *vy, - Nd4jLong* yShapeInfo, - void *vresult, - Nd4jLong* zShapeInfo, - void *vextraParams, - uint64_t start, - uint64_t stop); + static void exec(const void *vx, const Nd4jLong* xShapeInfo, + const void *vy, const Nd4jLong* yShapeInfo, + void *vresult, const Nd4jLong* zShapeInfo, + void *vextraParams, + uint64_t start, uint64_t stop); template - static void exec(void *vx, - Nd4jLong xStride, - void *vy, - Nd4jLong yStride, - void *vresult, - Nd4jLong resultStride, + static void exec(const void *vx, Nd4jLong xStride, + const void *vy, Nd4jLong yStride, + void *vresult, Nd4jLong resultStride, void *vextraParams, Nd4jLong len, - uint64_t start, - uint64_t stop); + uint64_t start, uint64_t stop); }; } } diff --git a/libnd4j/include/loops/random.h b/libnd4j/include/loops/random.h index 5048e5ce0..9b35f472f 100644 --- a/libnd4j/include/loops/random.h +++ b/libnd4j/include/loops/random.h @@ -38,34 +38,60 @@ namespace functions { class RandomFunction { public: -#ifdef __CUDACC__ +#ifdef __CUDABLAS__ template - static _CUDA_D void execTransformCuda(Nd4jPointer state, void *x, Nd4jLong *xShapeBuffer, void *y, Nd4jLong *yShapeBuffer, void *z, Nd4jLong *zShapeBuffer, void *extraArguments); + static _CUDA_D void execTransformCuda(Nd4jPointer state, + const void *x, const Nd4jLong *xShapeBuffer, + 
const void *y, const Nd4jLong *yShapeBuffer, + void *z, const Nd4jLong *zShapeBuffer, + void *extraArguments); template - static _CUDA_D void execTransformCuda(Nd4jPointer state, void *x, Nd4jLong *xShapeBuffer, void *z, Nd4jLong *zShapeBuffer, void *extraArguments); + static _CUDA_D void execTransformCuda(Nd4jPointer state, + const void *x, const Nd4jLong *xShapeBuffer, + void *z, const Nd4jLong *zShapeBuffer, + void *extraArguments); template - static _CUDA_D void execTransformCuda(Nd4jPointer state, void *z, Nd4jLong *zShapeBuffer, void *extraArguments); + static _CUDA_D void execTransformCuda(Nd4jPointer state, void *z, const Nd4jLong *zShapeBuffer, void *extraArguments); - static _CUDA_H void executeCudaSingle(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void *z, Nd4jLong *zShapeBuffer, void *extraArguments); - static _CUDA_H void executeCudaDouble(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void *x, Nd4jLong *xShapeBuffer, void *z, Nd4jLong *zShapeBuffer, void *extraArguments); - static _CUDA_H void executeCudaTriple(dim3& launchDims, cudaStream_t* stream, int opNum, Nd4jPointer stateHost, void *x, Nd4jLong *xShapeBuffer, void *y, Nd4jLong *yShapeBuffer, void *z, Nd4jLong *zShapeBuffer, void *extraArguments); + static _CUDA_H void executeCudaSingle(dim3& launchDims, cudaStream_t* stream, + int opNum, + Nd4jPointer stateHost, + void *z, const Nd4jLong *zShapeBuffer, + void *extraArguments); + + + static _CUDA_H void executeCudaDouble(dim3& launchDims, cudaStream_t* stream, + int opNum, + Nd4jPointer stateHost, + const void *x, const Nd4jLong *xShapeBuffer, + void *z, const Nd4jLong *zShapeBuffer, + void *extraArguments); + + + static _CUDA_H void executeCudaTriple(dim3& launchDims, cudaStream_t* stream, + int opNum, + Nd4jPointer stateHost, + const void *x, const Nd4jLong *xShapeBuffer, + const void *y, const Nd4jLong *yShapeBuffer, + void *z, const Nd4jLong* zShapeBuffer, + void *extraArguments); 
#else template - static void execTransform(Nd4jPointer state, void *x, Nd4jLong *xShapeBuffer, void *y, Nd4jLong *yShapeBuffer, void *z, Nd4jLong *zShapeBuffer, void *extraArguments); + static void execTransform(Nd4jPointer state, const void *x, const Nd4jLong *xShapeBuffer, const void *y, const Nd4jLong *yShapeBuffer, void *z, const Nd4jLong *zShapeBuffer, void *extraArguments); template - static void execTransform(Nd4jPointer state, void *x, Nd4jLong *xShapeBuffer, void *z, Nd4jLong *zShapeBuffer, void *extraArguments); + static void execTransform(Nd4jPointer state, const void *x, const Nd4jLong *xShapeBuffer, void *z, const Nd4jLong *zShapeBuffer, void *extraArguments); template - static void execTransform(Nd4jPointer state, void *z, Nd4jLong *zShapeBuffer, void *extraArguments); + static void execTransform(Nd4jPointer state, void *z, const Nd4jLong *zShapeBuffer, void *extraArguments); - static void execTransform(int opNum, Nd4jPointer state, void *x, Nd4jLong *xShapeBuffer, void *z, Nd4jLong *zShapeBuffer, void *extraArguments); - static void execTransform(int opNum, Nd4jPointer state, void *x, Nd4jLong *xShapeBuffer, void *y, Nd4jLong *yShapeBuffer, void *z, Nd4jLong *zShapeBuffer, void *extraArguments); - static void execTransform(int opNum, Nd4jPointer state, void *z, Nd4jLong *zShapeBuffer, void *extraArguments); + static void execTransform(int opNum, Nd4jPointer state, const void *x, const Nd4jLong *xShapeBuffer, void *z, const Nd4jLong *zShapeBuffer, void *extraArguments); + static void execTransform(int opNum, Nd4jPointer state, const void *x, const Nd4jLong *xShapeBuffer, const void *y, const Nd4jLong *yShapeBuffer, void *z, const Nd4jLong *zShapeBuffer, void *extraArguments); + static void execTransform(int opNum, Nd4jPointer state, void *z, const Nd4jLong *zShapeBuffer, void *extraArguments); #endif }; } diff --git a/libnd4j/include/loops/reduce3.h b/libnd4j/include/loops/reduce3.h index 597e450b1..f2496f1fe 100755 --- 
a/libnd4j/include/loops/reduce3.h +++ b/libnd4j/include/loops/reduce3.h @@ -75,10 +75,23 @@ class Reduce3 { static __device__ void aggregatePartials(void* sPartials, Nd4jLong tid, Nd4jLong numItems, void *extraParams); template - static __device__ void execScalarCuda(void *x, Nd4jLong *xShapeInfo, void *y, Nd4jLong *yShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, int *allocationPointer, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo); + static __device__ void execScalarCuda(const void *x, const Nd4jLong *xShapeInfo, + const void *y, const Nd4jLong *yShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, + int *allocationPointer, void *reductionBuffer, + const Nd4jLong *tadOnlyShapeInfo); template - static __device__ void transformAll(void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationPointer, Nd4jLong *xTadShapeInfo, Nd4jLong *xOffsets, Nd4jLong *yTadShapeInfo, Nd4jLong *yOffsets); + static __device__ void transformAll(const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *extraParams, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + int postProcessOrNot, + int *allocationPointer, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xOffsets, + const Nd4jLong *yTadShapeInfo, const Nd4jLong *yOffsets); /** Perform a reduction @@ -90,54 +103,157 @@ class Reduce3 { @param result where to store the result of the reduction */ template - static __device__ void transform(void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationPointer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets); + static __device__ void transform(const void *vx, const Nd4jLong 
*xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *extraParams, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + int postProcessOrNot, + int *allocationPointer, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *yTadOnlyShapeInfo, const Nd4jLong *yTadOffsets); - static __device__ void execCuda(const int opNum, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationPointer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets); + static __device__ void execCuda(int opNum, + const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *extraParams, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + int postProcessOrNot, + int *allocationPointer, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *yTadOnlyShapeInfo, const Nd4jLong *yTadOffsets); - static __device__ void execAllCuda( const int opNum, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationPointer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets); + static __device__ void execAllCuda(int opNum, + const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *extraParams, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + int postProcessOrNot, + int *allocationPointer, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *yTadOnlyShapeInfo, const Nd4jLong *yTadOffsets); - static __device__ void execScalarCuda(const int opNum, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong 
*yShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, int * allocationPointer, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo); + static __device__ void execScalarCuda(int opNum, + const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *extraParams, + void *vz, const Nd4jLong *zShapeInfo, + int * allocationPointer, void *reductionBuffer, + const Nd4jLong *tadOnlyShapeInfo); - static __host__ void exec(dim3 launchDims, cudaStream_t *stream, int opNum, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationPointer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets); - - static __host__ void execAll(dim3 launchDims, cudaStream_t *stream, int opNum, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationPointer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets); - - static __host__ void execScalar(dim3 launchDims, cudaStream_t *stream, int opNum, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, int* allocationPointer, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo); + static __host__ void exec(dim3 launchDims, cudaStream_t *stream, + int opNum, + const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *extraParams, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + int postProcessOrNot, + int *allocationPointer, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *yTadOnlyShapeInfo, const Nd4jLong *yTadOffsets); + static __host__ void execAll(dim3 launchDims, cudaStream_t *stream, + 
int opNum, + const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *extraParams, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + int postProcessOrNot, + int *allocationPointer, + const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *yTadOnlyShapeInfo, const Nd4jLong *yTadOffsets); + static __host__ void execScalar(dim3 launchDims, cudaStream_t *stream, + int opNum, + const void *vx, const Nd4jLong *xShapeInfo, + const void *vy, const Nd4jLong *yShapeInfo, + void *extraParams, + void *vz, const Nd4jLong *zShapeInfo, + int* allocationPointer, void *reductionBuffer, + const Nd4jLong *tadOnlyShapeInfo); #else template - static void execScalar(void *vx, Nd4jLong *xShapeInfo, void *vextraParams, void *vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo); + static void execScalar(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo); - static void execScalar(const int opNum, void *x, Nd4jLong *xShapeInfo, void *extraParamsVals, void *y, Nd4jLong *yShapeInfo, void *z, Nd4jLong *zShapeInfo); + static void execScalar(int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *extraParamsVals, + const void *y, const Nd4jLong *yShapeInfo, + void *z, const Nd4jLong *zShapeInfo); template - static void exec(void *vx, Nd4jLong *xShapeInfo, void *vextraParams, void *vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, int64_t start, int64_t stop); + static void exec(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + int64_t start, int64_t stop); template - static void exec(void *vx, Nd4jLong *xShapeInfo, void *vextraParams, void *vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, int 
*dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, int64_t start, int64_t stop); + static void exec(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + int64_t start, int64_t stop); template - static void execAll(void *vx, Nd4jLong *xShapeInfo, void *vextraParams, void *vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, Nd4jLong *xTadShapeInfo, Nd4jLong *xOffsets, Nd4jLong *yTadShapeInfo, Nd4jLong *yOffsets, int64_t start, int64_t stop); + static void execAll(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xOffsets, + const Nd4jLong *yTadShapeInfo, const Nd4jLong *yOffsets, + int64_t start, int64_t stop); - static void exec(const int opNum, void *vx, Nd4jLong *xShapeInfo, void *extraParamsVals, void *vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, int64_t start, int64_t stop); + static void exec(int opNum, + const void *vx, const Nd4jLong *xShapeInfo, + void *extraParamsVals, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + int64_t start, int64_t stop); - static void exec(const int opNum, void *vx, Nd4jLong *xShapeInfo, void *extraParamsVals, void *vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, int64_t start, int64_t stop); + static void exec(int opNum, + const void *vx, const Nd4jLong *xShapeInfo, + void *extraParamsVals, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong 
*zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + int64_t start, int64_t stop); - static void execAll(const int opNum, void *vx, Nd4jLong *xShapeInfo, void *extraParamsVals, void *vy, Nd4jLong *yShapeInfo, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, Nd4jLong *xTadShapeInfo, Nd4jLong *xOffsets, Nd4jLong *yTadShapeInfo, Nd4jLong *yOffsets, int64_t start, int64_t stop); + static void execAll(int opNum, + const void *vx, const Nd4jLong *xShapeInfo, + void *extraParamsVals, + const void *vy, const Nd4jLong *yShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + int *dimension, int dimensionLength, + const Nd4jLong *xTadShapeInfo, const Nd4jLong *xOffsets, + const Nd4jLong *yTadShapeInfo, const Nd4jLong *yOffsets, + int64_t start, int64_t stop); #endif }; diff --git a/libnd4j/include/loops/reduce_bool.h b/libnd4j/include/loops/reduce_bool.h index 815557d41..a74d53033 100644 --- a/libnd4j/include/loops/reduce_bool.h +++ b/libnd4j/include/loops/reduce_bool.h @@ -58,20 +58,20 @@ namespace functions { static __device__ void aggregatePartials(void *sPartials, Nd4jLong tid, Nd4jLong numItems, void *extraParams); template - static __device__ void execScalarCuda( void *vx, Nd4jLong *xShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo); + static __device__ void execScalarCuda(const void *vx, const Nd4jLong *xShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShapeInfo, void *reductionBuffer, const Nd4jLong *tadOnlyShapeInfo); template - static __device__ void transformCudaXD( void *vx, Nd4jLong *xShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets); + static __device__ void transformCudaXD(const void *vx, const Nd4jLong *xShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShapeInfo, int *dimension, int 
dimensionLength, void *reductionBuffer, const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets); template - static __host__ void intermediateScalar(dim3 launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo); + static __host__ void intermediateScalar(dim3 launchDims, cudaStream_t *stream, const void *vx, const Nd4jLong *xShapeInfo, const Nd4jLong *hXShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShapeInfo, const Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, const Nd4jLong *tadOnlyShapeInfo); template - static __host__ void intermediateXD(dim3 launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static __host__ void intermediateXD(dim3 launchDims, cudaStream_t *stream, const void *vx, const Nd4jLong *xShapeInfo, const Nd4jLong *hXShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShapeInfo, const Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); - static __host__ void execReduceScalar(dim3 launchDims, cudaStream_t *stream, int opNum, void *vx, Nd4jLong *xShapeInfo, Nd4jLong* hXShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong* hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo); + static __host__ void execReduceScalar(dim3 launchDims, cudaStream_t *stream, int opNum, const void *vx, const Nd4jLong *xShapeInfo, const Nd4jLong* hXShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShapeInfo, const Nd4jLong* hZShapeInfo, int *dimension, 
int dimensionLength, void *reductionBuffer, const Nd4jLong *tadOnlyShapeInfo); - static __host__ void execReduceXD(dim3 launchDims, cudaStream_t *stream, int opNum, int rank, void *vx, Nd4jLong *xShapeInfo, Nd4jLong* hXShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong* hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static __host__ void execReduceXD(dim3 launchDims, cudaStream_t *stream, int opNum, int rank, const void *vx, const Nd4jLong *xShapeInfo, const Nd4jLong* hXShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShapeInfo, const Nd4jLong* hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); #else /** @@ -83,40 +83,28 @@ namespace functions { * @return */ template - static _CUDA_H Z execScalar(void *x, - Nd4jLong *xShapeInfo, - void *extraParams); + static _CUDA_H Z execScalar(const void *x, const Nd4jLong *xShapeInfo, void *extraParams); template - static _CUDA_H void execScalar(void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *z, - Nd4jLong *zShapeInfo); + static _CUDA_H void execScalar(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo); - static Z execScalar(int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams); + static Z execScalar(int opNum, const void *x, const Nd4jLong *xShapeInfo, void *extraParams); static void execScalar(int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *z, - Nd4jLong *zShapeInfo); + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo); static void exec(int opNum, - void *x, - Nd4jLong *xShapeInfo, + const void *x, const Nd4jLong *xShapeInfo, void *extraParams, - void *result, - Nd4jLong *resultShapeInfoBuffer, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, 
int64_t start, int64_t stop); + void *result, const Nd4jLong *resultShapeInfoBuffer, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + int64_t start, int64_t stop); /** * Execute on the cpu @@ -132,15 +120,12 @@ namespace functions { template - static void _CUDA_H exec(void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *result, - Nd4jLong *resultShapeInfoBuffer, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, int64_t start, int64_t stop); + static void _CUDA_H exec(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *result, const Nd4jLong *resultShapeInfoBuffer, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + int64_t start, int64_t stop); /** * CPU implementation @@ -152,11 +137,9 @@ namespace functions { * @param resultShapeInfo the shape information */ template - static void _CUDA_H exec(void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *result, - Nd4jLong *resultShapeInfo); + static void _CUDA_H exec(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *result, const Nd4jLong *resultShapeInfo); @@ -169,10 +152,7 @@ namespace functions { * @return */ template - static Z _CUDA_H execScalar(void *x, - Nd4jLong xElementWiseStride, - Nd4jLong length, - void *extraParams); + static Z _CUDA_H execScalar(const void *x, Nd4jLong xElementWiseStride, Nd4jLong length, void *extraParams); #endif }; diff --git a/libnd4j/include/loops/reduce_float.h b/libnd4j/include/loops/reduce_float.h index 6ff3f88ab..c78082f8e 100644 --- a/libnd4j/include/loops/reduce_float.h +++ b/libnd4j/include/loops/reduce_float.h @@ -60,20 +60,20 @@ namespace functions { static __device__ void aggregatePartials(void *sPartials, Nd4jLong tid, Nd4jLong numItems, void *extraParams); template - static __device__ void execScalarCuda( void *vx, Nd4jLong *xShapeInfo, void *extraParams, void *vz, Nd4jLong 
*zShapeInfo, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo); + static __device__ void execScalarCuda(const void *vx, const Nd4jLong *xShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShapeInfo, void *reductionBuffer, const Nd4jLong *tadOnlyShapeInfo); template - static __device__ void transformCudaXD( void *vx, Nd4jLong *xShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets); + static __device__ void transformCudaXD(const void *vx, const Nd4jLong *xShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets); template - static __host__ void intermediateScalar(dim3 launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo); + static __host__ void intermediateScalar(dim3 launchDims, cudaStream_t *stream, const void *vx, const Nd4jLong *xShapeInfo, const Nd4jLong *hXShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShapeInfo, const Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, const Nd4jLong *tadOnlyShapeInfo); template - static __host__ void intermediateXD(dim3 launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *vz, Nd4jLong *zShape, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static __host__ void intermediateXD(dim3 launchDims, cudaStream_t *stream, const void *vx, const Nd4jLong *xShapeInfo, const Nd4jLong *hXShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShape, const Nd4jLong *hZShapeInfo, int *dimension, int 
dimensionLength, void *reductionPointer, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); - static __host__ void execReduceScalar(dim3 launchDims, cudaStream_t *stream, int opNum, void *vx, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo); + static __host__ void execReduceScalar(dim3 launchDims, cudaStream_t *stream, int opNum, const void *vx, const Nd4jLong *xShapeInfo, const Nd4jLong *hXShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShapeInfo, const Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, const Nd4jLong *tadOnlyShapeInfo); - static __host__ void execReduceXD(dim3 launchDims, cudaStream_t *stream, int opNum, int rank, void *vx, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *vz, Nd4jLong *zShape, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static __host__ void execReduceXD(dim3 launchDims, cudaStream_t *stream, int opNum, int rank, const void *vx, const Nd4jLong *xShapeInfo, const Nd4jLong *hXShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShape, const Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); #else /** @@ -85,40 +85,30 @@ namespace functions { * @return */ template - static _CUDA_H Z execScalar(void *vx, - Nd4jLong *xShapeInfo, - void *extraParams); + static _CUDA_H Z execScalar(const void *vx, const Nd4jLong *xShapeInfo, void *extraParams); template - static _CUDA_H void execScalar(void *vx, - Nd4jLong *xShapeInfo, - void *extraParams, - void *vz, - Nd4jLong *zShapeInfo); + static _CUDA_H void execScalar(const void *vx, const Nd4jLong *xShapeInfo, + void *extraParams, + void *vz, const Nd4jLong *zShapeInfo); static Z 
execScalar(int opNum, - void *vx, - Nd4jLong *xShapeInfo, - void *extraParams); + const void *vx, const Nd4jLong *xShapeInfo, + void *extraParams); static void execScalar(int opNum, - void *vx, - Nd4jLong *xShapeInfo, - void *extraParams, - void *vz, - Nd4jLong *zShapeInfo); + const void *vx, const Nd4jLong *xShapeInfo, + void *extraParams, + void *vz, const Nd4jLong *zShapeInfo); static void exec(int opNum, - void *vx, - Nd4jLong *xShapeInfo, + const void *vx, const Nd4jLong *xShapeInfo, void *extraParams, - void *vz, - Nd4jLong *resultShapeInfoBuffer, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, int64_t start, int64_t stop); + void *vz, const Nd4jLong *resultShapeInfoBuffer, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + int64_t start, int64_t stop); /** * Execute on the cpu @@ -134,15 +124,12 @@ namespace functions { template - static void _CUDA_H exec(void *vx, - Nd4jLong *xShapeInfo, - void *extraParams, - void *vz, - Nd4jLong *resultShapeInfoBuffer, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, int64_t start, int64_t stop); + static void _CUDA_H exec(const void *vx, const Nd4jLong *xShapeInfo, + void *extraParams, + void *vz, const Nd4jLong *resultShapeInfoBuffer, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + int64_t start, int64_t stop); /** * CPU implementation @@ -154,11 +141,9 @@ namespace functions { * @param zShapeInfo the shape information */ template - static void _CUDA_H exec(void *vx, - Nd4jLong *xShapeInfo, + static void _CUDA_H exec(const void *vx, const Nd4jLong *xShapeInfo, void *extraParams, - void *vz, - Nd4jLong *zShapeInfo); + void *vz, const Nd4jLong *zShapeInfo); @@ -171,10 +156,7 @@ namespace functions { * @return */ template - static Z _CUDA_H execScalar(void *vx, - Nd4jLong xElementWiseStride, - Nd4jLong length, - void *extraParams); + static Z 
_CUDA_H execScalar(const void *vx, Nd4jLong xElementWiseStride, Nd4jLong length, void *extraParams); #endif }; diff --git a/libnd4j/include/loops/reduce_long.h b/libnd4j/include/loops/reduce_long.h index 4c83e1057..45ede2985 100644 --- a/libnd4j/include/loops/reduce_long.h +++ b/libnd4j/include/loops/reduce_long.h @@ -57,20 +57,20 @@ namespace functions { static __device__ void aggregatePartials(void *sPartials, Nd4jLong tid, Nd4jLong numItems, void *extraParams); template - static __device__ void execScalarCuda( void *vx, Nd4jLong *xShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo); + static __device__ void execScalarCuda(const void *vx, const Nd4jLong *xShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShapeInfo, void *reductionBuffer, const Nd4jLong *tadOnlyShapeInfo); template - static __device__ void transformCudaXD( void *vx, Nd4jLong *xShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets); + static __device__ void transformCudaXD(const void *vx, const Nd4jLong *xShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets); template - static __host__ void intermediateScalar(dim3 launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo); + static __host__ void intermediateScalar(dim3 launchDims, cudaStream_t *stream, const void *vx, const Nd4jLong *xShapeInfo, const Nd4jLong *hXShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShapeInfo, const Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, const Nd4jLong 
*tadOnlyShapeInfo); template - static __host__ void intermediateXD(dim3 launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static __host__ void intermediateXD(dim3 launchDims, cudaStream_t *stream, const void *vx, const Nd4jLong *xShapeInfo, const Nd4jLong *hXShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShapeInfo, const Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); - static __host__ void execReduceScalar(dim3 launchDims, cudaStream_t *stream, int opNum, void *vx, Nd4jLong *xShapeInfo, Nd4jLong* hXShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong* hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo); + static __host__ void execReduceScalar(dim3 launchDims, cudaStream_t *stream, int opNum, const void *vx, const Nd4jLong *xShapeInfo, const Nd4jLong* hXShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShapeInfo, const Nd4jLong* hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, const Nd4jLong *tadOnlyShapeInfo); - static __host__ void execReduceXD(dim3 launchDims, cudaStream_t *stream, int opNum, int rank, void *vx, Nd4jLong *xShapeInfo, Nd4jLong* hXShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong* hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static __host__ void execReduceXD(dim3 launchDims, cudaStream_t *stream, int opNum, int rank, const void *vx, const Nd4jLong *xShapeInfo, const Nd4jLong* hXShapeInfo, void *extraParams, void *vz, const Nd4jLong *zShapeInfo, const Nd4jLong* hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, 
const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); #else @@ -83,40 +83,30 @@ namespace functions { * @return */ template - static _CUDA_H Z execScalar(void *x, - Nd4jLong *xShapeInfo, - void *extraParams); + static _CUDA_H Z execScalar(const void *x, const Nd4jLong *xShapeInfo, void *extraParams); template - static _CUDA_H void execScalar(void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *z, - Nd4jLong *zShapeInfo); + static _CUDA_H void execScalar(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo); static Z execScalar(int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams); + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams); static void execScalar(int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *z, - Nd4jLong *zShapeInfo); + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo); static void exec(int opNum, - void *x, - Nd4jLong *xShapeInfo, + const void *x, const Nd4jLong *xShapeInfo, void *extraParams, - void *result, - Nd4jLong *resultShapeInfoBuffer, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, int64_t start, int64_t stop); + void *result, const Nd4jLong *resultShapeInfoBuffer, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + int64_t start, int64_t stop); /** * Execute on the cpu @@ -132,15 +122,12 @@ namespace functions { template - static void _CUDA_H exec(void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *result, - Nd4jLong *resultShapeInfoBuffer, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, int64_t start, int64_t stop); + static void _CUDA_H exec(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *result, const Nd4jLong *resultShapeInfoBuffer, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const 
Nd4jLong *tadOffset, + int64_t start, int64_t stop); /** * CPU implementation @@ -152,11 +139,9 @@ namespace functions { * @param resultShapeInfo the shape information */ template - static void _CUDA_H exec(void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *result, - Nd4jLong *resultShapeInfo); + static void _CUDA_H exec(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *result, const Nd4jLong *resultShapeInfo); @@ -169,10 +154,9 @@ namespace functions { * @return */ template - static Z _CUDA_H execScalar(void *x, - Nd4jLong xElementWiseStride, - Nd4jLong length, - void *extraParams); + static Z _CUDA_H execScalar(const void *x, Nd4jLong xElementWiseStride, + Nd4jLong length, + void *extraParams); #endif }; diff --git a/libnd4j/include/loops/reduce_same.h b/libnd4j/include/loops/reduce_same.h index 641551b6f..5f3622f39 100644 --- a/libnd4j/include/loops/reduce_same.h +++ b/libnd4j/include/loops/reduce_same.h @@ -58,22 +58,22 @@ namespace functions { static __device__ void aggregatePartials(void *sPartials, Nd4jLong tid, Nd4jLong numItems, void *extraParams); template - static __device__ void execScalarCuda( void *vx, Nd4jLong *xShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo); + static __device__ void execScalarCuda( void const* vx, Nd4jLong const *xShapeInfo, void *extraParams, void *vz, Nd4jLong const* zShapeInfo, void *reductionBuffer, Nd4jLong const* tadOnlyShapeInfo); - static __device__ void execScalarCudaLegacy(int opNum, void *vx, Nd4jLong *xShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo); + static __device__ void execScalarCudaLegacy(int opNum, void const* vx, Nd4jLong const* xShapeInfo, void *extraParams, void *vz, Nd4jLong const* zShapeInfo, void *reductionBuffer, Nd4jLong const* tadOnlyShapeInfo); template - static __device__ void transformCudaXD( void *vx, Nd4jLong *xShapeInfo, void 
*extraParams, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets); + static __device__ void transformCudaXD( void const* vx, Nd4jLong const* xShapeInfo, void *extraParams, void *vz, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets); template - static __host__ void intermediateScalar(dim3 launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo); + static __host__ void intermediateScalar(dim3 launchDims, cudaStream_t *stream, void const* vx, Nd4jLong const* xShapeInfo, Nd4jLong const* hXShapeInfo, void *extraParams, void *vz, Nd4jLong const* zShapeInfo, Nd4jLong const* hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong const* tadOnlyShapeInfo); template - static __host__ void intermediateXD(dim3 launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, Nd4jLong *hXShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong *hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static __host__ void intermediateXD(dim3 launchDims, cudaStream_t *stream, void const* vx, Nd4jLong const* xShapeInfo, Nd4jLong const* hXShapeInfo, void *extraParams, void *vz, Nd4jLong const* zShapeInfo, Nd4jLong const* hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets); - static __host__ void execReduceScalar(dim3 launchDims, cudaStream_t *stream, int opNum, void *vx, Nd4jLong *xShapeInfo, Nd4jLong* hXShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong* hZShapeInfo, int *dimension, int 
dimensionLength, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo); + static __host__ void execReduceScalar(dim3 launchDims, cudaStream_t *stream, int opNum, void const* vx, Nd4jLong const* xShapeInfo, Nd4jLong const* hXShapeInfo, void *extraParams, void *vz, Nd4jLong const* zShapeInfo, Nd4jLong const* hZShapeInfo, int *dimension, int dimensionLength, void *reductionBuffer, Nd4jLong const* tadOnlyShapeInfo); - static __host__ void execReduceXD(dim3 launchDims, cudaStream_t *stream, int opNum, int rank, void *vx, Nd4jLong *xShapeInfo, Nd4jLong* hXShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong* hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static __host__ void execReduceXD(dim3 launchDims, cudaStream_t *stream, int opNum, int rank, void const* vx, Nd4jLong const* xShapeInfo, Nd4jLong const* hXShapeInfo, void *extraParams, void *vz, Nd4jLong const* zShapeInfo, Nd4jLong const* hZShapeInfo, int *dimension, int dimensionLength, void *reductionPointer, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets); #else /** @@ -85,40 +85,31 @@ namespace functions { * @return */ template - static _CUDA_H X execScalar(void *x, - Nd4jLong *xShapeInfo, - void *extraParams); + static _CUDA_H X execScalar(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams); template - static _CUDA_H void execScalar(void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *z, - Nd4jLong *zShapeInfo); + static _CUDA_H void execScalar(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo); static X execScalar(int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams); + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams); static void execScalar(int opNum, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *z, - Nd4jLong *zShapeInfo); + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void 
*z, const Nd4jLong *zShapeInfo); static void exec(int opNum, - void *x, - Nd4jLong *xShapeInfo, + const void *x, const Nd4jLong *xShapeInfo, void *extraParams, - void *result, - Nd4jLong *resultShapeInfoBuffer, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, int64_t start, int64_t stop); + void *result, const Nd4jLong *resultShapeInfoBuffer, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + int64_t start, int64_t stop); /** * Execute on the cpu @@ -134,15 +125,12 @@ namespace functions { template - static void _CUDA_H exec(void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *result, - Nd4jLong *resultShapeInfoBuffer, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffset, int64_t start, int64_t stop); + static void _CUDA_H exec(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *result, const Nd4jLong *resultShapeInfoBuffer, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset, + int64_t start, int64_t stop); /** * CPU implementation @@ -154,11 +142,9 @@ namespace functions { * @param resultShapeInfo the shape information */ template - static void _CUDA_H exec(void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *result, - Nd4jLong *resultShapeInfo); + static void _CUDA_H exec(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *result, const Nd4jLong *resultShapeInfo); @@ -171,10 +157,9 @@ namespace functions { * @return */ template - static X _CUDA_H execScalar(void *x, - Nd4jLong xElementWiseStride, - Nd4jLong length, - void *extraParams); + static X _CUDA_H execScalar(const void *x, Nd4jLong xElementWiseStride, + Nd4jLong length, + void *extraParams); #endif }; diff --git a/libnd4j/include/loops/scalar.h b/libnd4j/include/loops/scalar.h index dc3a5b16c..f7333d57d 100755 --- a/libnd4j/include/loops/scalar.h +++ 
b/libnd4j/include/loops/scalar.h @@ -58,27 +58,77 @@ namespace functions { template __host__ - static void intermediateShaped(dim3& launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, Nd4jLong *hxShapeInfo, void *vz, Nd4jLong *zShapeInfo, Nd4jLong *hzShapeInfo, void* vscalar, void *vextraParams, int *allocPointer); + static void intermediateShaped(dim3& launchDims, cudaStream_t *stream, + const void *vx, const Nd4jLong *xShapeInfo, const Nd4jLong *hxShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, const Nd4jLong *hzShapeInfo, + const void* vscalar, + void *vextraParams, + int *allocPointer); template __host__ - static void intermediateAlongDimension(dim3& launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, void *z, Nd4jLong *zShapeInfo, void *scalars, void *extraParams, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ); + static void intermediateAlongDimension(dim3& launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + const void *scalars, + void *extraParams, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ); __host__ - static void executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, Nd4jLong *hxShapeInfo, void *result, Nd4jLong *resultShapeInfo, Nd4jLong *hzShapeInfo, void* scalar, void *extraParams); + static void executeCudaShaped(dim3& launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShapeInfo, const Nd4jLong *hxShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, const Nd4jLong *hzShapeInfo, + const void* scalar, + void *extraParams); __host__ - static void executeCudaAlongDimension(dim3& launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *z, Nd4jLong *zShapeInfo, 
void *scalars, void *extraParams, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ); + static void executeCudaAlongDimension(dim3& launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + const void *scalars, + void *extraParams, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ); #else template - static void transform(void *x, Nd4jLong *xShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, void *scalars, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ, const uint64_t start, const uint64_t stop); + static void transform(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, + const void *scalars, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ, + uint64_t start, uint64_t stop); - static void transform(int opNum, void *x, Nd4jLong *xShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, void *scalars, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ, const uint64_t start, const uint64_t stop); + static void transform(int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, + const void *scalars, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ, + uint64_t start, uint64_t stop); - static void transform(const int opNum, void *x, Nd4jLong *xShapeInfo, void *result, Nd4jLong *resultShapeInfo, void *scalar, void 
*extraParams, const uint64_t start, const uint64_t stop); + static void transform(int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + const void *scalar, + void *extraParams, + uint64_t start, uint64_t stop); - static void transform(const int opNum, void *x, Nd4jLong xStride, void *result, Nd4jLong resultStride, void *scalar, void *extraParams, const uint64_t len, const uint64_t start, const uint64_t stop); + static void transform(int opNum, + const void *x, Nd4jLong xStride, + void *result, Nd4jLong resultStride, + const void *scalar, + void *extraParams, + uint64_t len, uint64_t start, uint64_t stop); @@ -101,7 +151,11 @@ namespace functions { */ template - static void transform(void *x, Nd4jLong *xShapeInfo, void *result, Nd4jLong *resultShapeInfo, void *scalar, void *extraParams, const uint64_t start, const uint64_t stop); + static void transform(const void *x, const Nd4jLong *xShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + const void *scalar, + void *extraParams, + uint64_t start, uint64_t stop); /** @@ -117,7 +171,11 @@ namespace functions { */ template - static void transform(void *x, Nd4jLong xStride, void *result, Nd4jLong resultStride, void *scalar, void *extraParams, const uint64_t len, const uint64_t start, const uint64_t stop); + static void transform(const void *x, Nd4jLong xStride, + void *result, Nd4jLong resultStride, + const void *scalar, + void *extraParams, + uint64_t len, uint64_t start, uint64_t stop); #endif }; } diff --git a/libnd4j/include/loops/scalar_bool.h b/libnd4j/include/loops/scalar_bool.h index 0b26531b2..4992df5a1 100644 --- a/libnd4j/include/loops/scalar_bool.h +++ b/libnd4j/include/loops/scalar_bool.h @@ -58,43 +58,106 @@ namespace functions { template __device__ - static void transformCuda(void* scalar, void *vy, Nd4jLong *shapeInfo, void *vparams, void *vresult, Nd4jLong *resultShapeInfo, int *allocationBuffer); + static void transformCuda(const void* scalar, 
+ const void *vy, const Nd4jLong *shapeInfo, + void *vparams, + void *vresult, const Nd4jLong *resultShapeInfo, + int *allocationBuffer); template __device__ - static void transformCuda(Nd4jLong n, void* vx, void *vy, Nd4jLong yEWS, void *vparams, void *vz, Nd4jLong zEWS, int *allocationBuffer); + static void transformCuda(Nd4jLong n, + const void* vx, const void *vy, Nd4jLong yEWS, + void *vparams, + void *vz, Nd4jLong zEWS, + int *allocationBuffer); template __device__ - static void transformCuda(void *vx, Nd4jLong *xShapeInfo, void *vextraParams, void *vz, Nd4jLong *zShapeInfo, void *vscalars, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ); + static void transformCuda(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + void *vz, const Nd4jLong *zShapeInfo, + const void *vscalars, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ); template __host__ - static void intermediateAlongDimension(dim3& launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShapeInfo, void *z, Nd4jLong *zShapeInfo, void *scalars, void *extraParams, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ); + static void intermediateAlongDimension(dim3& launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + const void *scalars, + void *extraParams, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ); template __host__ - static void intermediateShaped(dim3& launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, void* vscalar, void *vextraParams, int *allocPointer); + static void 
intermediateShaped(dim3& launchDims, cudaStream_t *stream, + const void *vx, const Nd4jLong *xShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + const void* vscalar, + void *vextraParams, + int *allocPointer); __host__ - static void executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *result, Nd4jLong *resultShapeInfo, void* scalar, void *extraParams); + static void executeCudaShaped(dim3& launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + const void* scalar, + const void *extraParams); __host__ - static void executeCudaAlongDimension(dim3& launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *z, Nd4jLong *zShapeInfo, void *scalars, void *extraParams, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ); - + static void executeCudaAlongDimension(dim3& launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + const void *scalars, + void *extraParams, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ); /* #include "cuda/scalar_temp.cu" */ #else template - static void transform(void *x, Nd4jLong *xShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, void *scalars, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ, const uint64_t start, const uint64_t stop); + static void transform(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, + const void *scalars, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong 
*tadOffsetsZ, + uint64_t start, uint64_t stop); - static void transform(int opNum, void *x, Nd4jLong *xShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, void *scalars, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ, const uint64_t start, const uint64_t stop); + static void transform(int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, + const void *scalars, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ, + uint64_t start, uint64_t stop); - static void transform(const int opNum, void *x, Nd4jLong *xShapeInfo, void *result, Nd4jLong *resultShapeInfo, void *scalar, void *extraParams, const uint64_t start, const uint64_t stop); + static void transform(int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + const void *scalar, + void *extraParams, + uint64_t start, uint64_t stop); - static void transform(const int opNum, void *x, Nd4jLong xStride, void *result, Nd4jLong resultStride, void *scalar, void *extraParams, const uint64_t n, const uint64_t start, const uint64_t stop); + static void transform(int opNum, + const void *x, Nd4jLong xStride, + void *result, Nd4jLong resultStride, + const void *scalar, + void *extraParams, + uint64_t n, uint64_t start, uint64_t stop); @@ -117,7 +180,11 @@ namespace functions { */ template - static void transform(void *x, Nd4jLong *xShapeInfo, void *result, Nd4jLong *resultShapeInfo, void *scalar, void *extraParams, const uint64_t start, const uint64_t stop); + static void transform(const void *x, const Nd4jLong *xShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + const void *scalar, + void *extraParams, + uint64_t start, uint64_t stop); /** @@ -133,7 +200,10 @@ namespace functions { */ template - static void 
transform(void *x, Nd4jLong xStride, void *result, Nd4jLong resultStride, void *scalar, void *extraParams, const uint64_t n, const uint64_t start, const uint64_t stop); + static void transform(const void *x, Nd4jLong xStride, + void *result, Nd4jLong resultStride, + const void *scalar, void *extraParams, + uint64_t n, uint64_t start, uint64_t stop); #endif }; } diff --git a/libnd4j/include/loops/scalar_int.h b/libnd4j/include/loops/scalar_int.h index dde7af4c7..c3a53199e 100644 --- a/libnd4j/include/loops/scalar_int.h +++ b/libnd4j/include/loops/scalar_int.h @@ -58,40 +58,104 @@ namespace functions { template __device__ - static void transformCuda(void* scalar, void *vy, Nd4jLong *shapeInfo, void *vparams, void *vresult, Nd4jLong *resultShapeInfo, int *allocationBuffer); + static void transformCuda(const void* scalar, + const void *vy, const Nd4jLong *shapeInfo, + void *vparams, + void *vresult, const Nd4jLong *resultShapeInfo, + int *allocationBuffer); template __device__ - static void transformCuda(Nd4jLong n, void* vx, void *vy, Nd4jLong yEWS, void *vparams, void *vz, Nd4jLong zEWS, int *allocationBuffer); + static void transformCuda(Nd4jLong n, + const void* vx, const void *vy, Nd4jLong yEWS, + void *vparams, + void *vz, Nd4jLong zEWS, + int *allocationBuffer); template __device__ - static void transformCuda(void *vx, Nd4jLong *xShapeInfo, void *vextraParams, void *vz, Nd4jLong *zShapeInfo, void *vscalars, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ); + static void transformCuda(const void *vx, const Nd4jLong *xShapeInfo, + void *vextraParams, + void *vz, const Nd4jLong *zShapeInfo, + const void *vscalars, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ); template __host__ - static void intermediateAlongDimension(dim3& launchDims, cudaStream_t *stream, void *x, 
Nd4jLong *xShapeInfo, void *z, Nd4jLong *zShapeInfo, void *scalars, void *extraParams, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ); + static void intermediateAlongDimension(dim3& launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + const void *scalars, + void *extraParams, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ); template __host__ - static void intermediateShaped(dim3& launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, void* vscalar, void *vextraParams, int *allocPointer); + static void intermediateShaped(dim3& launchDims, cudaStream_t *stream, + const void *vx, const Nd4jLong *xShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + const void* vscalar, + void *vextraParams, + int *allocPointer); __host__ - static void executeCudaShaped(dim3& launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *result, Nd4jLong *resultShapeInfo, void* scalar, void *extraParams); + static void executeCudaShaped(dim3& launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + const void* scalar, + void *extraParams); __host__ - static void executeCudaAlongDimension(dim3& launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, void *z, Nd4jLong *zShapeInfo, void *scalars, void *extraParams, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ); - + static void executeCudaAlongDimension(dim3& launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *z, const Nd4jLong *zShapeInfo, + const void *scalars, + void *extraParams, + 
int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ); #else template - static void transform(void *x, Nd4jLong *xShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, void *scalars, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ, const uint64_t start, const uint64_t stop); + static void transform(const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, + const void *scalars, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ, + uint64_t start, uint64_t stop); - static void transform(int opNum, void *x, Nd4jLong *xShapeInfo, void *extraParams, void *z, Nd4jLong *zShapeInfo, void *scalars, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ, const uint64_t start, const uint64_t stop); + static void transform(int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *z, const Nd4jLong *zShapeInfo, + const void *scalars, + int *dimension, int dimensionLength, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, + const Nd4jLong *tadShapeInfoZ, const Nd4jLong *tadOffsetsZ, + uint64_t start, uint64_t stop); - static void transform(const int opNum, void *x, Nd4jLong *xShapeInfo, void *result, Nd4jLong *resultShapeInfo, void *scalar, void *extraParams, const uint64_t start, const uint64_t stop); + static void transform(int opNum, + const void *x, const Nd4jLong *xShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + const void *scalar, + void *extraParams, + uint64_t start, + uint64_t stop); - static void transform(const int opNum, void *x, Nd4jLong xStride, void *result, Nd4jLong resultStride, void *scalar, 
void *extraParams, const uint64_t n, const uint64_t start, const uint64_t stop); + static void transform(int opNum, + const void *x, Nd4jLong xStride, + void *result, Nd4jLong resultStride, + const void *scalar, + void *extraParams, + uint64_t n, uint64_t start, uint64_t stop); @@ -114,7 +178,11 @@ namespace functions { */ template - static void transform(void *x, Nd4jLong *xShapeInfo, void *result, Nd4jLong *resultShapeInfo, void *scalar, void *extraParams, const uint64_t start, const uint64_t stop); + static void transform(const void *x, const Nd4jLong *xShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + const void *scalar, + void *extraParams, + uint64_t start, uint64_t stop); /** @@ -130,7 +198,11 @@ namespace functions { */ template - static void transform(void *x, Nd4jLong xStride, void *result, Nd4jLong resultStride, void *scalar, void *extraParams, const uint64_t n, const uint64_t start, const uint64_t stop); + static void transform(const void *x, Nd4jLong xStride, + void *result, Nd4jLong resultStride, + const void *scalar, + void *extraParams, + uint64_t n, uint64_t start, uint64_t stop); #endif }; } diff --git a/libnd4j/include/loops/special_kernels.h b/libnd4j/include/loops/special_kernels.h index 52cdb7fdd..209d35120 100644 --- a/libnd4j/include/loops/special_kernels.h +++ b/libnd4j/include/loops/special_kernels.h @@ -36,44 +36,44 @@ namespace sd { template - _CUDA_H void fillIsMaxGeneric(dim3 &launchDims, cudaStream_t *stream, void *dx, Nd4jLong *xShapeInfo, Nd4jLong length, long idx); + _CUDA_H void fillIsMaxGeneric(dim3 &launchDims, cudaStream_t *stream, void *dx, const Nd4jLong *xShapeInfo, Nd4jLong length, long idx); template - _CUDA_H void fillDimensionalIsMaxGeneric(dim3 &launchDims, cudaStream_t *stream, void *dX, void *dZ, Nd4jLong *zShapeInfo, Nd4jLong *tadOnlyShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadOffsets); + _CUDA_H void fillDimensionalIsMaxGeneric(dim3 &launchDims, cudaStream_t *stream, const void *dX, void 
*dZ, const Nd4jLong *zShapeInfo, const Nd4jLong *tadOnlyShapeInfo, int *dimension, int dimensionLength, const Nd4jLong *tadOffsets); template _CUDA_H void convertToHalfGeneric(dim3 &launchDims, cudaStream_t *stream, void *dx, Nd4jLong n, half *dz); template - _CUDA_H void tearKernelGeneric(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, Nd4jPointer *targets, - Nd4jLong *zShapeInfo, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + _CUDA_H void tearKernelGeneric(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, Nd4jPointer *targets, + Nd4jLong const* zShapeInfo, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets); template _CUDA_H void shuffleKernelGeneric(dim3 &launchDims, cudaStream_t *stream, void **vdX, Nd4jLong **xShapeInfo, void **vdZ, int N, - int *shuffleMap, Nd4jLong **tadOnlyShapeInfo, Nd4jLong **tadOffsets); + int *shuffleMap, Nd4jLong** tadOnlyShapeInfo, Nd4jLong** tadOffsets); template _CUDA_H void convertHalfsToGeneric(dim3 &launchDims, cudaStream_t *stream, half *dx, Nd4jLong n, void *dz); template _CUDA_H void concatKernelVStackGeneric(dim3 &launchDims, cudaStream_t *stream, int numArrays, Nd4jPointer *data, - Nd4jPointer *inputShapeInfos, void *vz, Nd4jLong *zShapeInfo); + Nd4jPointer *inputShapeInfos, void *vz, Nd4jLong const* zShapeInfo); template _CUDA_H void concatKernelScalarGeneric(dim3 &launchDims, cudaStream_t *stream, int numArrays, Nd4jPointer *data, void *vresult); template _CUDA_H void concatKernelHStackGeneric(dim3 &launchDims, cudaStream_t *stream, int numArrays, Nd4jPointer *data, - Nd4jPointer *inputShapeInfos, void *vresult, Nd4jLong *resultShapeInfo); + Nd4jPointer *inputShapeInfos, void *vresult, Nd4jLong const* resultShapeInfo); template _CUDA_H void concatKernelGeneric(dim3 &launchDims, cudaStream_t *stream, int numArrays, Nd4jPointer *data, - Nd4jPointer *inputShapeInfos, void *vresult, Nd4jLong *resultShapeInfo, - Nd4jPointer *tadPointers, Nd4jPointer *offsetPointers, 
Nd4jLong *zTadShape, Nd4jLong *zOffsets); + Nd4jPointer *inputShapeInfos, void *vresult, Nd4jLong const* resultShapeInfo, + Nd4jPointer *tadPointers, Nd4jPointer *offsetPointers, Nd4jLong const* zTadShape, Nd4jLong const* zOffsets); template _CUDA_H void pullRowsKernelGeneric(dim3 &launchDims, cudaStream_t *stream, void *vx, void *vz, Nd4jLong n, Nd4jLong *indexes, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *zTadShapeInfo, Nd4jLong *zTadOffsets); + Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, Nd4jLong const* zTadShapeInfo, Nd4jLong const* zTadOffsets); template _CUDA_H void averagingKernelGeneric(dim3 &launchDims, cudaStream_t *stream, void **vdx, void *vdz, int n, Nd4jLong length, bool propagate); @@ -85,20 +85,19 @@ namespace sd { _CUDA_H void flattenKernelGeneric(dim3& launchDims, cudaStream_t *stream, Nd4jPointer *extraPointers, int dOffset, char order, void *vz, Nd4jLong *zShapeInfo, void *vy, Nd4jLong *yShapeInfo); template - _CUDA_H void tileKernelH(void const* inputBuffer, Nd4jLong* inputShape, void* outputBuffer, Nd4jLong* outputShape, Nd4jLong resultLength, cudaStream_t *stream); + _CUDA_H void tileKernelH(void const* inputBuffer, Nd4jLong const* inputShape, void* outputBuffer, Nd4jLong const* outputShape, Nd4jLong resultLength, cudaStream_t *stream); template - _CUDA_H void tileKernelHH(void const* inputBuffer, Nd4jLong* inputShape, void* outputBuffer, Nd4jLong* outputShape, Nd4jLong resultLength, Nd4jLong ews, cudaStream_t *stream); - + _CUDA_H void tileKernelHH(void const* inputBuffer, Nd4jLong const* inputShape, void* outputBuffer, Nd4jLong const* outputShape, Nd4jLong resultLength, Nd4jLong ews, cudaStream_t *stream); class NDArray; template - _CUDA_H void setDiagonalValueUpper(void* buffer, Nd4jLong* shape, NDArray const& value, int diagonal, Nd4jLong rows, Nd4jLong cols, cudaStream_t& stream); + _CUDA_H void setDiagonalValueUpper(void* buffer, Nd4jLong const* shape, NDArray const& value, int diagonal, Nd4jLong rows, 
Nd4jLong cols, cudaStream_t& stream); template - _CUDA_H void setDiagonalValueLower(void* buffer, Nd4jLong* shape, NDArray const& value, int diagonal, Nd4jLong rows, Nd4jLong cols, cudaStream_t& stream); + _CUDA_H void setDiagonalValueLower(void* buffer, Nd4jLong const* shape, NDArray const& value, int diagonal, Nd4jLong rows, Nd4jLong cols, cudaStream_t& stream); template - _CUDA_H void templatedSwapUnsafe(void* theFirstBuffer, Nd4jLong* theFirstShape, void* theSecondBuffer, Nd4jLong* theSecondShape, cudaStream_t* theStream); + _CUDA_H void templatedSwapUnsafe(void* theFirstBuffer, Nd4jLong const* theFirstShape, void* theSecondBuffer, Nd4jLong const* theSecondShape, cudaStream_t* theStream); } diff --git a/libnd4j/include/loops/summarystatsreduce.h b/libnd4j/include/loops/summarystatsreduce.h index 0a429cd2b..1ab06a11b 100755 --- a/libnd4j/include/loops/summarystatsreduce.h +++ b/libnd4j/include/loops/summarystatsreduce.h @@ -270,7 +270,7 @@ namespace functions { #ifdef __CUDACC__ - static inline _CUDA_D Z startingValue(X *input) { + static inline _CUDA_D Z startingValue(X const* input) { return static_cast(0); } @@ -279,62 +279,51 @@ namespace functions { template - static _CUDA_D void transform(void *dx, Nd4jLong *xShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets); + static _CUDA_D void transform(void const* dx, Nd4jLong const* xShapeInfo, void *extraParams, void *vz, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets); - static _CUDA_D void transform(const int opNum, void *dx, Nd4jLong *xShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, 
Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets); + static _CUDA_D void transform(const int opNum, void const* dx, Nd4jLong const* xShapeInfo, void *extraParams, void *vz, Nd4jLong const* zShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong const* tadOnlyShapeInfo, Nd4jLong const* tadOffsets); - static _CUDA_H void execSummaryStatsReduceScalar(dim3& launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, Nd4jLong *hxShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong *hzShapeInfo, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool biasCorrected, void *reductionBuffer); - static _CUDA_H void execSummaryStatsReduce(dim3& launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, Nd4jLong *hxShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong *hzShapeInfo, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool biasCorrected, void *reductionBuffer); - static _CUDA_H void execSummaryStatsReduce(dim3& launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShapeInfo, Nd4jLong *hxShapeInfo, void *extraParams, void *vz, Nd4jLong *zShapeInfo, Nd4jLong *hzShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool biasCorrected, void *reductionBuffer); + static _CUDA_H void execSummaryStatsReduceScalar(dim3& launchDims, cudaStream_t *stream, int opNum, void const* x, Nd4jLong const* xShapeInfo, Nd4jLong const* hxShapeInfo, void *extraParams, void *vz, Nd4jLong const* zShapeInfo, Nd4jLong const* hzShapeInfo, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, bool biasCorrected, void *reductionBuffer); + static _CUDA_H void execSummaryStatsReduce(dim3& launchDims, cudaStream_t *stream, int opNum, void const* x, Nd4jLong const* xShapeInfo, Nd4jLong const* hxShapeInfo, void *extraParams, void *vz, Nd4jLong const* zShapeInfo, Nd4jLong const* hzShapeInfo, Nd4jLong const* tadShapeInfo, 
Nd4jLong const* tadOffsets, bool biasCorrected, void *reductionBuffer); + static _CUDA_H void execSummaryStatsReduce(dim3& launchDims, cudaStream_t *stream, int opNum, void const* x, Nd4jLong const* xShapeInfo, Nd4jLong const* hxShapeInfo, void *extraParams, void *vz, Nd4jLong const* zShapeInfo, Nd4jLong const* hzShapeInfo, int *dimension, int dimensionLength, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, bool biasCorrected, void *reductionBuffer); #else static Z execScalar(int opNum, - bool biasCorrected, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams); + bool biasCorrected, + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams); static void execScalar(int opNum, - bool biasCorrected, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *vz, - Nd4jLong *resultShapeInfoBuffer); + bool biasCorrected, + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *vz, const Nd4jLong *resultShapeInfoBuffer); static void exec(int opNum, - bool biasCorrected, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *vz, - Nd4jLong *resultShapeInfoBuffer, - int *dimension, int dimensionLength); + bool biasCorrected, + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *vz, const Nd4jLong *resultShapeInfoBuffer, + int *dimension, int dimensionLength); template static Z execScalar(bool biasCorrected, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams); + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams); template static void execScalar(bool biasCorrected, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *vz, - Nd4jLong *resultShapeInfoBuffer); + const void *x, const Nd4jLong *xShapeInfo, + void *extraParams, + void *vz, const Nd4jLong *resultShapeInfoBuffer); template static void exec(bool biasCorrected, - void *x, - Nd4jLong *xShapeInfo, - void *extraParams, - void *vz, - Nd4jLong *resultShapeInfoBuffer, - int *dimension, - int dimensionLength); + const void *x, 
const Nd4jLong *xShapeInfo, + void *extraParams, + void *vz, const Nd4jLong *resultShapeInfoBuffer, + int *dimension, int dimensionLength); #endif }; } diff --git a/libnd4j/include/loops/transform_any.h b/libnd4j/include/loops/transform_any.h index 22d56a4d3..751328b89 100644 --- a/libnd4j/include/loops/transform_any.h +++ b/libnd4j/include/loops/transform_any.h @@ -57,18 +57,40 @@ class TransformAny { #ifdef __CUDACC__ template - static __device__ void transformCuda(void *vx, Nd4jLong *xShapeInfo, void *params, void *vz, Nd4jLong *zShapeInfo, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static __device__ void transformCuda(const void *vx, const Nd4jLong *xShapeInfo, + void *params, + void *vz, const Nd4jLong *zShapeInfo, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); template - static _CUDA_H void intermediateShaped(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static _CUDA_H void intermediateShaped(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShape, int xRank, + void *extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); - static _CUDA_H void executeTransformShaped(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static _CUDA_H void executeTransformShaped(dim3 launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShape, int xRank, + void *extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int 
*allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); #else - static void exec(int opNum, void *dx, Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, void *extraParams, uint64_t threadId, uint64_t numThreads); + static void exec(int opNum, + const void *dx, const Nd4jLong *xShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + void *extraParams, + uint64_t threadId, uint64_t numThreads); template - static ND4J_EXPORT void exec(void *dx, Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, void *extraParams, uint64_t threadId, uint64_t numThreads); + static ND4J_EXPORT void exec(const void *dx, const Nd4jLong *xShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + void *extraParams, + uint64_t threadId, uint64_t numThreads); #endif }; diff --git a/libnd4j/include/loops/transform_bool.h b/libnd4j/include/loops/transform_bool.h index 56a7f8f7e..5553c164f 100644 --- a/libnd4j/include/loops/transform_bool.h +++ b/libnd4j/include/loops/transform_bool.h @@ -57,27 +57,40 @@ namespace functions { #ifdef __CUDACC__ template - static __device__ void transformCuda( - void *dy, - Nd4jLong *shapeInfo, - void *params, - void *result, - Nd4jLong *resultShapeInfo, - int *allocationPointer, - void *reductionPointer, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets); + static __device__ void transformCuda(const void *dy, const Nd4jLong *shapeInfo, + void *params, + void *result, const Nd4jLong *resultShapeInfo, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); template - static _CUDA_H void intermediateShaped(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static _CUDA_H void intermediateShaped(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShape, int xRank, + void 
*extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); - static _CUDA_H void executeTransformShaped(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static _CUDA_H void executeTransformShaped(dim3 launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShape, int xRank, + void *extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); #else - static void exec(int opNum, void *dx, Nd4jLong *xShapeInfo, void *result, Nd4jLong *resultShapeInfo, void *extraParams, uint64_t threadId, uint64_t numThreads); + static void exec(int opNum, + const void *dx, const Nd4jLong *xShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + void *extraParams, + uint64_t threadId, uint64_t numThreads); template - static ND4J_EXPORT void exec(void *dx, Nd4jLong *xShapeInfo, void *result, Nd4jLong *resultShapeInfo, void *extraParams, uint64_t threadId, uint64_t numThreads); + static ND4J_EXPORT void exec(const void *dx, const Nd4jLong *xShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + void *extraParams, + uint64_t threadId, uint64_t numThreads); #endif }; } diff --git a/libnd4j/include/loops/transform_float.h b/libnd4j/include/loops/transform_float.h index 1d9b6fb71..4264278ba 100644 --- a/libnd4j/include/loops/transform_float.h +++ b/libnd4j/include/loops/transform_float.h @@ -57,51 +57,55 @@ namespace functions { #ifdef __CUDACC__ template - static __device__ void transformCuda( - void *dy, - Nd4jLong *shapeInfo, - void *params, - void *result, - Nd4jLong *resultShapeInfo, - int *allocationPointer, - void *reductionPointer, - 
Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets); + static __device__ void transformCuda(const void *dy, const Nd4jLong *shapeInfo, + void *params, + void *result, const Nd4jLong *resultShapeInfo, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); - static __device__ void transformCudaLegacy( - int opNum, - void *dy, - Nd4jLong *shapeInfo, - void *params, - void *result, - Nd4jLong *resultShapeInfo, - int *allocationPointer, - void *reductionPointer, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets); + static __device__ void transformCudaLegacy(int opNum, + const void *dy, const Nd4jLong *shapeInfo, + void *params, + void *result, const Nd4jLong *resultShapeInfo, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); template - static __device__ void transformCuda( - Nd4jLong n, - void *dy, - Nd4jLong incy, - void *params, - void *result, - Nd4jLong resultStride, - int *allocationPointer, - void *reductionPointer); + static __device__ void transformCuda(Nd4jLong n, + const void *dy, Nd4jLong incy, + void *params, + void *result, Nd4jLong resultStride, + int *allocationPointer, void *reductionPointer); template - static _CUDA_H void intermediateShaped(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static _CUDA_H void intermediateShaped(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShape, int xRank, + void *extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); - static _CUDA_H void executeTransformShaped(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong 
*zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static _CUDA_H void executeTransformShaped(dim3 launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShape, int xRank, + void *extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); #else - static void exec(int opNum, void *dx, Nd4jLong *xShapeInfo, void *result, Nd4jLong *resultShapeInfo, void *extraParams, uint64_t threadId, uint64_t numThreads); + static void exec(int opNum, + const void *dx, const Nd4jLong *xShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + void *extraParams, + uint64_t threadId, uint64_t numThreads); template - static ND4J_EXPORT void exec(void *dx, Nd4jLong *xShapeInfo, void *result, Nd4jLong *resultShapeInfo, void *extraParams, uint64_t threadId, uint64_t numThreads); + static ND4J_EXPORT void exec(const void *dx, const Nd4jLong *xShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + void *extraParams, + uint64_t threadId, uint64_t numThreads); #endif }; } diff --git a/libnd4j/include/loops/transform_same.h b/libnd4j/include/loops/transform_same.h index cb36ba872..cb069ecc9 100644 --- a/libnd4j/include/loops/transform_same.h +++ b/libnd4j/include/loops/transform_same.h @@ -57,29 +57,42 @@ namespace functions { #ifdef __CUDACC__ template - static __device__ void transformCuda( - void *dy, - Nd4jLong *shapeInfo, - void *params, - void *result, - Nd4jLong *resultShapeInfo, - int *allocationPointer, - void *reductionPointer, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets); + static __device__ void transformCuda(const void *dy, const Nd4jLong *shapeInfo, + void *params, + void *result, const Nd4jLong *resultShapeInfo, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); template - static _CUDA_H void 
intermediateShaped(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static _CUDA_H void intermediateShaped(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShape, int xRank, + void *extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); - static _CUDA_H void executeTransformShaped(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static _CUDA_H void executeTransformShaped(dim3 launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShape, int xRank, + void *extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); #else - static void exec(int opNum, void *dx, Nd4jLong *xShapeInfo, void *result, Nd4jLong *resultShapeInfo, void *extraParams, uint64_t threadId, uint64_t numThreads); + static void exec(int opNum, + const void *dx, const Nd4jLong *xShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + void *extraParams, + uint64_t threadId, uint64_t numThreads); template - static ND4J_EXPORT void exec(void *dx, Nd4jLong *xShapeInfo, void *result, Nd4jLong *resultShapeInfo, void *extraParams, uint64_t threadId, uint64_t numThreads); + static ND4J_EXPORT void exec(const void *dx, const Nd4jLong *xShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + void *extraParams, + uint64_t threadId, uint64_t numThreads); #endif }; } diff --git a/libnd4j/include/loops/transform_strict.h b/libnd4j/include/loops/transform_strict.h 
index b7ba63e46..903f4e9df 100644 --- a/libnd4j/include/loops/transform_strict.h +++ b/libnd4j/include/loops/transform_strict.h @@ -57,31 +57,44 @@ namespace functions { #ifdef __CUDACC__ template - static __device__ void transformCuda( - void *dy, - Nd4jLong *shapeInfo, - void *params, - void *result, - Nd4jLong *resultShapeInfo, - int *allocationPointer, - void *reductionPointer, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets); + static __device__ void transformCuda(const void *dy, const Nd4jLong *shapeInfo, + void *params, + void *result, const Nd4jLong *resultShapeInfo, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); template - static _CUDA_H void intermediateShaped(dim3 launchDims, cudaStream_t *stream, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static _CUDA_H void intermediateShaped(dim3 launchDims, cudaStream_t *stream, + const void *x, const Nd4jLong *xShape, int xRank, + void *extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); - static _CUDA_H void executeTransformShaped(dim3 launchDims, cudaStream_t *stream, int opNum, void *x, Nd4jLong *xShape, int xRank, void *extraParams, void *z, Nd4jLong *zShape, int zRank, int *allocationPointer, void *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); + static _CUDA_H void executeTransformShaped(dim3 launchDims, cudaStream_t *stream, + int opNum, + const void *x, const Nd4jLong *xShape, int xRank, + void *extraParams, + void *z, const Nd4jLong *zShape, int zRank, + int *allocationPointer, void *reductionPointer, + const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets); #else - static void exec(int opNum, void *dx, Nd4jLong *xShapeInfo, void *result, Nd4jLong 
*resultShapeInfo, void *extraParams, uint64_t threadId, uint64_t numThreads); + static void exec(int opNum, + const void *dx, const Nd4jLong *xShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + void *extraParams, + uint64_t threadId, uint64_t numThreads); template - static ND4J_EXPORT void exec(void *dx, Nd4jLong *xShapeInfo, void *result, Nd4jLong *resultShapeInfo, void *extraParams, uint64_t threadId, uint64_t numThreads); + static ND4J_EXPORT void exec(const void *dx, const Nd4jLong *xShapeInfo, + void *result, const Nd4jLong *resultShapeInfo, + void *extraParams, + uint64_t threadId, uint64_t numThreads); #endif }; diff --git a/libnd4j/include/loops/type_conversions.h b/libnd4j/include/loops/type_conversions.h index ff5ac5400..b56921435 100644 --- a/libnd4j/include/loops/type_conversions.h +++ b/libnd4j/include/loops/type_conversions.h @@ -67,7 +67,7 @@ namespace sd { static _CUDA_H void convertToThreshold(Nd4jPointer * extras, void *dx, Nd4jLong N, void *dz); template - static _CUDA_H void convertFromThreshold(Nd4jPointer * extras, void *dx, Nd4jLong N, void *dz); + static _CUDA_H void convertFromThreshold(Nd4jPointer * extras, const void *dx, Nd4jLong N, void *dz); FORCEINLINE static _CUDA_H Nd4jLong estimateQuantizedSize(Nd4jLong rawSize) { if (rawSize <= 0) @@ -110,303 +110,27 @@ namespace sd { } #ifdef __CUDACC__ - /* - * PLEASE NOTE: This kernel doesn't allow loop for data. Basically: grid will be huge. - */ - template - __device__ inline void encoderKernelP1(void *dx, Nd4jLong N, void *dz, float threshold) { - auto x = reinterpret_cast (dx); - auto z = reinterpret_cast (dz); - - //basically, for phase One we want do calculation: how many eligible values we have, and which blocks will be holding data - Nd4jLong tid = blockIdx.x * blockDim.x + threadIdx.x; - - int pass = tid < N && sd::math::nd4j_abs(x[tid]) >= static_cast(threshold) ? 
1 : 0; - int bp=__syncthreads_count(pass); - - if (threadIdx.x == 0) { - // saving out per-block passes - z[blockIdx.x+1] = bp; - - // saving out sum - atomicAdd(&z[0], bp); - } - } - __device__ __inline__ int pow2i (int e){ return 1< - __host__ void encoderKernelP1Generic(dim3 &launchDims, cudaStream_t *stream, void *dx, Nd4jLong N, void *dz, float threshold); + __host__ void encoderKernelP1Generic(dim3 &launchDims, cudaStream_t *stream, const void *dx, Nd4jLong N, void *dz, float threshold); -/* - * PLEASE NOTE: This kernel doesn't allow loop for data. Basically: grid will be huge. - * - * Based on: https://github.com/knotman90/cuStreamComp <-- efficient CUDA stream compaction algorithm - */ - template - __device__ inline void encoderKernelP3(void *dx, int *offsets, Nd4jLong N, void *dz) { - T *x = reinterpret_cast (dx); - int *z = reinterpret_cast (dz); - - Nd4jLong tid = blockIdx.x * blockDim.x + threadIdx.x; - extern __shared__ int warpTotals[]; - - // fetch block offset only once - __shared__ float threshold; - __shared__ FloatBits fb; - __shared__ int bo; - __shared__ int limit; - if (threadIdx.x == 0) { - limit = z[0]; - fb.i_ = z[2]; - threshold = fb.f_; - bo = offsets[blockIdx.x]; - } - __syncthreads(); - - if (tid < N) { - T value = x[tid]; - int pred = sd::math::nd4j_abs(value) >= static_cast(threshold) ? 1 : 0; - int w_i = threadIdx.x/warpSize; //warp index - int w_l = tid % warpSize;//thread index within a warp - unsigned int t_m = INT_MAX >> (warpSize-w_l-1); //thread mask (ERROR IN THE PAPER minus one is required) - - int b = __ballot_sync(t_m, pred); //balres = number whose ith bit isone if the ith's thread pred is true masked up to the current index in warp - int t_u = __popc(b); // popc count the number of bit one. simply count the number predicated true BEFORE MY INDEX - - if(w_l==warpSize-1){ - warpTotals[w_i]=t_u+pred; - } -// __syncthreads(); // Eliminated due RTX20xx specific - - if(w_i==0 && w_l static_cast(0.0f) ? 
tid+1 : -(tid + 1); - x[tid] = value > static_cast(0.0f) ? x[tid] - threshold : x[tid] + threshold; - } - } - } - } template __host__ void encoderKernelP3Generic(dim3 &launchDims, cudaStream_t *stream, void *dx, int *offsets, Nd4jLong N, void *dz); - /* -* This kernel handles decode from sparse threshold array, to dense array - * - * PLEASE NOTE: Z is expected to be memset to 0 -*/ template - __device__ inline void decoderKernel(void *dx, Nd4jLong N, void *dz) { - auto x = reinterpret_cast (dx); - auto z = reinterpret_cast (dz); - - int tid = blockIdx.x * blockDim.x + threadIdx.x; - __shared__ float threshold; - __shared__ int limit; - - __shared__ FloatBits fb; - if (threadIdx.x == 0) { - limit = x[0]; - fb.i_ = x[2]; - threshold = fb.f_; - } - __syncthreads(); - - for (int e = tid; e < limit; e += blockDim.x * gridDim.x) { - int el = x[e+4]; - int ael = sd::math::nd4j_abs(el) - 1; - - // TODO: investigate, if += would work better here, as in "decoded accumulation" - z[ael] += el > 0 ? threshold : -threshold; - } - } - - template - __host__ void decoderKernelGeneric(dim3 &launchDims, cudaStream_t *stream, void *dx, Nd4jLong N, void *dz); - - -////////////////////////////////////////////////////////////////////////// - template - __device__ inline void cudaEncodeBitmapKernel(void *vdx, Nd4jLong N, int *dz, int *scalar, int *reductionBuffer, float threshold) { - - auto dx = reinterpret_cast(vdx); - int tid = blockIdx.x * blockDim.x + threadIdx.x; - - T off(0.0f); - __shared__ int counter; - __shared__ int *shmem; - __shared__ T *vals; - if (threadIdx.x == 0){ - extern __shared__ char mem[]; - shmem = reinterpret_cast(mem); - vals = reinterpret_cast(shmem + blockDim.x); - counter = 0; - } - __syncthreads(); - - Nd4jLong loopRemainder = N % (blockDim.x * gridDim.x); - Nd4jLong loopLimit = N + (blockDim.x * gridDim.x - loopRemainder); - - for (Nd4jLong i = tid; i < loopLimit; i += blockDim.x * gridDim.x) { - // all threads in block reading stuff - T val = i < N ? 
dx[i] : off; - T abs = sd::math::nd4j_abs(val); - - int byteId = i / 16 + 4; - int bitId = i % 16; - - shmem[threadIdx.x] = 0; - vals[threadIdx.x] = val; - - if (abs >= static_cast(threshold) && i < N) { - shmem[threadIdx.x] = 1 << (bitId); - atomicAdd(&counter, 1); - if (val < static_cast(0.0f)) { - shmem[threadIdx.x] |= 1 << (bitId + 16); - vals[threadIdx.x] += static_cast(threshold); - } else { - vals[threadIdx.x] -= static_cast(threshold); - } - } else if (abs >= static_cast(threshold) / static_cast(2.0f) && val < static_cast(0.0f) && i < N) { - atomicAdd(&counter, 1); - shmem[threadIdx.x] = 1 << (bitId + 16); - - vals[threadIdx.x] += static_cast(threshold) / static_cast(2.0f); - } - __syncthreads(); - - if (threadIdx.x % 16 == 0 && i < N) { - int byte = 0; - for (int e = 0; e < 16; e++) { - if (i + e >= N) - continue; - - byte |= shmem[threadIdx.x + e]; - } - dz[byteId] = byte; - } - __syncthreads(); - - if (i < N) - dx[i] = vals[threadIdx.x]; - } - __syncthreads(); - - if (threadIdx.x == 0) { - atomicAdd(scalar, counter); - } - } + __host__ void decoderKernelGeneric(dim3 &launchDims, cudaStream_t *stream, const void *dx, Nd4jLong N, void *dz); template __host__ void cudaEncodeBitmapGeneric(dim3 &launchDims, cudaStream_t *stream, void *vdx, Nd4jLong N, int *dz, int *scalar, int *reductionBuffer, float threshold); -////////////////////////////////////////////////////////////////////////// - template - __device__ inline void cudaDecodeBitmapKernel(void *dx, Nd4jLong N, void *vdz) { - - auto dz = static_cast(vdz); - - int tid = blockIdx.x * blockDim.x + threadIdx.x; - __shared__ T *shmem; - __shared__ FloatBits fb; - __shared__ float threshold; - __shared__ int *x; - if (threadIdx.x == 0){ - extern __shared__ char mem[]; - shmem = reinterpret_cast(mem); - x = reinterpret_cast(dx); - fb.i_ = x[2]; - threshold = fb.f_; - } - __syncthreads(); - - int lim = N / 16 + 5; - for (int i = tid; i < N; i += blockDim.x * gridDim.x) { - int byteId = i / 16 + 4; -// printf("I: 
[%i]; byteId: [%i]\n", i, byteId); - - shmem[threadIdx.x] = dz[i]; - __syncthreads(); - - if (threadIdx.x % 16 == 0) { - int byte = x[byteId]; - - for (int e = 0; e < 16; e++) { - if (i + e >= N) - continue; - - int bitId = (i + e) % 16; - - bool hasBit = (byte & 1 << (bitId) ) != 0; - bool hasSign = (byte & 1 << (bitId + 16) ) != 0; - - if (hasBit) { - if (hasSign) - shmem[threadIdx.x + bitId] -= threshold; - else - shmem[threadIdx.x + bitId] += threshold; - } else if (hasSign) { - shmem[threadIdx.x + bitId] -= threshold / 2; - } - } - } - __syncthreads(); - - dz[i] = shmem[threadIdx.x]; - } - } template - __host__ void cudaDecodeBitmapGeneric(dim3 &launchDims, cudaStream_t *stream, void *dx, Nd4jLong N, void *vdz); - - // __global__ void cudaEncodeBitmapFloat(float *dx, Nd4jLong N, int *dz, int *scalar, int *reductionBuffer, float threshold); - - // __global__ void cudaEncodeBitmapDouble(double *dx, Nd4jLong N, int *dz, int *scalar, int *reductionBuffer, float threshold); - - // __global__ void cudaEncodeBitmapHalf(float16 *dx, Nd4jLong N, int *dz, int *scalar, int *reductionBuffer, float threshold); - - // __global__ void cudaDecodeBitmapFloat(void *dx, Nd4jLong N, float *dz); - - // __global__ void cudaDecodeBitmapDouble(void *dx, Nd4jLong N, double *dz); - - // __global__ void cudaDecodeBitmapHalf(void *dx, Nd4jLong N, float16 *dz); - - // __global__ void encoderKernelP1Float(void *dx, Nd4jLong N, void *dz, float threshold); - - // __global__ void encoderKernelP1Double(void *dx, Nd4jLong N, void *dz, float threshold); - - // __global__ void encoderKernelP1Half(void *dx, Nd4jLong N, void *dz, float threshold); - - // __global__ void encoderKernelP2Float(int *dx, Nd4jLong N, int *dz); - - // __global__ void encoderKernelP3Float(void *dx, int *offsets, Nd4jLong N, void *dz); - - // __global__ void encoderKernelP3Double(void *dx, int *offsets, Nd4jLong N, void *dz); - - // __global__ void encoderKernelP3Half(void *dx, int *offsets, Nd4jLong N, void *dz); - - // 
__global__ void decoderKernelFloat(void *dx, Nd4jLong N, void *dz); - - // __global__ void decoderKernelDouble(void *dx, Nd4jLong N, void *dz); - - // __global__ void decoderKernelHalf(void *dx, Nd4jLong N, void *dz); + __host__ void cudaDecodeBitmapGeneric(dim3 &launchDims, cudaStream_t *stream, const void *dx, Nd4jLong N, void *vdz); __global__ void uniformAdd(int *g_data, int *uniforms, int n, int blockOffset, int baseIndex); diff --git a/libnd4j/include/ops/declarable/CustomOperations.h b/libnd4j/include/ops/declarable/CustomOperations.h index f98deb784..8aa612c7b 100644 --- a/libnd4j/include/ops/declarable/CustomOperations.h +++ b/libnd4j/include/ops/declarable/CustomOperations.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/libnd4j/include/ops/declarable/generic/blas/matmul.cpp b/libnd4j/include/ops/declarable/generic/blas/matmul.cpp index 370aa50c6..c9d8c9476 100644 --- a/libnd4j/include/ops/declarable/generic/blas/matmul.cpp +++ b/libnd4j/include/ops/declarable/generic/blas/matmul.cpp @@ -138,9 +138,9 @@ DECLARE_SHAPE_FN(matmul) { ////////////////////////////////////////////////////////////////////// DECLARE_TYPES(matmul) { getOpDescriptor() - ->setAllowedInputTypes(0, {ALL_FLOATS}) - ->setAllowedInputTypes(1, {ALL_FLOATS}) - ->setAllowedOutputTypes(0, {ALL_FLOATS}); + ->setAllowedInputTypes(0, {ALL_FLOATS, ALL_INTS}) + ->setAllowedInputTypes(1, {ALL_FLOATS, ALL_INTS}) + ->setAllowedOutputTypes(0, {ALL_FLOATS, ALL_INTS}); } ////////////////////////////////////////////////////////////////////// diff --git a/libnd4j/include/ops/declarable/generic/boolean/choose.cpp b/libnd4j/include/ops/declarable/generic/boolean/choose.cpp index 9689c9cd5..e5d67baf1 100644 --- a/libnd4j/include/ops/declarable/generic/boolean/choose.cpp +++ b/libnd4j/include/ops/declarable/generic/boolean/choose.cpp @@ -59,7 +59,7 @@ namespace sd { } DECLARE_SHAPE_FN(choose) { - Nd4jLong *shape; + Nd4jLong const* shape; int rank; int mode = 
INT_ARG(0); auto numResults = NDArrayFactory::create(0L); @@ -67,11 +67,11 @@ namespace sd { auto first = INPUT_VARIABLE(0); auto second = INPUT_VARIABLE(1); if(first->lengthOf() > second->lengthOf()) { - shape = first->getShapeInfo(); + shape = first->shapeInfo(); rank = first->rankOf(); } else { - shape = second->getShapeInfo(); + shape = second->shapeInfo(); rank = second->rankOf(); } @@ -79,7 +79,7 @@ namespace sd { } else { auto first = INPUT_VARIABLE(0); - shape = first->getShapeInfo(); + shape = first->shapeInfo(); rank = first->rankOf(); double scalar = T_ARG(0); diff --git a/libnd4j/include/ops/declarable/generic/boolean/where.cpp b/libnd4j/include/ops/declarable/generic/boolean/where.cpp index c72c10d6b..c26179179 100644 --- a/libnd4j/include/ops/declarable/generic/boolean/where.cpp +++ b/libnd4j/include/ops/declarable/generic/boolean/where.cpp @@ -99,9 +99,9 @@ namespace sd { for (Nd4jLong i = 0; i < condition->lengthOf(); i++) if (condition->e(i)) numOfTrue++; - Nd4jLong *newShape; - + Nd4jLong const* theNewShape; if (numOfTrue > 0) { + Nd4jLong* newShape; ALLOCATE(newShape, block.getWorkspace(), shape::shapeInfoLength(2), Nd4jLong); newShape[0] = 2; @@ -114,13 +114,13 @@ namespace sd { newShape[7] = 99; ShapeUtils::updateStridesAndType(newShape, sd::DataType::INT64, 'c'); - newShape = CONSTANT(newShape); + theNewShape = CONSTANT(newShape); } else { - newShape = ConstantShapeHelper::getInstance()->emptyShapeInfo(sd::DataType::INT64); + theNewShape = ConstantShapeHelper::getInstance()->emptyShapeInfo(sd::DataType::INT64); } - return SHAPELIST(newShape); + return SHAPELIST(theNewShape); } } diff --git a/libnd4j/include/ops/declarable/generic/broadcastable/multiply.cpp b/libnd4j/include/ops/declarable/generic/broadcastable/multiply.cpp index 3ddbe57ca..b7635c664 100644 --- a/libnd4j/include/ops/declarable/generic/broadcastable/multiply.cpp +++ b/libnd4j/include/ops/declarable/generic/broadcastable/multiply.cpp @@ -34,8 +34,8 @@ namespace ops { 
BROADCAST_CHECK_EMPTY(x,y,z); - Nd4jLong* zShapeInfo = nullptr; - const bool areShapesBroadcastable = ShapeUtils::evalBroadcastShapeInfo(x->getShapeInfo(), y->getShapeInfo(), true, zShapeInfo, block.getWorkspace()); + const Nd4jLong* zShapeInfo = nullptr; + const bool areShapesBroadcastable = ShapeUtils::evalBroadcastShapeInfo(x->shapeInfo(), y->shapeInfo(), true, zShapeInfo, block.getWorkspace()); REQUIRE_TRUE(areShapesBroadcastable, 0, "MULTIPLY OP: the shapes of x %s and y %s are not suitable for broadcast !", ShapeUtils::shapeAsString(x).c_str(), ShapeUtils::shapeAsString(y).c_str()); auto tZ = BroadcastHelper::broadcastApply(sd::BroadcastOpsTuple::Multiply(), x, y, z); @@ -70,8 +70,8 @@ CUSTOM_OP_IMPL(multiply_bp, 3, 2, false, 0, 0) { auto dLdx = OUTPUT_VARIABLE(0); auto dLdy = OUTPUT_VARIABLE(1); - Nd4jLong* dLdzShapeInfo = nullptr; - const bool areShapesBroadcastable = ShapeUtils::evalBroadcastShapeInfo(x->getShapeInfo(), y->getShapeInfo(), true, dLdzShapeInfo, block.getWorkspace()); + const Nd4jLong* dLdzShapeInfo = nullptr; + const bool areShapesBroadcastable = ShapeUtils::evalBroadcastShapeInfo(x->shapeInfo(), y->shapeInfo(), true, dLdzShapeInfo, block.getWorkspace()); REQUIRE_TRUE(areShapesBroadcastable, 0, "MULTIPLY_BP OP: the shapes of x %s and y %s are not suitable for broadcast !", ShapeUtils::shapeAsString(x).c_str(), ShapeUtils::shapeAsString(y).c_str()); REQUIRE_TRUE(shape::equalsSoft(dLdz->shapeInfo(), dLdzShapeInfo), 0, "MULTIPLY_BP OP: wrong shape of next epsilon array (dLdOut), expected is %s, but got %s instead !", ShapeUtils::shapeAsString(dLdzShapeInfo).c_str(), ShapeUtils::shapeAsString(dLdz).c_str()); @@ -102,7 +102,7 @@ CUSTOM_OP_IMPL(multiply_bp, 3, 2, false, 0, 0) { auto yTiled = NDArray(dLdz, false, block.launchContext()); y->tile(yTiled); - std::vector axesForY = ShapeUtils::evalBroadcastBackwardAxis(y->getShapeInfo(), dLdz->getShapeInfo()); + std::vector axesForY = ShapeUtils::evalBroadcastBackwardAxis(y->shapeInfo(), 
dLdz->shapeInfo()); dLdy->assign( (*x * *dLdz).reduceAlongDimension(reduce::Sum, axesForY) ); yTiled.applyPairwiseTransform(pairwise::Multiply, *dLdz, *dLdx); @@ -111,7 +111,7 @@ CUSTOM_OP_IMPL(multiply_bp, 3, 2, false, 0, 0) { auto xTiled = NDArray(dLdz, false, block.launchContext()); x->tile(xTiled); - std::vector axesForX = ShapeUtils::evalBroadcastBackwardAxis(x->getShapeInfo(), dLdz->getShapeInfo()); + std::vector axesForX = ShapeUtils::evalBroadcastBackwardAxis(x->shapeInfo(), dLdz->shapeInfo()); dLdx->assign( (*y * *dLdz).reduceAlongDimension(reduce::Sum, axesForX) ); xTiled.applyPairwiseTransform(pairwise::Multiply, *dLdz, *dLdy); @@ -122,8 +122,8 @@ CUSTOM_OP_IMPL(multiply_bp, 3, 2, false, 0, 0) { auto yTiled = NDArray(dLdz, false, block.launchContext()); x->tile(xTiled); y->tile(yTiled); - std::vector axesForX = ShapeUtils::evalBroadcastBackwardAxis(x->getShapeInfo(), dLdz->getShapeInfo()); - std::vector axesForY = ShapeUtils::evalBroadcastBackwardAxis(y->getShapeInfo(), dLdz->getShapeInfo()); + std::vector axesForX = ShapeUtils::evalBroadcastBackwardAxis(x->shapeInfo(), dLdz->shapeInfo()); + std::vector axesForY = ShapeUtils::evalBroadcastBackwardAxis(y->shapeInfo(), dLdz->shapeInfo()); dLdx->assign( (*y * *dLdz).reduceAlongDimension(reduce::Sum, axesForX) ); dLdy->assign( (*x * *dLdz).reduceAlongDimension(reduce::Sum, axesForY) ); diff --git a/libnd4j/include/ops/declarable/generic/broadcastable/percentile.cpp b/libnd4j/include/ops/declarable/generic/broadcastable/percentile.cpp index e2bf723b3..f5fbd4b18 100644 --- a/libnd4j/include/ops/declarable/generic/broadcastable/percentile.cpp +++ b/libnd4j/include/ops/declarable/generic/broadcastable/percentile.cpp @@ -64,8 +64,7 @@ CUSTOM_OP_IMPL(percentile, 1, 1, false, 1, -2) { DECLARE_SHAPE_FN(percentile) { - - Nd4jLong* inputShapeInfo = inputShape->at(0); + auto inputShapeInfo = inputShape->at(0); const int keepDims = block.getTArguments()->size() > 2 ? 
T_ARG(2) : 0.; // false is default const int axisArrRank = block.getIArguments()->size(); @@ -80,7 +79,7 @@ DECLARE_SHAPE_FN(percentile) { } std::vector axises = *block.getIArguments(); - Nd4jLong* outputShapeInfo = ShapeUtils::evalReduceShapeInfo(shape::order(inputShapeInfo), axises, inputShapeInfo, keepDims, false, block.getWorkspace()); + auto outputShapeInfo = ShapeUtils::evalReduceShapeInfo(shape::order(inputShapeInfo), axises, inputShapeInfo, keepDims, false, block.getWorkspace()); return SHAPELIST(outputShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/broadcastable/pow.cpp b/libnd4j/include/ops/declarable/generic/broadcastable/pow.cpp index 5a1ac02c5..8ceb61e18 100644 --- a/libnd4j/include/ops/declarable/generic/broadcastable/pow.cpp +++ b/libnd4j/include/ops/declarable/generic/broadcastable/pow.cpp @@ -62,8 +62,8 @@ namespace ops { auto dLdx = OUTPUT_VARIABLE(0); auto dLdy = OUTPUT_VARIABLE(1); - Nd4jLong* dLdzShapeInfo = nullptr; - const bool areShapesBroadcastable = ShapeUtils::evalBroadcastShapeInfo(x->getShapeInfo(), y->getShapeInfo(), true, dLdzShapeInfo, block.getWorkspace()); + const Nd4jLong* dLdzShapeInfo = nullptr; + const bool areShapesBroadcastable = ShapeUtils::evalBroadcastShapeInfo(x->shapeInfo(), y->shapeInfo(), true, dLdzShapeInfo, block.getWorkspace()); REQUIRE_TRUE(areShapesBroadcastable, 0, "POW_BP OP: the shapes of x %s" " and y %s are not suitable for broadcast !", ShapeUtils::shapeAsString(x).c_str(), ShapeUtils::shapeAsString(y).c_str()); @@ -82,7 +82,7 @@ namespace ops { dLdy->assign(temp); } else { - std::vector axesForY = ShapeUtils::evalBroadcastBackwardAxis(y->getShapeInfo(), dLdz->getShapeInfo()); + std::vector axesForY = ShapeUtils::evalBroadcastBackwardAxis(y->shapeInfo(), dLdz->shapeInfo()); dLdy->assign(temp.reduceAlongDimension(reduce::Sum, axesForY)); // dL/dy = sum(c * dL/dz) } @@ -94,7 +94,7 @@ namespace ops { dLdx->assign(temp); // dLdx = a*dL/dz } else { - std::vector axesForX = 
ShapeUtils::evalBroadcastBackwardAxis(x->getShapeInfo(), dLdz->getShapeInfo()); + std::vector axesForX = ShapeUtils::evalBroadcastBackwardAxis(x->shapeInfo(), dLdz->shapeInfo()); dLdx->assign(temp.reduceAlongDimension(reduce::Sum, axesForX)); // dLdx = a*dL/dz } diff --git a/libnd4j/include/ops/declarable/generic/compression/bitmap.cpp b/libnd4j/include/ops/declarable/generic/compression/bitmap.cpp new file mode 100644 index 000000000..4b77e2a45 --- /dev/null +++ b/libnd4j/include/ops/declarable/generic/compression/bitmap.cpp @@ -0,0 +1,92 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author George A. 
Shulinok +// + +#include +#include +#include + +#if NOT_EXCLUDED(OP_decode_bitmap) +namespace sd { + namespace ops { + CUSTOM_OP_IMPL(decode_bitmap, 2, 1, true, 0, 0) { + const auto encoded = INPUT_VARIABLE(1); + auto updates = OUTPUT_VARIABLE(0); + + helpers::decodeBitmap(block.launchContext(), encoded, updates); + return Status::OK(); + } + + DECLARE_SHAPE_FN(decode_bitmap) { + auto weights = INPUT_VARIABLE(0); + + return SHAPELIST(weights->shapeInfo()); + } + + DECLARE_TYPES(decode_bitmap) { + getOpDescriptor() + ->setAllowedInputTypes(0, {ALL_FLOATS}) + ->setAllowedInputTypes(1, DataType::INT32) + ->setAllowedOutputTypes({ALL_FLOATS}); + } + } +} +#endif + +#if NOT_EXCLUDED(OP_encode_bitmap) +namespace sd { + namespace ops { + CUSTOM_OP_IMPL(encode_bitmap, 1, 3, true, 1, 0) { + auto input = INPUT_VARIABLE(0); + auto encoded = OUTPUT_NULLIFIED(1); + auto counter = OUTPUT_NULLIFIED(2); + + float threshold = T_ARG(0); + + encoded->p(0, (int) input->lengthOf()); + encoded->p(1, (int) input->lengthOf()); + encoded->p(2, reinterpret_cast(&threshold)[0]); + encoded->p(3, 1); // flag for BITMAP_ENCODING + + auto result = helpers::encodeBitmap(block.launchContext(), input, encoded, threshold); + counter->p(0, result); + counter->syncToDevice(); + + return Status::OK(); + } + + DECLARE_SHAPE_FN(encode_bitmap) { + auto input = inputShape->at(0); + + auto outputLength = shape::length(input) / 16 + 5; + auto encodedShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(outputLength, DataType::INT32); + auto encodedCounter = ConstantShapeHelper::getInstance()->scalarShapeInfo(DataType::INT32); + return SHAPELIST(input, encodedShape, encodedCounter); + } + + DECLARE_TYPES(encode_bitmap) { + getOpDescriptor() + ->setAllowedInputTypes(sd::DataType::ANY) + ->setAllowedOutputTypes(0, {ALL_FLOATS}) + ->setAllowedInputTypes(1, DataType::INT32) + ->setAllowedInputTypes(2, DataType::INT32); + } + } +} +#endif \ No newline at end of file diff --git 
a/libnd4j/include/ops/declarable/generic/compression/threshold.cpp b/libnd4j/include/ops/declarable/generic/compression/threshold.cpp new file mode 100644 index 000000000..9512621e8 --- /dev/null +++ b/libnd4j/include/ops/declarable/generic/compression/threshold.cpp @@ -0,0 +1,104 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include +#include +#include + +namespace sd { + namespace ops { + CUSTOM_OP_IMPL(encode_threshold, 1, 2, true, 1, 0) { + auto x = INPUT_VARIABLE(0); + auto updated = OUTPUT_VARIABLE(0); + auto encoded = OUTPUT_NULLIFIED(1); + + float threshold = T_ARG(0); + + REQUIRE_TRUE(x->lengthOf() <= DataTypeUtils::max(), 0, "encode_threshold: gradients array must have length <= MAX_INT"); + REQUIRE_TRUE(encoded->lengthOf() >= 4, 0, "encode_threshold: array for encoded updates can't have less than 4 elements"); +// REQUIRE_TRUE(x->platformBuffer() == updated->platformBuffer(), 0, "encode_threshold: gradients array must be the same at input and output"); + + // filling header bytes + encoded->p(0, encoded->lengthOf() - 4); + encoded->p(1, (int) x->lengthOf()); + encoded->p(2, reinterpret_cast(&threshold)[0]); + encoded->p(3, 0); // flag for FLEXIBLE_ENCODING + + // if there's no updates to process 
- just skip execution + if (encoded->lengthOf() == 4) + return Status::OK(); + + helpers::thresholdEncode(*x, *encoded, threshold); + + return Status::OK(); + } + + DECLARE_SHAPE_FN(encode_threshold) { + auto x = INPUT_VARIABLE(0); + // we have limit option here + int boundary = block.numI() > 0 ? I_ARG(0) : DataTypeUtils::max(); + float threshold = T_ARG(0); + + REQUIRE_TRUE(boundary >= 0, 0, "encode_threshold: boundary must be positive"); + REQUIRE_TRUE(x->lengthOf() <= DataTypeUtils::max(), 0, "encode_threshold: gradients array must have length <= MAX_INT"); + + // we must calculate number of elements that >= threshold + auto elements = sd::math::nd4j_min(helpers::thresholdEstimate(*x, threshold), boundary); + if (elements < 2) + elements = 0; + + // result array must have 4 additional int elements for header + return SHAPELIST(x->shapeInfo(), sd::ConstantShapeHelper::getInstance()->vectorShapeInfo(elements + 4, DataType::INT32)); + } + + DECLARE_TYPES(encode_threshold) { + getOpDescriptor() + ->setAllowedInputTypes(0, {ALL_FLOATS}) + ->setAllowedOutputTypes(0, {ALL_FLOATS}) + ->setAllowedOutputTypes(1, DataType::INT32); + } + + CUSTOM_OP_IMPL(decode_threshold, 2, 1, true, 0, 0) { + auto weights = INPUT_VARIABLE(0); + auto encoded = INPUT_VARIABLE(1); + auto updates = OUTPUT_VARIABLE(0); + + REQUIRE_TRUE(encoded->lengthOf() >= 4, 0, "decode_threshold: encoded array can't have length < 4"); + REQUIRE_TRUE(updates->lengthOf() == encoded->e(1), 0, "decode_threshold: updates array must have length equal to [%i]", encoded->e(1)); + REQUIRE_TRUE(encoded->e(3) == 0, 0, "decode_threshold: encoded array doesn't look like threshold-encoded"); + + helpers::thresholdDecode(*encoded, *updates); + + return Status::OK(); + } + + DECLARE_SHAPE_FN(decode_threshold) { + auto weights = inputShape->at(0); + return SHAPELIST(weights); + } + + DECLARE_TYPES(decode_threshold) { + getOpDescriptor() + ->setAllowedInputTypes(0, {ALL_FLOATS}) + ->setAllowedInputTypes(1, DataType::INT32) + 
->setAllowedOutputTypes(0,{ALL_FLOATS}); + } + } +} \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/generic/linalg/lstsq.cpp b/libnd4j/include/ops/declarable/generic/linalg/lstsq.cpp index 6b02f6d70..81831e3fc 100644 --- a/libnd4j/include/ops/declarable/generic/linalg/lstsq.cpp +++ b/libnd4j/include/ops/declarable/generic/linalg/lstsq.cpp @@ -94,7 +94,8 @@ namespace sd { } auto resShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(in0), shape::order(in1), shapeOf);//ShapeBuilders::copyShapeInfoAndType(in1, in0, true, block.workspace()); if (shapeOf[rank - 1] == 0) { - ArrayOptions::setPropertyBit(resShape, ARRAY_EMPTY); +// ArrayOptions::setPropertyBit(resShape, ARRAY_EMPTY); + resShape = ConstantShapeHelper::getInstance()->emptyShapeInfo(ArrayOptions::dataType(in0)); } return SHAPELIST(resShape); } @@ -117,7 +118,8 @@ namespace sd { } auto resShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(in0), shape::order(in1), shapeOf);//ShapeBuilders::copyShapeInfoAndType(in1, in0, true, block.workspace()); if (shapeOf[rank - 1] == 0) { - ArrayOptions::setPropertyBit(resShape, ARRAY_EMPTY); + resShape = ConstantShapeHelper::getInstance()->emptyShapeInfo(ArrayOptions::dataType(in1)); +// ArrayOptions::setPropertyBit(resShape, ARRAY_EMPTY); } return SHAPELIST(resShape); } diff --git a/libnd4j/include/ops/declarable/generic/linalg/matrixDiagPart.cpp b/libnd4j/include/ops/declarable/generic/linalg/matrixDiagPart.cpp index 9d4a00be3..deabe8443 100644 --- a/libnd4j/include/ops/declarable/generic/linalg/matrixDiagPart.cpp +++ b/libnd4j/include/ops/declarable/generic/linalg/matrixDiagPart.cpp @@ -36,7 +36,7 @@ namespace sd { } DECLARE_SHAPE_FN(matrix_diag_part) { - Nd4jLong* outShapeInfo = nullptr; + Nd4jLong const* outShapeInfo = nullptr; auto in = inputShape->at(0); int inRank = shape::rank(in); @@ -49,14 +49,15 @@ namespace sd { outShapeInfo = 
ConstantShapeHelper::getInstance()->vectorShapeInfo(lastDimension, ArrayOptions::dataType(in)); } else { - ALLOCATE(outShapeInfo, block.getWorkspace(), shape::shapeInfoLength(outRank), Nd4jLong); - outShapeInfo[0] = outRank; + Nd4jLong* anShapeInfo; + ALLOCATE(anShapeInfo, block.getWorkspace(), shape::shapeInfoLength(outRank), Nd4jLong); + anShapeInfo[0] = outRank; for(int i = 0; i < outRank - 1; ++i) - outShapeInfo[i + 1] = shape::sizeAt(in, i); - outShapeInfo[outRank] = lastDimension; + anShapeInfo[i + 1] = shape::sizeAt(in, i); + anShapeInfo[outRank] = lastDimension; - ShapeUtils::updateStridesAndType(outShapeInfo, in, shape::order(in)); - outShapeInfo = CONSTANT(outShapeInfo); + ShapeUtils::updateStridesAndType(anShapeInfo, in, shape::order(in)); + outShapeInfo = CONSTANT(anShapeInfo); } return SHAPELIST(outShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/linalg/matrix_determinant.cpp b/libnd4j/include/ops/declarable/generic/linalg/matrix_determinant.cpp index 2268a9e9c..edd10e6ea 100644 --- a/libnd4j/include/ops/declarable/generic/linalg/matrix_determinant.cpp +++ b/libnd4j/include/ops/declarable/generic/linalg/matrix_determinant.cpp @@ -38,7 +38,7 @@ namespace sd { DECLARE_SHAPE_FN(matrix_determinant) { auto inShape = inputShape->at(0); - Nd4jLong* determinantShape; + Nd4jLong const* determinantShape; int targetRank = shape::rank(inShape) - 2; // last two dimensions will be reduced to scalar if (targetRank == 0) { // scalar only @@ -85,7 +85,7 @@ namespace sd { DECLARE_SHAPE_FN(log_matrix_determinant) { auto inShape = inputShape->at(0); - Nd4jLong* determinantShape; + Nd4jLong const* determinantShape; int targetRank = shape::rank(inShape) - 2; // last two dimensions will be reduced to scalar if (targetRank == 0) { // scalar only @@ -126,7 +126,7 @@ namespace sd { DECLARE_SHAPE_FN(logdet) { auto inShape = inputShape->at(0); - Nd4jLong* determinantShape; + Nd4jLong const* determinantShape; int targetRank = shape::rank(inShape) - 2; // last two 
dimensions will be reduced to scalar if (targetRank == 0) { // scalar only diff --git a/libnd4j/include/ops/declarable/generic/linalg/qr.cpp b/libnd4j/include/ops/declarable/generic/linalg/qr.cpp index 2cf9156ce..9a351a13f 100644 --- a/libnd4j/include/ops/declarable/generic/linalg/qr.cpp +++ b/libnd4j/include/ops/declarable/generic/linalg/qr.cpp @@ -44,8 +44,8 @@ namespace sd { DECLARE_SHAPE_FN(qr) { auto inShape = inputShape->at(0); - Nd4jLong* shapeQ; - Nd4jLong* shapeR; + Nd4jLong const* shapeQ; + Nd4jLong const* shapeR; int targetRank = shape::rank(inShape); // last two dimensions will be reduced to scalar auto fullMatricies = false; diff --git a/libnd4j/include/ops/declarable/generic/loss/absoluteDifference.cpp b/libnd4j/include/ops/declarable/generic/loss/absoluteDifference.cpp index 812588710..d745b0209 100644 --- a/libnd4j/include/ops/declarable/generic/loss/absoluteDifference.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/absoluteDifference.cpp @@ -49,7 +49,7 @@ CUSTOM_OP_IMPL(absolute_difference_loss, 3, 1, false, 0, 1) { // perform weights broadcasting/tile to labels if needed auto weightsBroad = weights; if(!weights->isScalar() && !weights->isSameShape(predictions)) - weightsBroad = new NDArray(weights->tileToShape(predictions->getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(predictions->shapeInfo())); NDArray E = (*predictions - *labels).transform(sd::transform::Abs); E *= *weightsBroad; @@ -118,7 +118,7 @@ DECLARE_SHAPE_FN(absolute_difference_loss) { REQUIRE_TRUE(shape::isScalar(weightsShapeInfo) || ShapeUtils::areShapesBroadcastable(weightsShapeInfo, labelsShapeInfo), 0, "ABSOLUTE_DIFFERENCE_LOSS OP: shapes of weights and labels arrays should be broadcastable, but got weights = %s and labels = %s instead!", ShapeUtils::shapeAsString(weightsShapeInfo).c_str(), ShapeUtils::shapeAsString(labelsShapeInfo).c_str()); DataType outType = DataTypeUtils::pickFloatingType(ArrayOptions::dataType(predictionsShapeInfo)); - Nd4jLong* 
outShapeInfo = nullptr; + Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); @@ -164,7 +164,7 @@ CUSTOM_OP_IMPL(absolute_difference_loss_grad, 3, 3, false, 0, 1) { // perform weights broadcasting/tile to labels if needed auto weightsBroad = weights; if(!weights->isScalar() && !weights->isSameShape(predictions)) - weightsBroad = new NDArray(weights->tileToShape(predictions->getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(predictions->shapeInfo())); NDArray E = *predictions - *labels; @@ -183,7 +183,7 @@ CUSTOM_OP_IMPL(absolute_difference_loss_grad, 3, 3, false, 0, 1) { if(weights->isScalar()) dLdw->assign(E.reduceNumber(reduce::Sum)); else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -209,7 +209,7 @@ CUSTOM_OP_IMPL(absolute_difference_loss_grad, 3, 3, false, 0, 1) { if(weights->isScalar()) *dLdw = 0.; else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -237,7 +237,7 @@ CUSTOM_OP_IMPL(absolute_difference_loss_grad, 3, 3, false, 0, 1) { if(weights->isScalar()) dLdw->assign(E.reduceNumber(reduce::Sum) / double(numOfNonZeroWeights)); else if(weights != weightsBroad) { - std::vector axesToReduceAlong = 
ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); *dLdw /= numOfNonZeroWeightsScalar; } diff --git a/libnd4j/include/ops/declarable/generic/loss/cosineDistance.cpp b/libnd4j/include/ops/declarable/generic/loss/cosineDistance.cpp index 10995c90b..4d134f6b1 100644 --- a/libnd4j/include/ops/declarable/generic/loss/cosineDistance.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/cosineDistance.cpp @@ -61,7 +61,7 @@ CUSTOM_OP_IMPL(cosine_distance_loss, 3, 1, false, 0, 2) { // perform weights broadcasting/tile to E if it is necessary auto weightsBroad = weights; if(!weights->isScalar() && !weights->isSameShape(&E)) - weightsBroad = new NDArray(weights->tileToShape(E.getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(E.shapeInfo())); // multiply E on weights E *= (*weightsBroad); @@ -141,7 +141,7 @@ DECLARE_SHAPE_FN(cosine_distance_loss) { DataType outType = DataTypeUtils::pickFloatingType(ArrayOptions::dataType(predictionsShapeInfo)); // evaluate output shapeInfo - Nd4jLong* outShapeInfo = nullptr; + Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); else { // in this case output has the same shape as labels reduced by dim axis @@ -186,11 +186,11 @@ CUSTOM_OP_IMPL(cosine_distance_loss_grad, 3, 3, false, 0, 2) { REQUIRE_TRUE(labels->isSameShape(predictions), 0, "COSINE_DISTANCE_LOSS_GRAD OP: labels and predictions arrays must have the same shapes, but got %s and %s correspondingly !", ShapeUtils::shapeAsString(labels).c_str(), ShapeUtils::shapeAsString(predictions).c_str()); // only 4 possible reduction modes exist REQUIRE_TRUE(reductionMode==0 || reductionMode==1 || reductionMode==2 || 
reductionMode==3, 0, "COSINE_DISTANCE_LOSS_GRAD OP: reduction mode value is not acceptable, possible values are 0, 1, 2, 3, but got %i instead!", reductionMode); - auto lossShapeInfo = ShapeUtils::evalReduceShapeInfo(predictions->ordering(), dimensions, predictions->getShapeInfo(), true, false, block.getWorkspace()); + auto lossShapeInfo = ShapeUtils::evalReduceShapeInfo(predictions->ordering(), dimensions, predictions->shapeInfo(), true, false, block.getWorkspace()); // weights array can be single scalar or has the same shape as loss, and must be broadcastable to loss shape REQUIRE_TRUE(weights->isScalar() || weights->rankOf() == shape::rank(lossShapeInfo), 0, "COSINE_DISTANCE_LOSS_GRAD OP: weights array should be scalar or have the same rank as loss array, but got %i and %i correspondingly!", weights->rankOf(), shape::rank(lossShapeInfo)); // check whether broadcast operation is possible for weights array - REQUIRE_TRUE(weights->isScalar() || ShapeUtils::areShapesBroadcastable(weights->getShapeInfo(), lossShapeInfo), 0, "COSINE_DISTANCE_LOSS_GRAD OP: shapes of weights and loss arrays should be broadcastable, but got weights = %s and loss = %s instead!", ShapeUtils::shapeAsString(weights).c_str(), ShapeUtils::shapeAsString(lossShapeInfo).c_str()); + REQUIRE_TRUE(weights->isScalar() || ShapeUtils::areShapesBroadcastable(weights->shapeInfo(), lossShapeInfo), 0, "COSINE_DISTANCE_LOSS_GRAD OP: shapes of weights and loss arrays should be broadcastable, but got weights = %s and loss = %s instead!", ShapeUtils::shapeAsString(weights).c_str(), ShapeUtils::shapeAsString(lossShapeInfo).c_str()); // input dimension can't be larger than labels/predictions/weights rank REQUIRE_TRUE(dim < labels->rankOf(), 0, "COSINE_DISTANCE_LOSS_GRAD OP: input reduction dimension (got %i) must be < labels rank %i!", dim, labels->rankOf()); @@ -199,7 +199,7 @@ CUSTOM_OP_IMPL(cosine_distance_loss_grad, 3, 3, false, 0, 2) { // perform weights broadcasting/tile to E if it is necessary auto 
weightsBroad = weights; if(!weights->isScalar() && !weights->isSameShape(&E)) - weightsBroad = new NDArray(weights->tileToShape(E.getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(E.shapeInfo())); dLdp->assign(-*labels); dLdl->assign(-*predictions); @@ -215,7 +215,7 @@ CUSTOM_OP_IMPL(cosine_distance_loss_grad, 3, 3, false, 0, 2) { } else { if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -248,7 +248,7 @@ CUSTOM_OP_IMPL(cosine_distance_loss_grad, 3, 3, false, 0, 2) { else { if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -283,7 +283,7 @@ CUSTOM_OP_IMPL(cosine_distance_loss_grad, 3, 3, false, 0, 2) { else { if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); *dLdw /= numOfNonZeroWeights; } diff --git a/libnd4j/include/ops/declarable/generic/loss/hingeLoss.cpp b/libnd4j/include/ops/declarable/generic/loss/hingeLoss.cpp index 7d8eeec3a..fe66387a8 100644 --- a/libnd4j/include/ops/declarable/generic/loss/hingeLoss.cpp +++ 
b/libnd4j/include/ops/declarable/generic/loss/hingeLoss.cpp @@ -48,7 +48,7 @@ namespace sd { // perform weights broadcasting/tile to logits if needed auto weightsBroad = weights; if(!weights->isScalar() && !weights->isSameShape(logits)) - weightsBroad = new NDArray(weights->tileToShape(logits->getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(logits->shapeInfo())); // We first need to convert binary labels to -1/1 labels (as floats) NDArray E = 1.f - (*labels * 2.f - 1.f) * (*logits); @@ -125,7 +125,7 @@ namespace sd { REQUIRE_TRUE(shape::isScalar(weightsShapeInfo) || ShapeUtils::areShapesBroadcastable(weightsShapeInfo, labelsShapeInfo), 0, "HINGE_LOSS OP: shapes of weights and labels arrays should be broadcastable, but got weights = %s and labels = %s instead!", ShapeUtils::shapeAsString(weightsShapeInfo).c_str(), ShapeUtils::shapeAsString(labelsShapeInfo).c_str()); DataType outType = DataTypeUtils::pickFloatingType(ArrayOptions::dataType(logitsShapeInfo)); - Nd4jLong* outShapeInfo = nullptr; + Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); @@ -167,7 +167,7 @@ namespace sd { // perform weights broadcasting/tile to labels if needed auto weightsBroad = weights; if(!weights->isScalar() && !weights->isSameShape(logits)) - weightsBroad = new NDArray(weights->tileToShape(logits->getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(logits->shapeInfo())); // We first need to convert binary labels to -1/1 labels (as floats) NDArray z = (*labels * 2.f - 1.f); @@ -176,7 +176,7 @@ namespace sd { E.applyScalar(scalar::RELU, 0.0f, E); // turn E into gradient mask - NDArray gradientMask(E.getShapeInfo(), block.getWorkspace()); + NDArray gradientMask(E.shapeInfo(), block.getWorkspace()); E.applyTransform(sd::transform::Sign, gradientMask); dLdp->assign(-z * gradientMask); @@ -192,7 +192,7 @@ namespace sd { 
if(weights->isScalar()) dLdw->assign(E.reduceNumber(reduce::Sum)); else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -221,7 +221,7 @@ namespace sd { if(weights->isScalar()) *dLdw = 0.; else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -250,7 +250,7 @@ namespace sd { if(weights->isScalar()) dLdw->assign(E.reduceNumber(reduce::Sum) / double(numOfNonZeroWeights)); else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); *dLdw /= numOfNonZeroWeightsScalar; } diff --git a/libnd4j/include/ops/declarable/generic/loss/huberLoss.cpp b/libnd4j/include/ops/declarable/generic/loss/huberLoss.cpp index a29bd1cf2..df57092e1 100644 --- a/libnd4j/include/ops/declarable/generic/loss/huberLoss.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/huberLoss.cpp @@ -50,11 +50,11 @@ CUSTOM_OP_IMPL(huber_loss, 3, 1, false, 1, 1) { // perform weights broadcasting/tile to predictions if needed auto weightsBroad = weights; if(!weights->isScalar() && 
!weights->isSameShape(predictions)) - weightsBroad = new NDArray(weights->tileToShape(predictions->getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(predictions->shapeInfo())); auto error = *predictions - *labels; error.applyTransform(transform::Abs, error); - NDArray quadratic(error.getShapeInfo(), block.getWorkspace()); + NDArray quadratic(error.shapeInfo(), block.getWorkspace()); error.applyScalar(scalar::MinPairwise, delta, quadratic); NDArray E = quadratic * quadratic * 0.5f + (error - quadratic)*delta; @@ -130,7 +130,7 @@ DECLARE_SHAPE_FN(huber_loss) { REQUIRE_TRUE(shape::isScalar(weightsShapeInfo) || ShapeUtils::areShapesBroadcastable(weightsShapeInfo, labelsShapeInfo), 0, "HUBER_LOSS OP: shapes of weights and labels arrays should be broadcastable, but got weights = %s and labels = %s instead!", ShapeUtils::shapeAsString(weightsShapeInfo).c_str(), ShapeUtils::shapeAsString(labelsShapeInfo).c_str()); DataType outType = DataTypeUtils::pickFloatingType(ArrayOptions::dataType(predictionsShapeInfo)); - Nd4jLong* outShapeInfo = nullptr; + Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); @@ -170,7 +170,7 @@ DECLARE_SHAPE_FN(huber_loss) { // perform weights broadcasting/tile to labels if needed auto weightsBroad = weights; if(!weights->isScalar() && !weights->isSameShape(predictions)) - weightsBroad = new NDArray(weights->tileToShape(predictions->getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(predictions->shapeInfo())); NDArray diff = *predictions - *labels; NDArray absDiff(diff); @@ -180,10 +180,10 @@ DECLARE_SHAPE_FN(huber_loss) { NDArray E = quadratic * quadratic * 0.5f + (absDiff - quadratic)*delta; - NDArray lteMask(diff.getShapeInfo(), BOOL, true, block.launchContext()); + NDArray lteMask(diff.shapeInfo(), BOOL, true, block.launchContext()); absDiff.applyScalar(scalar::LessThanOrEqual, delta, lteMask); 
- NDArray gtMask(diff.getShapeInfo(), BOOL, true, block.launchContext()); + NDArray gtMask(diff.shapeInfo(), BOOL, true, block.launchContext()); absDiff.applyScalar(scalar::GreaterThan, delta, gtMask); NDArray signDiff(diff); @@ -207,7 +207,7 @@ DECLARE_SHAPE_FN(huber_loss) { if(weights->isScalar()) dLdw->assign(E.reduceNumber(reduce::Sum)); else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -236,7 +236,7 @@ DECLARE_SHAPE_FN(huber_loss) { if(weights->isScalar()) *dLdw = 0.; else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -265,7 +265,7 @@ DECLARE_SHAPE_FN(huber_loss) { if(weights->isScalar()) dLdw->assign(E.reduceNumber(reduce::Sum) / double(numOfNonZeroWeights)); else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); *dLdw /= numOfNonZeroWeightsScalar; } @@ -306,9 +306,9 @@ DECLARE_SHAPE_FN(huber_loss) { DataType outType = DataTypeUtils::pickFloatingType(ArrayOptions::dataType(predictionsShapeInfo)); - Nd4jLong *dLdpShapeInfo = 
ShapeBuilders::copyShapeInfoAndType(predictionsShapeInfo, outType, false, block.getWorkspace()); - Nd4jLong *dLdwShapeInfo = ShapeBuilders::copyShapeInfoAndType(weightsShapeInfo, outType, false, block.getWorkspace()); - Nd4jLong *dLdlShapeInfo = ShapeBuilders::copyShapeInfoAndType(labelsShapeInfo, outType, false, block.getWorkspace()); + auto dLdpShapeInfo = ShapeBuilders::copyShapeInfoAndType(predictionsShapeInfo, outType, false, block.getWorkspace()); + auto dLdwShapeInfo = ShapeBuilders::copyShapeInfoAndType(weightsShapeInfo, outType, false, block.getWorkspace()); + auto dLdlShapeInfo = ShapeBuilders::copyShapeInfoAndType(labelsShapeInfo, outType, false, block.getWorkspace()); return SHAPELIST(dLdpShapeInfo, dLdwShapeInfo, dLdlShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/loss/logLoss.cpp b/libnd4j/include/ops/declarable/generic/loss/logLoss.cpp index 99140a394..e43e7b1d1 100644 --- a/libnd4j/include/ops/declarable/generic/loss/logLoss.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/logLoss.cpp @@ -52,7 +52,7 @@ CUSTOM_OP_IMPL(log_loss, 3, 1, false, 1, 1) { // perform weights broadcasting/tile to predictions if needed auto weightsBroad = weights; if(!weights->isScalar() && !weights->isSameShape(predictions)) - weightsBroad = new NDArray(weights->tileToShape(predictions->getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(predictions->shapeInfo())); NDArray E = -(*labels)*((*predictions + epsilon).transform(transform::Log)) - (1. - *labels)*(((1. 
+ epsilon) - *predictions).transform(transform::Log)); @@ -127,7 +127,7 @@ DECLARE_SHAPE_FN(log_loss) { REQUIRE_TRUE(shape::isScalar(weightsShapeInfo) || ShapeUtils::areShapesBroadcastable(weightsShapeInfo, labelsShapeInfo), 0, "LOG_LOSS OP: shapes of weights and labels arrays should be broadcastable, but got weights = %s and labels = %s instead!", ShapeUtils::shapeAsString(weightsShapeInfo).c_str(), ShapeUtils::shapeAsString(labelsShapeInfo).c_str()); DataType outType = DataTypeUtils::pickFloatingType(ArrayOptions::dataType(predictionsShapeInfo)); - Nd4jLong* outShapeInfo = nullptr; + Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); @@ -173,7 +173,7 @@ CUSTOM_OP_IMPL(log_loss_grad, 3, 3, false, 1, 1) { // perform weights broadcasting/tile to labels if needed auto weightsBroad = weights; if(!weights->isScalar() && !weights->isSameShape(predictions)) - weightsBroad = new NDArray(weights->tileToShape(predictions->getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(predictions->shapeInfo())); NDArray predictPlusEps = *predictions + epsilon; NDArray oneMinusLabels = 1. 
- *labels; @@ -196,7 +196,7 @@ CUSTOM_OP_IMPL(log_loss_grad, 3, 3, false, 1, 1) { if(weights->isScalar()) dLdw->assign(E.reduceNumber(reduce::Sum)); else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -227,7 +227,7 @@ CUSTOM_OP_IMPL(log_loss_grad, 3, 3, false, 1, 1) { if(weights->isScalar()) *dLdw = 0.; else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -255,7 +255,7 @@ CUSTOM_OP_IMPL(log_loss_grad, 3, 3, false, 1, 1) { if(weights->isScalar()) dLdw->assign(E.reduceNumber(reduce::Sum) / numOfNonZeroWeights); else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); *dLdw /= numOfNonZeroWeightsScalar; } diff --git a/libnd4j/include/ops/declarable/generic/loss/log_poisson_loss.cpp b/libnd4j/include/ops/declarable/generic/loss/log_poisson_loss.cpp index 20e03e92b..b39326071 100644 --- a/libnd4j/include/ops/declarable/generic/loss/log_poisson_loss.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/log_poisson_loss.cpp @@ -50,10 +50,10 @@ namespace ops { 
// perform weights broadcasting/tile to labels if needed auto weightsBroad = weights; if(!weights->isScalar() && !weights->isSameShape(log_predictions)) - weightsBroad = new NDArray(weights->tileToShape(log_predictions->getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(log_predictions->shapeInfo())); - NDArray E(labels->getShapeInfo(), block.getWorkspace()); + NDArray E(labels->shapeInfo(), block.getWorkspace()); if (computeFullLoss) labels->applyPairwiseTransform(pairwise::LogPoissonLossFull, *log_predictions, E); else @@ -130,7 +130,7 @@ namespace ops { REQUIRE_TRUE(shape::isScalar(weightsShapeInfo) || ShapeUtils::areShapesBroadcastable(weightsShapeInfo, labelsShapeInfo), 0, "LOG_POISSON_LOSS OP: shapes of weights and labels arrays should be broadcastable, but got weights = %s and labels = %s instead!", ShapeUtils::shapeAsString(weightsShapeInfo).c_str(), ShapeUtils::shapeAsString(labelsShapeInfo).c_str()); DataType outType = DataTypeUtils::pickFloatingType(ArrayOptions::dataType(predictionsShapeInfo)); - Nd4jLong* outShapeInfo = nullptr; + Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); @@ -172,14 +172,14 @@ namespace ops { // perform weights broadcasting/tile to labels if needed auto weightsBroad = weights; if(!weights->isScalar() && !weights->isSameShape(log_predictions)) - weightsBroad = new NDArray(weights->tileToShape(log_predictions->getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(log_predictions->shapeInfo())); - NDArray E(labels->getShapeInfo(), block.getWorkspace()); + NDArray E(labels->shapeInfo(), block.getWorkspace()); if (computeFullLoss) { labels->applyPairwiseTransform(pairwise::LogPoissonLossFull, *log_predictions, E); - NDArray rDiv(labels->getShapeInfo(), block.getWorkspace()); + NDArray rDiv(labels->shapeInfo(), block.getWorkspace()); labels->applyScalar(scalar::ReverseDivide, 0.5f, 
rDiv); dLdl->assign(rDiv + labels->transform(transform::Log) + -(*log_predictions)); } else { @@ -200,7 +200,7 @@ namespace ops { if(weights->isScalar()) dLdw->assign(E.reduceNumber(reduce::Sum)); else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -229,7 +229,7 @@ namespace ops { if(weights->isScalar()) *dLdw = 0.; else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -258,7 +258,7 @@ namespace ops { if(weights->isScalar()) dLdw->assign(E.reduceNumber(reduce::Sum) / double(numOfNonZeroWeights)); else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); *dLdw /= numOfNonZeroWeightsScalar; } @@ -299,9 +299,9 @@ namespace ops { DataType outType = DataTypeUtils::pickFloatingType(ArrayOptions::dataType(predictionsShapeInfo)); - Nd4jLong *dLdpShapeInfo = ShapeBuilders::copyShapeInfoAndType(predictionsShapeInfo, outType, false, block.getWorkspace()); - Nd4jLong *dLdwShapeInfo = ShapeBuilders::copyShapeInfoAndType(weightsShapeInfo, outType, false, 
block.getWorkspace()); - Nd4jLong *dLdlShapeInfo = ShapeBuilders::copyShapeInfoAndType(labelsShapeInfo, outType, false, block.getWorkspace()); + auto dLdpShapeInfo = ShapeBuilders::copyShapeInfoAndType(predictionsShapeInfo, outType, false, block.getWorkspace()); + auto dLdwShapeInfo = ShapeBuilders::copyShapeInfoAndType(weightsShapeInfo, outType, false, block.getWorkspace()); + auto dLdlShapeInfo = ShapeBuilders::copyShapeInfoAndType(labelsShapeInfo, outType, false, block.getWorkspace()); return SHAPELIST(dLdpShapeInfo, dLdwShapeInfo, dLdlShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/loss/meanPairWsSqErr.cpp b/libnd4j/include/ops/declarable/generic/loss/meanPairWsSqErr.cpp index f8006a3ed..5a0e20807 100644 --- a/libnd4j/include/ops/declarable/generic/loss/meanPairWsSqErr.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/meanPairWsSqErr.cpp @@ -128,7 +128,7 @@ namespace sd { // perform weights broadcasting/tile to labels if needed auto weightsBroad = weights; if(!weights->isScalar() && !weights->isSameShape(E)) - weightsBroad = new NDArray(weights->tileToShape(E.getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(E.shapeInfo())); E *= *weightsBroad; @@ -197,7 +197,7 @@ namespace sd { ShapeUtils::shapeAsString(labelsShapeInfo).c_str(), ShapeUtils::shapeAsString(predictionsShapeInfo).c_str()); DataType outType = DataTypeUtils::pickFloatingType(ArrayOptions::dataType(predictionsShapeInfo)); - Nd4jLong* outShapeInfo = nullptr; + Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); @@ -262,7 +262,7 @@ namespace sd { // perform weights broadcasting/tile to labels if needed auto weightsBroad = weights; if(!weights->isScalar() && !weights->isSameShape(E)) - weightsBroad = new NDArray(weights->tileToShape(E.getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(E.shapeInfo())); switch (reductionMode) { @@ 
-273,7 +273,7 @@ namespace sd { if(weights->isScalar()) dLdw->assign(E.reduceNumber(reduce::Sum)); else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -300,7 +300,7 @@ namespace sd { if(weights->isScalar()) *dLdw = 0.; else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -328,7 +328,7 @@ namespace sd { if(weights->isScalar()) dLdw->assign(E.reduceNumber(reduce::Sum) / double(numOfNonZeroWeights)); else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); *dLdw /= numOfNonZeroWeightsScalar; } diff --git a/libnd4j/include/ops/declarable/generic/loss/meanSqErr.cpp b/libnd4j/include/ops/declarable/generic/loss/meanSqErr.cpp index b0ccf968b..fd00a0364 100644 --- a/libnd4j/include/ops/declarable/generic/loss/meanSqErr.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/meanSqErr.cpp @@ -48,9 +48,9 @@ CUSTOM_OP_IMPL(mean_sqerr_loss, 3, 1, false, 0, 1) { // perform weights broadcasting/tile to labels if needed auto weightsBroad = weights; 
if(!weights->isScalar() && !weights->isSameShape(predictions)) - weightsBroad = new NDArray(weights->tileToShape(predictions->getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(predictions->shapeInfo())); - NDArray E(labels->getShapeInfo(), false, block.launchContext()); + NDArray E(labels->shapeInfo(), false, block.launchContext()); predictions->applyPairwiseTransform(pairwise::SquaredSubtract, *labels, E); // multiply E on weights @@ -126,7 +126,7 @@ DECLARE_SHAPE_FN(mean_sqerr_loss) { REQUIRE_TRUE(shape::isScalar(weightsShapeInfo) || ShapeUtils::areShapesBroadcastable(weightsShapeInfo, labelsShapeInfo), 0, "MEAN_SQERR_LOSS OP: shapes of weights and labels arrays should be broadcastable, but got weights = %s and labels = %s instead!", ShapeUtils::shapeAsString(weightsShapeInfo).c_str(), ShapeUtils::shapeAsString(labelsShapeInfo).c_str()); DataType outType = DataTypeUtils::pickFloatingType(ArrayOptions::dataType(predictionsShapeInfo)); - Nd4jLong* outShapeInfo = nullptr; + Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); @@ -171,7 +171,7 @@ CUSTOM_OP_IMPL(mean_sqerr_loss_grad, 3, 3, false, 0, 1) { // perform weights broadcasting/tile to labels if needed auto weightsBroad = weights; if(!weights->isScalar() && !weights->isSameShape(predictions)) - weightsBroad = new NDArray(weights->tileToShape(predictions->getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(predictions->shapeInfo())); NDArray diff = *predictions - *labels; @@ -191,7 +191,7 @@ CUSTOM_OP_IMPL(mean_sqerr_loss_grad, 3, 3, false, 0, 1) { if(weights->isScalar()) dLdw->assign(E.reduceNumber(reduce::Sum)); else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = 
ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -218,7 +218,7 @@ CUSTOM_OP_IMPL(mean_sqerr_loss_grad, 3, 3, false, 0, 1) { if(weights->isScalar()) *dLdw = 0.; else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -246,7 +246,7 @@ CUSTOM_OP_IMPL(mean_sqerr_loss_grad, 3, 3, false, 0, 1) { if(weights->isScalar()) dLdw->assign(E.reduceNumber(reduce::Sum) / double(numOfNonZeroWeights)); else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); *dLdw /= numOfNonZeroWeightsScalar; } diff --git a/libnd4j/include/ops/declarable/generic/loss/sigmCrossEntropy.cpp b/libnd4j/include/ops/declarable/generic/loss/sigmCrossEntropy.cpp index 28d66bc93..f2e665bdb 100644 --- a/libnd4j/include/ops/declarable/generic/loss/sigmCrossEntropy.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/sigmCrossEntropy.cpp @@ -50,7 +50,7 @@ CUSTOM_OP_IMPL(sigm_cross_entropy_loss, 3, 1, false, 1, 1) { // perform weights broadcasting/tile to labels if needed auto weightsBroad = weights; if(!weights->isScalar() && !weights->isSameShape(logits)) - weightsBroad = new NDArray(weights->tileToShape(logits->getShapeInfo())); + weightsBroad = new 
NDArray(weights->tileToShape(logits->shapeInfo())); // If labelsSmoothing is nonzero, smooth the labels towards 1/2: auto newLabels = labels; @@ -137,7 +137,7 @@ DECLARE_SHAPE_FN(sigm_cross_entropy_loss) { REQUIRE_TRUE(shape::isScalar(weightsShapeInfo) || ShapeUtils::areShapesBroadcastable(weightsShapeInfo, labelsShapeInfo), 0, "SIGM_CROSS_ENTROPY_LOSS OP: shapes of weights and labels arrays should be broadcastable, but got weights = %s and labels = %s instead!", ShapeUtils::shapeAsString(weightsShapeInfo).c_str(), ShapeUtils::shapeAsString(labelsShapeInfo).c_str()); DataType outType = DataTypeUtils::pickFloatingType(ArrayOptions::dataType(logitsShapeInfo)); - Nd4jLong* outShapeInfo = nullptr; + Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); @@ -181,7 +181,7 @@ CUSTOM_OP_IMPL(sigm_cross_entropy_loss_grad, 3, 3, false, 1, 1) { // perform weights broadcasting/tile to labels if needed auto weightsBroad = weights; if(!weights->isScalar() && !weights->isSameShape(logits)) - weightsBroad = new NDArray(weights->tileToShape(logits->getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(logits->shapeInfo())); // If labelsSmoothing is nonzero, smooth the labels towards 1/2: auto newLabels = labels; @@ -211,7 +211,7 @@ CUSTOM_OP_IMPL(sigm_cross_entropy_loss_grad, 3, 3, false, 1, 1) { if(weights->isScalar()) dLdw->assign(E.reduceNumber(reduce::Sum)); else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -240,7 +240,7 @@ CUSTOM_OP_IMPL(sigm_cross_entropy_loss_grad, 3, 3, false, 1, 1) { if(weights->isScalar()) *dLdw = 0.; else 
if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -268,7 +268,7 @@ CUSTOM_OP_IMPL(sigm_cross_entropy_loss_grad, 3, 3, false, 1, 1) { if(weights->isScalar()) dLdw->assign(E.reduceNumber(reduce::Sum) / numOfNonZeroWeightsScalar); else if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); *dLdw /= numOfNonZeroWeightsScalar; } diff --git a/libnd4j/include/ops/declarable/generic/loss/softmaxCrossEntropy.cpp b/libnd4j/include/ops/declarable/generic/loss/softmaxCrossEntropy.cpp index 3ea9ce2bd..f70a58a10 100644 --- a/libnd4j/include/ops/declarable/generic/loss/softmaxCrossEntropy.cpp +++ b/libnd4j/include/ops/declarable/generic/loss/softmaxCrossEntropy.cpp @@ -80,7 +80,7 @@ CUSTOM_OP_IMPL(softmax_cross_entropy_loss, 3, 1, false, 1, 1) { if(E.rankOf() == 1 && weights->isVector() && weights->rankOf() > 1) weightsBroad = new NDArray(weights->reshape(weights->ordering(), {weights->lengthOf()})); else - weightsBroad = new NDArray(weights->tileToShape(E.getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(E.shapeInfo())); } // multiply E on weights @@ -158,7 +158,7 @@ DECLARE_SHAPE_FN(softmax_cross_entropy_loss) { REQUIRE_TRUE(shape::shapeEquals(logitsShapeInfo, labelsShapeInfo), 0, "SOFTMAX_CROSS_ENTROPY_LOSS OP: labels and logits arrays must have the same shapes, but got 
%s and %s correspondingly!", ShapeUtils::shapeAsString(labelsShapeInfo).c_str(), ShapeUtils::shapeAsString(logitsShapeInfo).c_str()); DataType outType = DataTypeUtils::pickFloatingType(ArrayOptions::dataType(logitsShapeInfo)); - Nd4jLong* outShapeInfo = nullptr; + Nd4jLong const* outShapeInfo = nullptr; if(INT_ARG(0) != 0) // in this case output is scalar outShapeInfo = ConstantShapeHelper::getInstance()->scalarShapeInfo(outType); @@ -207,11 +207,11 @@ CUSTOM_OP_IMPL(softmax_cross_entropy_loss_grad, 3, 3, false, 1, 1) { REQUIRE_TRUE(labels->isSameShape(logits), 0, "SOFTMAX_CROSS_ENTROPY_LOSS_GRAD OP: labels and logits arrays must have the same shapes, but got %s and %s correspondingly !", ShapeUtils::shapeAsString(labels).c_str(), ShapeUtils::shapeAsString(logits).c_str()); // only 4 possible reduction modes exist REQUIRE_TRUE(reductionMode==0 || reductionMode==1 || reductionMode==2 || reductionMode==3, 0, "SOFTMAX_CROSS_ENTROPY_LOSS_GRAD OP: reduction mode value is not acceptable, possible values are 0, 1, 2, 3, but got %i instead!", reductionMode); - auto lossShapeInfo = ShapeUtils::evalReduceShapeInfo(logits->ordering(), dimensions, logits->getShapeInfo(), false, false, block.getWorkspace()); + auto lossShapeInfo = ShapeUtils::evalReduceShapeInfo(logits->ordering(), dimensions, logits->shapeInfo(), false, false, block.getWorkspace()); // weights array can be single scalar or has the same shape as loss, and must be broadcastable to loss shape REQUIRE_TRUE(weights->isScalar() || weights->rankOf() == shape::rank(lossShapeInfo), 0, "SOFTMAX_CROSS_ENTROPY_LOSS_GRAD OP: weights array should be scalar or have the same rank as loss array, but got %i and %i correspondingly!", weights->rankOf(), shape::rank(lossShapeInfo)); // check whether broadcast operation is possible for weights array - REQUIRE_TRUE(weights->isScalar() || ShapeUtils::areShapesBroadcastable(weights->getShapeInfo(), lossShapeInfo), 0, "SOFTMAX_CROSS_ENTROPY_LOSS_GRAD OP: shapes of weights and loss 
arrays should be broadcastable, but got weights = %s and loss = %s instead!", ShapeUtils::shapeAsString(weights).c_str(), ShapeUtils::shapeAsString(lossShapeInfo).c_str()); + REQUIRE_TRUE(weights->isScalar() || ShapeUtils::areShapesBroadcastable(weights->shapeInfo(), lossShapeInfo), 0, "SOFTMAX_CROSS_ENTROPY_LOSS_GRAD OP: shapes of weights and loss arrays should be broadcastable, but got weights = %s and loss = %s instead!", ShapeUtils::shapeAsString(weights).c_str(), ShapeUtils::shapeAsString(lossShapeInfo).c_str()); // smoothing is possible for rank of logits/labels > 1 REQUIRE_TRUE(labels->rankOf() > 1 || (labels->rankOf() == 1 && labelsSmoothing == 0.), 0, "SOFTMAX_CROSS_ENTROPY_LOSS_GRAD OP: smoothing is not possible when rank of labels/ logits = 1 !"); @@ -220,7 +220,7 @@ CUSTOM_OP_IMPL(softmax_cross_entropy_loss_grad, 3, 3, false, 1, 1) { NDArray* cLabels = new NDArray(labels->cast(weights->dataType())); NDArray* newLabels = cLabels; if(labelsSmoothing != 0.) { - newLabels = new NDArray(labels->getShapeInfo(), dLdl->dataType(), false, block.launchContext()); + newLabels = new NDArray(labels->shapeInfo(), dLdl->dataType(), false, block.launchContext()); newLabels->assign((1.f - labelsSmoothing) * *cLabels + labelsSmoothing / cLabels->sizeAt(1)); } @@ -240,7 +240,7 @@ CUSTOM_OP_IMPL(softmax_cross_entropy_loss_grad, 3, 3, false, 1, 1) { // perform weights broadcasting/tile to E if it is necessary auto weightsBroad = weights; if(!weights->isScalar() && !weights->isSameShape(&E)) - weightsBroad = new NDArray(weights->tileToShape(E.getShapeInfo())); + weightsBroad = new NDArray(weights->tileToShape(E.shapeInfo())); dimensions = ShapeUtils::evalDimsToExclude(dLdp->rankOf(), dimensions); @@ -257,7 +257,7 @@ CUSTOM_OP_IMPL(softmax_cross_entropy_loss_grad, 3, 3, false, 1, 1) { dLdl->applyBroadcast(sd::broadcast::Multiply, dimensions, *weightsBroad, *dLdl); if(weights != weightsBroad) { - std::vector axesToReduceAlong = 
ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -293,7 +293,7 @@ CUSTOM_OP_IMPL(softmax_cross_entropy_loss_grad, 3, 3, false, 1, 1) { dLdl->applyBroadcast(sd::broadcast::Multiply, dimensions, temp, *dLdl); if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); ((E * sum - (E * *weightsBroad).reduceNumber(reduce::Sum)) / (sum*sum)).reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); } else @@ -330,7 +330,7 @@ CUSTOM_OP_IMPL(softmax_cross_entropy_loss_grad, 3, 3, false, 1, 1) { dLdl->applyBroadcast(sd::broadcast::Multiply, dimensions, temp, *dLdl); if(weights != weightsBroad) { - std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->getShapeInfo(), weightsBroad->getShapeInfo()); + std::vector axesToReduceAlong = ShapeUtils::evalBroadcastBackwardAxis(weights->shapeInfo(), weightsBroad->shapeInfo()); E.reduceAlongDimension(reduce::Sum, *dLdw, axesToReduceAlong, true, false, false); *dLdw /= numOfNonZeroWeights; } diff --git a/libnd4j/include/ops/declarable/generic/nn/batchnorm.cpp b/libnd4j/include/ops/declarable/generic/nn/batchnorm.cpp index e69b370ca..56684c569 100644 --- a/libnd4j/include/ops/declarable/generic/nn/batchnorm.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/batchnorm.cpp @@ -224,7 +224,7 @@ CUSTOM_OP_IMPL(batchnorm_bp, 4, 3, false, 1, 2) { const bool keepUnitiesInShape = inRank == mean->rankOf(); // inverse batch size 1/N - const float Ninv = 1.f * shape::tadLength(input->getShapeInfo(), axes.data(), axes.size()) / 
input->lengthOf(); + const float Ninv = 1.f * shape::tadLength(input->shapeInfo(), axes.data(), axes.size()) / input->lengthOf(); // input - mean NDArray xMinusMean(input); // empty array with same shape as input @@ -322,8 +322,8 @@ DECLARE_TYPES(batchnorm_bp) { DECLARE_SHAPE_FN(batchnorm_bp) { - Nd4jLong* inShapeInfo = inputShape->at(0); - Nd4jLong* meanShapeInfo = inputShape->at(1); + Nd4jLong const* inShapeInfo = inputShape->at(0); + Nd4jLong const* meanShapeInfo = inputShape->at(1); const bool applyScale = (bool)INT_ARG(0); const bool applyOffset = (bool)INT_ARG(1); diff --git a/libnd4j/include/ops/declarable/generic/nn/convo/conv1d.cpp b/libnd4j/include/ops/declarable/generic/nn/convo/conv1d.cpp index 27081b545..881e60105 100644 --- a/libnd4j/include/ops/declarable/generic/nn/convo/conv1d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/convo/conv1d.cpp @@ -98,7 +98,7 @@ DECLARE_SHAPE_FN(conv1d) { auto inputShapeInfo = inputShape->at(0); auto weightsShapeInfo = inputShape->at(1); - Nd4jLong* biasShapeInfo = block.width() > 2 ? inputShape->at(2) : nullptr; + Nd4jLong const* biasShapeInfo = block.width() > 2 ? inputShape->at(2) : nullptr; int kW = INT_ARG(0) > 0 ? INT_ARG(0) : static_cast(shape::sizeAt(weightsShapeInfo, 0)); // filter(kernel) width int sW = INT_ARG(1); // strides width @@ -240,8 +240,8 @@ DECLARE_SHAPE_FN(conv1d_bp) { auto inputShapeInfo = inputShape->at(0); // [bS, iW, iC] (NWC) or [bS, iC, iW] (NCW) auto weightsShapeInfo = inputShape->at(1); // [kW, iC, oC], [oC, iC, kW], [oC, kW, iC] - Nd4jLong* biasShapeInfo = block.width() > 3 ? inputShape->at(2) : nullptr; // [oC] - Nd4jLong* gradOShapeInfo = block.width() > 3 ? inputShape->at(3) : inputShape->at(2); // [bS, oW, oC] (NWC) or [bS, oC, oW] (NCW), epsilon_next + Nd4jLong const* biasShapeInfo = block.width() > 3 ? inputShape->at(2) : nullptr; // [oC] + Nd4jLong const* gradOShapeInfo = block.width() > 3 ? 
inputShape->at(3) : inputShape->at(2); // [bS, oW, oC] (NWC) or [bS, oC, oW] (NCW), epsilon_next const int rank = 3; REQUIRE_TRUE(inputShapeInfo[0] == rank, 0, "CUSTOM CONV1D_BP OP: rank of input array must be equal to %i, but got %i instead !", rank, inputShapeInfo[0]); diff --git a/libnd4j/include/ops/declarable/generic/nn/convo/conv3d.cpp b/libnd4j/include/ops/declarable/generic/nn/convo/conv3d.cpp index 0657f6dc2..889a01b9a 100644 --- a/libnd4j/include/ops/declarable/generic/nn/convo/conv3d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/convo/conv3d.cpp @@ -300,10 +300,10 @@ CUSTOM_OP_IMPL(conv3dnew_bp, 3, 2, false, 0, 13) { DECLARE_SHAPE_FN(conv3dnew_bp) { - Nd4jLong* inputShapeInfo = inputShape->at(0); // [bS, iD, iH, iW, iC] (NDHWC) or [bS, iC, iD, iH, iW] (NCDHW) - Nd4jLong* weightsShapeInfo = inputShape->at(1); // [kD, kH, kW, iC, oC], [oC, iC, kD, kH, kW], [oC, kD, kH, kW, iC] - Nd4jLong* biasShapeInfo = block.width() > 3 ? inputShape->at(2) : nullptr; // [oC] - Nd4jLong* gradOShapeInfo = block.width() > 3 ? inputShape->at(3) : inputShape->at(2); // [bS, oD, oH, oW, oC] (NDHWC) or [bS, oC, oD, oH, oW] (NCDHW), epsilon_next + auto inputShapeInfo = inputShape->at(0); // [bS, iD, iH, iW, iC] (NDHWC) or [bS, iC, iD, iH, iW] (NCDHW) + auto weightsShapeInfo = inputShape->at(1); // [kD, kH, kW, iC, oC], [oC, iC, kD, kH, kW], [oC, kD, kH, kW, iC] + Nd4jLong const* biasShapeInfo = block.width() > 3 ? inputShape->at(2) : nullptr; // [oC] + Nd4jLong const* gradOShapeInfo = block.width() > 3 ? inputShape->at(3) : inputShape->at(2); // [bS, oD, oH, oW, oC] (NDHWC) or [bS, oC, oD, oH, oW] (NCDHW), epsilon_next int kD = INT_ARG(0) > 0 ? INT_ARG(0) : static_cast(shape::sizeAt(weightsShapeInfo, 0));// filter(kernel) depth int kH = INT_ARG(1) > 0 ? 
INT_ARG(1) : static_cast(shape::sizeAt(weightsShapeInfo, 1));// filter(kernel) height diff --git a/libnd4j/include/ops/declarable/generic/nn/convo/deconv2d.cpp b/libnd4j/include/ops/declarable/generic/nn/convo/deconv2d.cpp index 8d6c0e3a7..e0440692b 100644 --- a/libnd4j/include/ops/declarable/generic/nn/convo/deconv2d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/convo/deconv2d.cpp @@ -264,8 +264,8 @@ DECLARE_SHAPE_FN(deconv2d_bp) { auto inputShapeInfo = inputShape->at(0); // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCDHW) auto weightsShapeInfo = inputShape->at(1); // [kH, kW, oC, iC], [iC, oC, kH, kW], [iC, kH, kW, oC] - Nd4jLong* biasShapeInfo = block.width() > 3 ? inputShape->at(2) : nullptr; // [oC] - Nd4jLong* gradOShapeInfo = block.width() > 3 ? inputShape->at(3) : inputShape->at(2); // [bS, oH, oW, oC] (NHWC) or [bS, oC, oH, oW] (NCDHW), epsilon_next + Nd4jLong const* biasShapeInfo = block.width() > 3 ? inputShape->at(2) : nullptr; // [oC] + auto gradOShapeInfo = block.width() > 3 ? inputShape->at(3) : inputShape->at(2); // [bS, oH, oW, oC] (NHWC) or [bS, oC, oH, oW] (NCDHW), epsilon_next const int rank = 4; REQUIRE_TRUE(shape::rank(inputShapeInfo) == rank, 0, "CUSTOM DECONV2D_BP OP: rank of input array must be equal to %i, but got %i instead !", rank, shape::rank(inputShapeInfo)); diff --git a/libnd4j/include/ops/declarable/generic/nn/convo/deconv3d.cpp b/libnd4j/include/ops/declarable/generic/nn/convo/deconv3d.cpp index ab6e49836..7c68ee74c 100644 --- a/libnd4j/include/ops/declarable/generic/nn/convo/deconv3d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/convo/deconv3d.cpp @@ -284,8 +284,8 @@ DECLARE_SHAPE_FN(deconv3d_bp) { auto inputShapeInfo = inputShape->at(0); // [bS, iD, iH, iW, iC] (NDHWC) or [bS, iC, iD, iH, iW] (NCDHW) auto weightsShapeInfo = inputShape->at(1); // [kD, kH, kW, oC, iC], [iC, oC, kD, kH, kW], [iC, kD, kH, kW, oC] - Nd4jLong* biasShapeInfo = block.width() > 3 ? 
inputShape->at(2) : nullptr; // [oC] - Nd4jLong* gradOShapeInfo = block.width() > 3 ? inputShape->at(3) : inputShape->at(2); // [bS, oD, oH, oW, oC] (NDHWC) or [bS, oC, oD, oH, oW] (NCDHW), epsilon_next + auto biasShapeInfo = block.width() > 3 ? inputShape->at(2) : nullptr; // [oC] + auto gradOShapeInfo = block.width() > 3 ? inputShape->at(3) : inputShape->at(2); // [bS, oD, oH, oW, oC] (NDHWC) or [bS, oC, oD, oH, oW] (NCDHW), epsilon_next const int rank = 5; REQUIRE_TRUE(shape::rank(inputShapeInfo) == rank, 0, "CUSTOM DECONV3D_BP OP: rank of input array must be equal to %i, but got %i instead !", rank, shape::rank(inputShapeInfo)); diff --git a/libnd4j/include/ops/declarable/generic/nn/convo/depthwiseConv2d.cpp b/libnd4j/include/ops/declarable/generic/nn/convo/depthwiseConv2d.cpp index 30580e7a6..744512a13 100644 --- a/libnd4j/include/ops/declarable/generic/nn/convo/depthwiseConv2d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/convo/depthwiseConv2d.cpp @@ -74,10 +74,9 @@ CUSTOM_OP_IMPL(depthwise_conv2d, 2, 1, false, 0, 9) { ->setAllowedOutputTypes({ALL_FLOATS}); } DECLARE_SHAPE_FN(depthwise_conv2d) { - - Nd4jLong* inputShapeInfo = inputShape->at(0); // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW) - Nd4jLong* weightsShapeInfo = inputShape->at(1); // [kH, kW, iC, mC], [mC, iC, kH, kW], [mC, kH, kW, iC] - Nd4jLong* biasShapeInfo = block.width() > 2 ? inputShape->at(2) : nullptr; // [oC] = iC*mC + auto inputShapeInfo = inputShape->at(0); // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW) + auto weightsShapeInfo = inputShape->at(1); // [kH, kW, iC, mC], [mC, iC, kH, kW], [mC, kH, kW, iC] + auto biasShapeInfo = block.width() > 2 ? 
inputShape->at(2) : nullptr; // [oC] = iC*mC const int rank = 4; REQUIRE_TRUE(shape::rank(inputShapeInfo) == rank, 0, "CUSTOM DEPTHWISECONV2D OP: rank of input array must be equal to %i, but got %i instead !", rank, inputShapeInfo[0]); @@ -196,11 +195,10 @@ CUSTOM_OP_IMPL(depthwise_conv2d_bp, 3, 2, false, 0, 9) { ////////////////////////////////////////////////////////////////////// DECLARE_SHAPE_FN(depthwise_conv2d_bp) { - - Nd4jLong* inputShapeInfo = inputShape->at(0); - Nd4jLong* weightsShapeInfo = inputShape->at(1); - Nd4jLong* biasShapeInfo = block.width() > 3 ? inputShape->at(2) : nullptr; - Nd4jLong* gradOShapeInfo = block.width() > 3 ? inputShape->at(3) : inputShape->at(2); + auto inputShapeInfo = inputShape->at(0); + auto weightsShapeInfo = inputShape->at(1); + auto biasShapeInfo = block.width() > 3 ? inputShape->at(2) : nullptr; + auto gradOShapeInfo = block.width() > 3 ? inputShape->at(3) : inputShape->at(2); const int rank = 4; REQUIRE_TRUE(shape::rank(inputShapeInfo) == rank, 0, "CUSTOM DEPTHWISECONV2D_BP OP: rank of input array must be equal to %i, but got %i instead !", rank, shape::rank(inputShapeInfo)); diff --git a/libnd4j/include/ops/declarable/generic/nn/convo/dilation2d.cpp b/libnd4j/include/ops/declarable/generic/nn/convo/dilation2d.cpp index ea1193400..c3ecddf53 100644 --- a/libnd4j/include/ops/declarable/generic/nn/convo/dilation2d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/convo/dilation2d.cpp @@ -98,8 +98,6 @@ namespace ops { std::vector strides(4); std::vector rates(4); - Nd4jLong *newShape; - if (block.width() > 2) { auto r = INPUT_VARIABLE(2); auto s = INPUT_VARIABLE(3); @@ -109,7 +107,7 @@ namespace ops { rates = r->template asVectorT(); } else { if (block.numI() < 9) { - newShape = ConstantShapeHelper::getInstance()->scalarShapeInfo(block.dataType()); + auto newShape = ConstantShapeHelper::getInstance()->scalarShapeInfo(block.dataType()); return SHAPELIST(newShape); } @@ -129,7 +127,7 @@ namespace ops { 
helpers::dilation_hw(block.launchContext(), input, weights, strides, rates, isSameShape, &sH, &sW, &pH, &pW, &dH, &dW, &oH, &oW); std::array shape = {{bS, oH, oW, iC}}; - newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(weights), 'c', 4, shape.data()); + auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(weights), 'c', 4, shape.data()); return SHAPELIST(newShape); } } diff --git a/libnd4j/include/ops/declarable/generic/nn/convo/pointwiseConv2d.cpp b/libnd4j/include/ops/declarable/generic/nn/convo/pointwiseConv2d.cpp index 52960c3fc..0f7bdde10 100644 --- a/libnd4j/include/ops/declarable/generic/nn/convo/pointwiseConv2d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/convo/pointwiseConv2d.cpp @@ -72,10 +72,9 @@ CUSTOM_OP_IMPL(pointwise_conv2d, 2, 1, false, 0, 0) { DECLARE_SHAPE_FN(pointwise_conv2d) { - - Nd4jLong* inputShapeInfo = inputShape->at(0); // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW) - Nd4jLong* weightsShapeInfo = inputShape->at(1); // [1, 1, iC, oC], [oC, iC, 1, 1], [oC, 1, 1, iC] - Nd4jLong* biasShapeInfo = block.width() > 2 ? inputShape->at(2) : nullptr; // [oC] + auto inputShapeInfo = inputShape->at(0); // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW) + auto weightsShapeInfo = inputShape->at(1); // [1, 1, iC, oC], [oC, iC, 1, 1], [oC, 1, 1, iC] + auto biasShapeInfo = block.width() > 2 ? 
inputShape->at(2) : nullptr; // [oC] const int rank = 4; REQUIRE_TRUE(inputShapeInfo[0] == rank, 0, "CUSTOM POINTWISECONV2D OP: rank of input array must be equal to %i, but got %i instead !", rank, inputShapeInfo[0]); diff --git a/libnd4j/include/ops/declarable/generic/nn/convo/sconv2d.cpp b/libnd4j/include/ops/declarable/generic/nn/convo/sconv2d.cpp index a804abafa..d887d7c2a 100644 --- a/libnd4j/include/ops/declarable/generic/nn/convo/sconv2d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/convo/sconv2d.cpp @@ -106,8 +106,8 @@ DECLARE_SHAPE_FN(sconv2d) { auto inputShapeInfo = inputShape->at(0); // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW) auto weightsDShapeInfo = inputShape->at(1); // [kH, kW, iC, mC], [mC, iC, kH, kW], [mC, kH, kW, iC] - Nd4jLong* weightsPShapeInfo = nullptr; // [1, 1, iC*mC, oC], [oC, iC*mC, 1, 1], [oC, 1, 1, iC*mC] - Nd4jLong* biasShapeInfo = nullptr; // [oC], oC = iC*mC if weightsPoint=nullptr + Nd4jLong const* weightsPShapeInfo = nullptr; // [1, 1, iC*mC, oC], [oC, iC*mC, 1, 1], [oC, 1, 1, iC*mC] + Nd4jLong const* biasShapeInfo = nullptr; // [oC], oC = iC*mC if weightsPoint=nullptr if(block.width() == 3) if(inputShape->at(2)[0] == 4) @@ -306,8 +306,8 @@ DECLARE_SHAPE_FN(sconv2d_bp) { auto inputShapeInfo = inputShape->at(0); // [bS, iH, iW, iC] (NHWC) or [bS, iC, iH, iW] (NCHW) auto gradOShapeInfo = inputShape->at(1); // [bS, oH, oW, oC] (NHWC) or [bS, oC, oH, oW] (NCHW), epsilon_next auto weightsDShapeInfo = inputShape->at(2); // [kH, kW, iC, mC], [mC, iC, kH, kW], [mC, kH, kW, iC] - Nd4jLong* weightsPShapeInfo = nullptr; // [1, 1, iC*mC, oC], [oC, iC*mC, 1, 1], [oC, 1, 1, iC*mC] - Nd4jLong* biasShapeInfo = nullptr; // [oC], oC = iC*mC if weightsPoint=nullptr + Nd4jLong const* weightsPShapeInfo = nullptr; // [1, 1, iC*mC, oC], [oC, iC*mC, 1, 1], [oC, 1, 1, iC*mC] + Nd4jLong const* biasShapeInfo = nullptr; // [oC], oC = iC*mC if weightsPoint=nullptr if(block.width() == 4) { if(inputShape->at(3)[0] == 4) diff --git 
a/libnd4j/include/ops/declarable/generic/nn/dot_product_attention.cpp b/libnd4j/include/ops/declarable/generic/nn/dot_product_attention.cpp index bd0cf329a..c80608e03 100644 --- a/libnd4j/include/ops/declarable/generic/nn/dot_product_attention.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/dot_product_attention.cpp @@ -40,7 +40,7 @@ namespace ops { if(outputWeights){ weights = OUTPUT_VARIABLE(1); }else{ - auto weightShape = ShapeUtils::evalShapeForMatmul(keys->getShapeInfo(), queries->getShapeInfo(), true, false); + auto weightShape = ShapeUtils::evalShapeForMatmul(keys->shapeInfo(), queries->shapeInfo(), true, false); weights = new NDArray('c', weightShape, values->dataType(), block.launchContext()); } @@ -164,7 +164,7 @@ namespace ops { if(normalization) factor = sqrt((double)keys->sizeAt(-2)); - auto weightShape = ShapeUtils::evalShapeForMatmul(keys->getShapeInfo(), queries->getShapeInfo(), true, false); + auto weightShape = ShapeUtils::evalShapeForMatmul(keys->shapeInfo(), queries->shapeInfo(), true, false); sd::ops::matmul mmul; NDArray preSoftmax('c', weightShape, values->dataType(), block.launchContext()); @@ -188,7 +188,7 @@ namespace ops { softmax.execute({&preSoftmax}, {&weights},{}, {-2}, {}); sd::ops::matmul_bp mmul_bp; - NDArray dLdw(weights.getShapeInfo(), block.workspace()); + NDArray dLdw(weights.shapeInfo(), block.workspace()); mmul_bp.execute({values, &weights, eps}, std::vector{dLdv, &dLdw}, {}, {}, {}); NDArray dLds(preSoftmax.shapeInfo(), block.workspace()); diff --git a/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool2d.cpp b/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool2d.cpp index d92c27442..31dd72fc3 100644 --- a/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool2d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool2d.cpp @@ -95,8 +95,8 @@ DECLARE_SYN(maxpool, maxpool2d); DECLARE_SHAPE_FN(maxpool2d) { //NDArray *x = block.getVariables().at(0)->getNDArray(); - Nd4jLong* inShape = 
inputShape->at(0); - Nd4jLong* shapeOf = shape::shapeOf(inShape); + auto inShape = inputShape->at(0); + auto shapeOf = shape::shapeOf(inShape); // 0 - number of dimensions; 1,2 - kernel Height/Width; 3,4 - stride Height/Width; 5,6 - pad Height/Width; 7,8 - dilation Height/Width; 9,10 - input Height/Width; 11 - batch size; 12 - input depth; 13 - same mode; int kH = INT_ARG(0); int kW = INT_ARG(1); diff --git a/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool3d.cpp b/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool3d.cpp index 3fd5f9c51..d1b5928b6 100644 --- a/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool3d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/pooling/maxpool3d.cpp @@ -106,7 +106,7 @@ DECLARE_SHAPE_FN(maxpool3dnew) { REQUIRE_TRUE(dD != 0 && dH != 0 && dW != 0, 0, "MAXPOOL3DNEW op: dilation must not be zero, but got instead {%i, %i, %i}", dD, dH, dW); - Nd4jLong* inputShapeInfo = inputShape->at(0); + auto inputShapeInfo = inputShape->at(0); int idxID, idxIC; if(isNCDHW) { idxID = 2; idxIC = 1;} diff --git a/libnd4j/include/ops/declarable/generic/nn/pooling/pnormpool2d.cpp b/libnd4j/include/ops/declarable/generic/nn/pooling/pnormpool2d.cpp index 4c9319ca1..adcd40daa 100644 --- a/libnd4j/include/ops/declarable/generic/nn/pooling/pnormpool2d.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/pooling/pnormpool2d.cpp @@ -187,7 +187,7 @@ CUSTOM_OP_IMPL(pnormpool2d_bp, 2, 1, false, 1, 10) { // NDArray* columns = columnsWrongShape.permute({0, 1, 4, 5, 2, 3}); // [bS, iC, oH, oW, kH, kW] -> [bS, iC, kH, kW, oH, oW] // NDArray* gradOVector = gradO->reshape('c', {(int) gradO->lengthOf(), 1}); // NDArray* columns2d = columnsWrongShape.reshape('c', {bS*iC*oH*oW, kH*kW}); - // NDArray pNorm(columns2d->getShapeInfo(), block.getWorkspace()); + // NDArray pNorm(columns2d->shapeInfo(), block.getWorkspace()); // input->template applyTransform>(columns, std::vector({(T)kH, (T)kW, (T)sH, (T)sW, (T)pH, (T)pW, (T)dH, (T)dW, (T)0.f, 
(T)0.f}).data()); diff --git a/libnd4j/include/ops/declarable/generic/nn/recurrent/dynamicBidirectionalRNN.cpp b/libnd4j/include/ops/declarable/generic/nn/recurrent/dynamicBidirectionalRNN.cpp index 33fd5e8ea..d03f568b5 100644 --- a/libnd4j/include/ops/declarable/generic/nn/recurrent/dynamicBidirectionalRNN.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/recurrent/dynamicBidirectionalRNN.cpp @@ -214,10 +214,10 @@ DECLARE_SHAPE_FN(dynamic_bidirectional_rnn) { hFWFinalPrevShapeInfo[2] = numUnitsFW; hBWFinalPrevShapeInfo[2] = numUnitsBW; - ShapeUtils::updateStridesAndType(hFWShapeInfo, x->getShapeInfo(), x->ordering()); - ShapeUtils::updateStridesAndType(hBWShapeInfo, x->getShapeInfo(), x->ordering()); - ShapeUtils::updateStridesAndType(hFWFinalPrevShapeInfo, x->getShapeInfo(), x->ordering()); - ShapeUtils::updateStridesAndType(hBWFinalPrevShapeInfo, x->getShapeInfo(), x->ordering()); + ShapeUtils::updateStridesAndType(hFWShapeInfo, x->shapeInfo(), x->ordering()); + ShapeUtils::updateStridesAndType(hBWShapeInfo, x->shapeInfo(), x->ordering()); + ShapeUtils::updateStridesAndType(hFWFinalPrevShapeInfo, x->shapeInfo(), x->ordering()); + ShapeUtils::updateStridesAndType(hBWFinalPrevShapeInfo, x->shapeInfo(), x->ordering()); return SHAPELIST(CONSTANT(hFWShapeInfo), CONSTANT(hBWShapeInfo), CONSTANT(hFWFinalPrevShapeInfo), CONSTANT(hBWFinalPrevShapeInfo)); } diff --git a/libnd4j/include/ops/declarable/generic/nn/recurrent/dynamicRNN.cpp b/libnd4j/include/ops/declarable/generic/nn/recurrent/dynamicRNN.cpp index 41696638d..9836d65ce 100644 --- a/libnd4j/include/ops/declarable/generic/nn/recurrent/dynamicRNN.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/recurrent/dynamicRNN.cpp @@ -109,8 +109,8 @@ DECLARE_SHAPE_FN(dynamic_rnn) { auto WhShapeInfo = inputShape->at(2); // hidden-to-hidden weights, [numUnits x numUnits] auto bShapeInfo = inputShape->at(3); // biases for, [2*numUnits] - Nd4jLong* h0ShapeInfo = nullptr; // initial cell output (at time step = 0) [bS x 
numUnits] - Nd4jLong* maxTimeStepShapeInfo = nullptr; // vector [bS] containing integer values within [0,time), each element of this vector set max time step per each input in batch, this means there are no calculations for time >= maxTimeStep + Nd4jLong const* h0ShapeInfo = nullptr; // initial cell output (at time step = 0) [bS x numUnits] + Nd4jLong const* maxTimeStepShapeInfo = nullptr; // vector [bS] containing integer values within [0,time), each element of this vector set max time step per each input in batch, this means there are no calculations for time >= maxTimeStep const int timeMajor = block.getIArguments()->size() > 0 ? INT_ARG(0) : 0; // if true then [time, bS, ...], else [bS, time, ...] diff --git a/libnd4j/include/ops/declarable/generic/nn/recurrent/gru.cpp b/libnd4j/include/ops/declarable/generic/nn/recurrent/gru.cpp index dee9a7c88..a0b1e707b 100644 --- a/libnd4j/include/ops/declarable/generic/nn/recurrent/gru.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/recurrent/gru.cpp @@ -91,7 +91,7 @@ DECLARE_SHAPE_FN(gru) { REQUIRE_TRUE(Wh->isSameShape(whCorrectShape), 0, "GRU operation: wrong shape of hidden-to-hidden weights array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(whCorrectShape).c_str(), ShapeUtils::shapeAsString(Wh).c_str()); REQUIRE_TRUE(b->isSameShape(bCorrectShape), 0, "GRU operation: wrong shape of biases array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(bCorrectShape).c_str(), ShapeUtils::shapeAsString(b).c_str()); - auto* hShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(hI->dataType(), hI->ordering(), {time, bS, nOut}); + auto hShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(hI->dataType(), hI->ordering(), {time, bS, nOut}); return SHAPELIST(hShapeInfo); } @@ -173,11 +173,11 @@ DECLARE_SHAPE_FN(gru_bp) { REQUIRE_TRUE(b->isSameShape(bCorrectShape), 0, "GRU_BP operation: wrong shape of biases array, expected is %s, but got %s instead !", 
ShapeUtils::shapeAsString(bCorrectShape).c_str(), ShapeUtils::shapeAsString(b).c_str()); REQUIRE_TRUE(dLdh->isSameShape(hCorrectShape),0, "GRU_BP operation: wrong shape of gradient vs. ff output, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(hCorrectShape).c_str(), ShapeUtils::shapeAsString(dLdh).c_str()); - Nd4jLong* dLdxShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(dLdh->dataType(), x->getShapeInfo()); - Nd4jLong* dLdhIShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(dLdh->dataType(), hI->getShapeInfo()); - Nd4jLong* dLdWxShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(dLdh->dataType(), Wx->getShapeInfo()); - Nd4jLong* dLdWhShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(dLdh->dataType(), Wh->getShapeInfo()); - Nd4jLong* dLdbShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(dLdh->dataType(), b->getShapeInfo()); + auto dLdxShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(dLdh->dataType(), x->shapeInfo()); + auto dLdhIShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(dLdh->dataType(), hI->shapeInfo()); + auto dLdWxShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(dLdh->dataType(), Wx->shapeInfo()); + auto dLdWhShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(dLdh->dataType(), Wh->shapeInfo()); + auto dLdbShapeInfo = ConstantShapeHelper::getInstance()->createShapeInfo(dLdh->dataType(), b->shapeInfo()); return SHAPELIST(dLdxShapeInfo, dLdhIShapeInfo, dLdWxShapeInfo, dLdWhShapeInfo, dLdbShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmBlock.cpp b/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmBlock.cpp index 3225f3f74..1fd7ec8cc 100644 --- a/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmBlock.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmBlock.cpp @@ -113,7 +113,7 @@ DECLARE_SHAPE_FN(lstmBlock) { } ShapeUtils::updateStridesAndType(s, x, 'c'); 
- Nd4jLong *s1 = CONSTANT(s); + auto s1 = CONSTANT(s); //7 outputs, all same shape/type return SHAPELIST(s1, s1, s1, s1, s1, s1, s1); diff --git a/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmBlockCell.cpp b/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmBlockCell.cpp index 333854ba3..55d3a6b7a 100644 --- a/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmBlockCell.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmBlockCell.cpp @@ -115,7 +115,7 @@ DECLARE_SHAPE_FN(lstmBlockCell) { ShapeUtils::updateStridesAndType(s, xt, 'c'); - Nd4jLong *s1 = CONSTANT(s); + auto s1 = CONSTANT(s); //7 outputs, all same shape: z, i, f, o, h, c, y return SHAPELIST(s1, s1, s1, s1, s1, s1, s1); diff --git a/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmLayer.cpp b/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmLayer.cpp index 871291165..a5c8b8d28 100644 --- a/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmLayer.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmLayer.cpp @@ -334,7 +334,7 @@ DECLARE_SHAPE_FN(lstmLayer) { else type = sd::DataType::FLOAT32; - std::vector shapes; + auto shapes = SHAPELIST(); // evaluate h shape (output) if(retFullSeq) { @@ -362,7 +362,7 @@ DECLARE_SHAPE_FN(lstmLayer) { hShape = {sL, 2, bS, nOut}; } - shapes.push_back(ConstantShapeHelper::getInstance()->createShapeInfo(type, x->ordering(), hShape)); + shapes->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(type, x->ordering(), hShape)); } // evaluate hL shape (output at last step) @@ -375,10 +375,10 @@ DECLARE_SHAPE_FN(lstmLayer) { else hLShape = {2, bS, nOut}; - shapes.push_back(ConstantShapeHelper::getInstance()->createShapeInfo(type, x->ordering(), hLShape)); + shapes->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(type, x->ordering(), hLShape)); if(retLastC) // cL and hL have same shapes - shapes.push_back(shapes.back()); + shapes->push_back(shapes->at(shapes->size() - 1)); } // evaluate cL 
shape (cell state at last step) @@ -391,10 +391,10 @@ DECLARE_SHAPE_FN(lstmLayer) { else cLShape = {2, bS, nOut}; - shapes.push_back(ConstantShapeHelper::getInstance()->createShapeInfo(type, x->ordering(), cLShape)); + shapes->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(type, x->ordering(), cLShape)); } - return new ShapeList(shapes); + return shapes; } @@ -785,20 +785,20 @@ DECLARE_SHAPE_FN(lstmLayer_bp) { const auto cI = hasInitC ? INPUT_VARIABLE(count++) : nullptr; // initial cell state const auto Wp = hasPH ? INPUT_VARIABLE(count++) : nullptr; // peephole weights - std::vector outShapes = {x->getShapeInfo(), Wx->getShapeInfo(), Wr->getShapeInfo()}; + auto outShapes = SHAPELIST(x->shapeInfo(), Wx->shapeInfo(), Wr->shapeInfo()); if(b != nullptr) - outShapes.push_back(b->getShapeInfo()); + outShapes->push_back(b->shapeInfo()); if(seqLen != nullptr) - outShapes.push_back(seqLen->getShapeInfo()); + outShapes->push_back(seqLen->shapeInfo()); if(hI != nullptr) - outShapes.push_back(hI->getShapeInfo()); + outShapes->push_back(hI->shapeInfo()); if(cI != nullptr) - outShapes.push_back(cI->getShapeInfo()); + outShapes->push_back(cI->shapeInfo()); if(Wp != nullptr) - outShapes.push_back(Wp->getShapeInfo()); + outShapes->push_back(Wp->shapeInfo()); - return new ShapeList(outShapes); + return outShapes; } } diff --git a/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmLayerCell.cpp b/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmLayerCell.cpp index 4f24219bd..645541d6b 100644 --- a/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmLayerCell.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/recurrent/lstmLayerCell.cpp @@ -154,7 +154,7 @@ DECLARE_SHAPE_FN(lstmLayerCell) { const auto hI = INPUT_VARIABLE(count++); // initial output const auto cI = INPUT_VARIABLE(count); // initial cell state - return new ShapeList({hI->getShapeInfo(), cI->getShapeInfo()}); + return new ShapeList({hI->shapeInfo(), cI->shapeInfo()}); } 
////////////////////////////////////////////////////////////////////////// @@ -319,18 +319,18 @@ DECLARE_SHAPE_FN(lstmLayerCellBp) { const auto cI = INPUT_VARIABLE(count++); // initial cell state const auto Wp = hasPH ? INPUT_VARIABLE(count) : nullptr; // peephole weights - std::vector shapes = {x->getShapeInfo(), Wx->getShapeInfo(), Wr->getShapeInfo()}; + auto shapes = SHAPELIST(x->shapeInfo(), Wx->shapeInfo(), Wr->shapeInfo()); if(b != nullptr) - shapes.push_back(b->getShapeInfo()); + shapes->push_back(b->shapeInfo()); - shapes.push_back(hI->getShapeInfo()); - shapes.push_back(cI->getShapeInfo()); + shapes->push_back(hI->shapeInfo()); + shapes->push_back(cI->shapeInfo()); if(Wp != nullptr) - shapes.push_back(Wp->getShapeInfo()); + shapes->push_back(Wp->shapeInfo()); - return new ShapeList(shapes); + return shapes; } } diff --git a/libnd4j/include/ops/declarable/generic/nn/recurrent/sru.cpp b/libnd4j/include/ops/declarable/generic/nn/recurrent/sru.cpp index 9b78a5c56..84dd6356a 100644 --- a/libnd4j/include/ops/declarable/generic/nn/recurrent/sru.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/recurrent/sru.cpp @@ -67,7 +67,7 @@ CUSTOM_OP_IMPL(sru, 5, 2, false, 0, 0) { // xm = x * mask auto xm = x; if(mask) { - xm = new NDArray(x->getShapeInfo(), true, block.launchContext()); + xm = new NDArray(x->shapeInfo(), true, block.launchContext()); x->applyBroadcast(broadcast::Multiply, {0, 1}, *mask, *xm); } @@ -92,7 +92,7 @@ DECLARE_SHAPE_FN(sru) { auto wShapeInfo = inputShape->at(1); // W, 2d tensor of weights [3*inSize x inSize] auto bShapeInfo = inputShape->at(2); // B, row of biases with twice length [2*inSize] auto c0ShapeInfo = inputShape->at(3); // C_{0}, 2d tensor of initial state [bS x inSize] at time t=0 - Nd4jLong* maskShapeInfo = block.width() > 4 ? inputShape->at(4) : nullptr; // optional, 2d tensor of dropout mask [bS x inSize] + auto maskShapeInfo = block.width() > 4 ? 
inputShape->at(4) : nullptr; // optional, 2d tensor of dropout mask [bS x inSize] const int rank = xShapeInfo[0]; // = 3 const int bS = xShapeInfo[1]; @@ -367,7 +367,7 @@ DECLARE_SHAPE_FN(sru_bi) { auto wShapeInfo = inputShape->at(1); auto bShapeInfo = inputShape->at(2); auto c0ShapeInfo = inputShape->at(3); - Nd4jLong* maskShapeInfo = block.width() > 4 ? inputShape->at(4) : nullptr; // optional, 2d tensor of dropout mask [bS x inSize] + auto maskShapeInfo = block.width() > 4 ? inputShape->at(4) : nullptr; // optional, 2d tensor of dropout mask [bS x inSize] const int rank = xShapeInfo[0]; // = 3 const Nd4jLong time = xShapeInfo[1]; @@ -465,7 +465,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) { auto ctShapeInfo = inputShape->at(4); auto inGradC0ShapeInfo = inputShape->at(5); auto inGradHtShapeInfo = inputShape->at(6); - Nd4jLong* maskShapeInfo = block.width() > 7 ? inputShape->at(7) : nullptr; // optional, 2d tensor of dropout mask [bS x inSize] + auto maskShapeInfo = block.width() > 7 ? inputShape->at(7) : nullptr; // optional, 2d tensor of dropout mask [bS x inSize] // input shapes validation const int rank = xShapeInfo[0]; @@ -777,7 +777,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) { // } // static NDArray sigmoid_(const NDArray& arr) { -// NDArray result(arr.getShapeInfo(), false, arr.getContext()); +// NDArray result(arr.shapeInfo(), false, arr.getContext()); // (const_cast(arr)).applyTransform(transform::Sigmoid, &result); // return result; // } diff --git a/libnd4j/include/ops/declarable/generic/nn/recurrent/staticBidirectionalRNN.cpp b/libnd4j/include/ops/declarable/generic/nn/recurrent/staticBidirectionalRNN.cpp index bc27c08f6..fbe604a31 100644 --- a/libnd4j/include/ops/declarable/generic/nn/recurrent/staticBidirectionalRNN.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/recurrent/staticBidirectionalRNN.cpp @@ -147,9 +147,9 @@ DECLARE_SHAPE_FN(static_bidirectional_rnn) { auto WhBWShapeInfo = inputShape->at(5); // hidden-to-hidden weights for backward RNN, [numUnitsBW x 
numUnitsBW] auto bBWShapeInfo = inputShape->at(6); // biases for backward RNN, [2*numUnitsBW] - Nd4jLong* h0FWShapeInfo = nullptr; // initial cell output for forward RNN (at time step = 0) [bS x numUnitsFW] - Nd4jLong* h0BWShapeInfo = nullptr; // initial cell output for backward RNN (at time step = 0) [bS x numUnitsBW] - Nd4jLong* maxTimeStepShapeInfo = nullptr; // vector [bS] containing integer values within [0,time), each element of this vector set max time step per each input in batch, this means there are no calculations for time >= maxTimeStep + Nd4jLong const* h0FWShapeInfo = nullptr; // initial cell output for forward RNN (at time step = 0) [bS x numUnitsFW] + Nd4jLong const* h0BWShapeInfo = nullptr; // initial cell output for backward RNN (at time step = 0) [bS x numUnitsBW] + Nd4jLong const* maxTimeStepShapeInfo = nullptr; // vector [bS] containing integer values within [0,time), each element of this vector set max time step per each input in batch, this means there are no calculations for time >= maxTimeStep switch(block.width()) { case 8: diff --git a/libnd4j/include/ops/declarable/generic/nn/recurrent/staticRNN.cpp b/libnd4j/include/ops/declarable/generic/nn/recurrent/staticRNN.cpp index 4100f6745..26d2e0818 100644 --- a/libnd4j/include/ops/declarable/generic/nn/recurrent/staticRNN.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/recurrent/staticRNN.cpp @@ -90,8 +90,8 @@ DECLARE_SHAPE_FN(static_rnn) { auto WhShapeInfo = inputShape->at(2); // hidden-to-hidden weights, [numUnits x numUnits] auto bShapeInfo = inputShape->at(3); // biases for, [2*numUnits] - Nd4jLong* h0ShapeInfo = nullptr; // initial cell output (at time step = 0) [bS x numUnits] - Nd4jLong* maxTimeStepShapeInfo = nullptr; // vector [bS] containing integer values within [0,time), each element of this vector set max time step per each input in batch, this means there are no calculations for time >= maxTimeStep + const Nd4jLong* h0ShapeInfo = nullptr; // initial cell output (at time step 
= 0) [bS x numUnits] + const Nd4jLong* maxTimeStepShapeInfo = nullptr; // vector [bS] containing integer values within [0,time), each element of this vector set max time step per each input in batch, this means there are no calculations for time >= maxTimeStep if(block.width() == 5) { if (inputShape->at(4)[0] == 2) diff --git a/libnd4j/include/ops/declarable/generic/nn/relu_layer.cpp b/libnd4j/include/ops/declarable/generic/nn/relu_layer.cpp index 94a4a0ca4..c76b79b7b 100644 --- a/libnd4j/include/ops/declarable/generic/nn/relu_layer.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/relu_layer.cpp @@ -51,7 +51,7 @@ namespace sd { auto weightsShape = inputShape->at(1); auto outputShape = ShapeUtils::matrixProductShape(inShape, weightsShape, false, false, ArrayOptions::dataType(inShape), block.getWorkspace()); - return SHAPELIST(CONSTANT(outputShape)); + return SHAPELIST(outputShape); } DECLARE_TYPES(relu_layer) { diff --git a/libnd4j/include/ops/declarable/generic/nn/xw_plus_b.cpp b/libnd4j/include/ops/declarable/generic/nn/xw_plus_b.cpp index dbabad395..5b36ee0e5 100644 --- a/libnd4j/include/ops/declarable/generic/nn/xw_plus_b.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/xw_plus_b.cpp @@ -73,7 +73,7 @@ namespace sd { auto outputShape = ShapeUtils::matrixProductShape(inputShape->at(0), weightsShape, false, false, ArrayOptions::dataType(inputShape->at(0)), block.getWorkspace()); - return SHAPELIST(CONSTANT(outputShape)); + return SHAPELIST(outputShape); } DECLARE_TYPES(xw_plus_b) { @@ -121,7 +121,6 @@ namespace sd { } DECLARE_SHAPE_FN(xw_plus_b_bp) { - Nd4jLong* xShapeInfo; Nd4jLong* wShapeInfo; Nd4jLong* bShapeInfo; @@ -129,7 +128,6 @@ namespace sd { COPY_SHAPE(inputShape->at(0), xShapeInfo); COPY_SHAPE(inputShape->at(1), wShapeInfo); COPY_SHAPE(inputShape->at(2), bShapeInfo); - return SHAPELIST(CONSTANT(xShapeInfo), CONSTANT(wShapeInfo), CONSTANT(bShapeInfo)); } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/broadcast_dynamic_shape.cpp 
b/libnd4j/include/ops/declarable/generic/parity_ops/broadcast_dynamic_shape.cpp index 2f90adb78..4fc31dd51 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/broadcast_dynamic_shape.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/broadcast_dynamic_shape.cpp @@ -55,7 +55,7 @@ CUSTOM_OP_IMPL(broadcast_dynamic_shape, 2, 1, false, 0, 0) { for (Nd4jLong i = 0; i < y->lengthOf(); ++i) yShapeInfo[i + 1] = y->e(i); - Nd4jLong* poinerOnOutShapeInfo = nullptr; + const Nd4jLong* poinerOnOutShapeInfo = nullptr; const bool isBroadcastPossible = ShapeUtils::evalBroadcastShapeInfo(xShapeInfo.data(), yShapeInfo.data(), true, poinerOnOutShapeInfo, block.launchContext()->getWorkspace()); diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/non_max_suppression.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/non_max_suppression.cpp index c32ee1ba9..ecddab3bc 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/non_max_suppression.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/non_max_suppression.cpp @@ -80,7 +80,7 @@ namespace sd { DECLARE_SHAPE_FN(non_max_suppression) { auto in = inputShape->at(0); int outRank = shape::rank(in); - Nd4jLong *outputShape = nullptr; + const Nd4jLong *outputShape = nullptr; int maxOutputSize; if (block.width() > 2) @@ -178,7 +178,7 @@ namespace sd { DECLARE_SHAPE_FN(non_max_suppression_v3) { auto in = inputShape->at(0); int outRank = shape::rank(in); - Nd4jLong *outputShape = nullptr; + int maxOutputSize; if (block.width() > 2) @@ -211,7 +211,7 @@ namespace sd { if (len > 0) len = helpers::nonMaxSuppressionV3(block.launchContext(), boxes, scales, maxOutputSize, overlayThreshold, scoreThreshold, nullptr); - outputShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(len, DataType::INT32); + auto outputShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(len, DataType::INT32); return SHAPELIST(outputShape); } diff --git 
a/libnd4j/include/ops/declarable/generic/parity_ops/non_max_suppression_overlaps.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/non_max_suppression_overlaps.cpp index a8477c63a..30f59ff35 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/non_max_suppression_overlaps.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/non_max_suppression_overlaps.cpp @@ -58,7 +58,6 @@ namespace sd { DECLARE_SHAPE_FN(non_max_suppression_overlaps) { auto in = inputShape->at(0); int outRank = shape::rank(in); - Nd4jLong *outputShape = nullptr; int maxOutputSize; if (block.width() > 2) @@ -76,7 +75,7 @@ namespace sd { if (boxSize < maxOutputSize) maxOutputSize = boxSize; - outputShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(maxOutputSize, DataType::INT32); + auto outputShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(maxOutputSize, DataType::INT32); return SHAPELIST(outputShape); } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/nth_element.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/nth_element.cpp index b1b68c23d..b0a549c43 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/nth_element.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/nth_element.cpp @@ -49,24 +49,25 @@ namespace sd { auto in = inputShape->at(0); int outRank = shape::rank(in) - 1; - Nd4jLong *outputShape = nullptr; + Nd4jLong const* outShape = nullptr; if (outRank > 1) { + Nd4jLong *outputShape = nullptr; ALLOCATE(outputShape, block.getWorkspace(), shape::shapeInfoLength(outRank), Nd4jLong); outputShape[0] = outRank; for (Nd4jLong e = 0; e < outRank; e++) outputShape[e + 1] = in[e + 1]; ShapeUtils::updateStridesAndType(outputShape, in, shape::order(in)); - outputShape = CONSTANT(outputShape); + outShape = CONSTANT(outputShape); } else if (outRank == 1) { - outputShape = ConstantShapeHelper::getInstance()->vectorShapeInfo(shape::sizeAt(in, 0), ArrayOptions::dataType(in)); + outShape = 
ConstantShapeHelper::getInstance()->vectorShapeInfo(shape::sizeAt(in, 0), ArrayOptions::dataType(in)); } else { //outputShape = shape::createScalarShapeInfo(); - outputShape = ConstantShapeHelper::getInstance()->scalarShapeInfo(ArrayOptions::dataType(in)); + outShape = ConstantShapeHelper::getInstance()->scalarShapeInfo(ArrayOptions::dataType(in)); } - return SHAPELIST(outputShape); + return SHAPELIST(outShape); } DECLARE_TYPES(nth_element) { getOpDescriptor() diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/onehot.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/onehot.cpp index d64499ecf..6349b84fe 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/onehot.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/onehot.cpp @@ -90,7 +90,6 @@ namespace sd { REQUIRE_TRUE(depth > 0, 0, "OneHot: depth must be positive value"); - Nd4jLong *newShape; int rank = shape::rank(inShape); if (axis < 0) @@ -101,7 +100,7 @@ namespace sd { shape.push_back(shape::shapeOf(inShape)[e]); shape.insert(shape.begin() + axis, depth); - newShape = ConstantShapeHelper::getInstance()->createShapeInfo(dtype, 'c', rank + 1, shape.data()); + auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(dtype, 'c', rank + 1, shape.data()); return SHAPELIST(newShape); } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/segment_max.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/segment_max.cpp index 7ab19668a..b348c4549 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/segment_max.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/segment_max.cpp @@ -82,8 +82,8 @@ namespace sd { return helpers::segmentMaxFunctorBP(block.launchContext(), input, indices, gradOut, output); } DECLARE_SHAPE_FN(segment_max_bp){ - Nd4jLong* in = inputShape->at(0); - Nd4jLong* inIdx = inputShape->at(1); + auto in = inputShape->at(0); + auto inIdx = inputShape->at(1); Nd4jLong* outShape; Nd4jLong* outIndex; diff --git 
a/libnd4j/include/ops/declarable/generic/parity_ops/segment_mean.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/segment_mean.cpp index abb865d8e..1d8a5bb7f 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/segment_mean.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/segment_mean.cpp @@ -82,14 +82,15 @@ namespace sd { return helpers::segmentMeanFunctorBP(block.launchContext(), input, indices, gradOut, output); } DECLARE_SHAPE_FN(segment_mean_bp){ - Nd4jLong* in = inputShape->at(0); - Nd4jLong* inIdx = inputShape->at(1); + auto in = inputShape->at(0); + auto inIdx = inputShape->at(1); Nd4jLong* outShape; Nd4jLong* outIndex; COPY_SHAPE(in, outShape); COPY_SHAPE(inIdx, outIndex); return SHAPELIST(CONSTANT(outShape), CONSTANT(outIndex)); +// return SHAPELIST(in, inIdx); } DECLARE_TYPES(segment_mean_bp) { getOpDescriptor() diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/segment_min.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/segment_min.cpp index a245b000b..10bc1dd26 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/segment_min.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/segment_min.cpp @@ -44,7 +44,7 @@ namespace sd { DECLARE_SHAPE_FN(segment_min) { auto idxVector = INPUT_VARIABLE(1); - Nd4jLong* in = inputShape->at(0); + auto in = inputShape->at(0); int outRank = shape::rank(in); Nd4jLong* outputShape = nullptr; int val = (*idxVector).e(idxVector->lengthOf() - 1); @@ -72,14 +72,15 @@ namespace sd { return helpers::segmentMinFunctorBP(block.launchContext(), input, indices, gradOut, output); } DECLARE_SHAPE_FN(segment_min_bp){ - Nd4jLong* in = inputShape->at(0); - Nd4jLong* inIdx = inputShape->at(1); + auto in = inputShape->at(0); + auto inIdx = inputShape->at(1); Nd4jLong* outShape; Nd4jLong* outIndex; COPY_SHAPE(in, outShape); COPY_SHAPE(inIdx, outIndex); return SHAPELIST(CONSTANT(outShape), CONSTANT(outIndex)); +// return SHAPELIST(in, inIdx); } DECLARE_TYPES(segment_min) { diff 
--git a/libnd4j/include/ops/declarable/generic/parity_ops/segment_prod.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/segment_prod.cpp index 478eb9e23..4f83ac9b0 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/segment_prod.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/segment_prod.cpp @@ -85,8 +85,8 @@ namespace sd { DECLARE_SHAPE_FN(segment_prod_bp){ - Nd4jLong* in = inputShape->at(0); - Nd4jLong* inIdx = inputShape->at(1); + auto in = inputShape->at(0); + auto inIdx = inputShape->at(1); Nd4jLong* outShape; Nd4jLong* outIndex; diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/segment_sum.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/segment_sum.cpp index bb959fd3d..cb4734c5f 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/segment_sum.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/segment_sum.cpp @@ -68,15 +68,15 @@ namespace sd { return helpers::segmentSumFunctorBP(block.launchContext(), INPUT_VARIABLE(0), INPUT_VARIABLE(1), INPUT_VARIABLE(2), OUTPUT_NULLIFIED(0)); } DECLARE_SHAPE_FN(segment_sum_bp){ - Nd4jLong* in = inputShape->at(0); - Nd4jLong* inIdx = inputShape->at(1); + auto in = inputShape->at(0); + auto inIdx = inputShape->at(1); Nd4jLong* outShape; Nd4jLong* outIndex; COPY_SHAPE(in, outShape); COPY_SHAPE(inIdx, outIndex); return SHAPELIST(CONSTANT(outShape), CONSTANT(outIndex)); - +// return SHAPELIST(in, inIdx); } DECLARE_TYPES(segment_sum) { diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/unique.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/unique.cpp index 64b915c53..9005348a1 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/unique.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/unique.cpp @@ -40,8 +40,8 @@ namespace sd { auto in = inputShape->at(0); auto source = INPUT_VARIABLE(0); // auto shapeList = SHAPELIST(); - Nd4jLong* valuesShape; - Nd4jLong* indicesShape; + const Nd4jLong* valuesShape; + const Nd4jLong* 
indicesShape; int uniqueCount = helpers::uniqueCount(block.launchContext(), source); diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_max.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_max.cpp index 8ca01540c..1909005a7 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_max.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_max.cpp @@ -81,8 +81,8 @@ namespace sd { } DECLARE_SHAPE_FN(unsorted_segment_max_bp){ - Nd4jLong* in = inputShape->at(0); - Nd4jLong* inIdx = inputShape->at(1); + auto in = inputShape->at(0); + auto inIdx = inputShape->at(1); Nd4jLong* outShape; Nd4jLong* outIndex; diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_mean.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_mean.cpp index 7aa46295c..def3adb6a 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_mean.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_mean.cpp @@ -83,15 +83,15 @@ namespace sd { } DECLARE_SHAPE_FN(unsorted_segment_mean_bp){ - Nd4jLong* in = inputShape->at(0); - Nd4jLong* inIdx = inputShape->at(1); + auto in = inputShape->at(0); + auto inIdx = inputShape->at(1); Nd4jLong* outShape; Nd4jLong* outIndex; COPY_SHAPE(in, outShape); COPY_SHAPE(inIdx, outIndex); return SHAPELIST(CONSTANT(outShape), CONSTANT(outIndex)); - +// return SHAPELIST(in, inIdx); } } } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_min.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_min.cpp index 76dd982f7..da31477eb 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_min.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_min.cpp @@ -43,7 +43,7 @@ namespace sd { DECLARE_SHAPE_FN(unsorted_segment_min) { - Nd4jLong* in = inputShape->at(0); + auto in = inputShape->at(0); int outRank = 
shape::rank(in); Nd4jLong* outputShape = nullptr; Nd4jLong numOfClasses = block.width() == 3 ? INPUT_VARIABLE(2)->e(0) : INT_ARG(0); @@ -83,8 +83,8 @@ namespace sd { } DECLARE_SHAPE_FN(unsorted_segment_min_bp){ - Nd4jLong* in = inputShape->at(0); - Nd4jLong* inIdx = inputShape->at(1); + auto in = inputShape->at(0); + auto inIdx = inputShape->at(1); Nd4jLong* outShape; Nd4jLong* outIndex; diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_prod.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_prod.cpp index d2f491c55..905a04b36 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_prod.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_prod.cpp @@ -96,15 +96,15 @@ namespace sd { } DECLARE_SHAPE_FN(unsorted_segment_prod_bp){ - Nd4jLong* in = inputShape->at(0); - Nd4jLong* inIdx = inputShape->at(1); + auto in = inputShape->at(0); + auto inIdx = inputShape->at(1); Nd4jLong* outShape; Nd4jLong* outIndex; COPY_SHAPE(in, outShape); COPY_SHAPE(inIdx, outIndex); return SHAPELIST(CONSTANT(outShape), CONSTANT(outIndex)); - +// return SHAPELIST(in, inIdx); } } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_sqrt_n.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_sqrt_n.cpp index a8dbf8eaf..e208f4489 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_sqrt_n.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_sqrt_n.cpp @@ -81,15 +81,15 @@ namespace sd { } DECLARE_SHAPE_FN(unsorted_segment_sqrt_n_bp){ - Nd4jLong* in = inputShape->at(0); - Nd4jLong* inIdx = inputShape->at(1); + auto in = inputShape->at(0); + auto inIdx = inputShape->at(1); Nd4jLong* outShape; Nd4jLong* outIndex; COPY_SHAPE(in, outShape); COPY_SHAPE(inIdx, outIndex); return SHAPELIST(CONSTANT(outShape), CONSTANT(outIndex)); - +// return SHAPELIST(in, inIdx); } } diff --git 
a/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_sum.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_sum.cpp index 1afcab34f..325385a86 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_sum.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/unsorted_segment_sum.cpp @@ -71,8 +71,8 @@ namespace sd { } DECLARE_SHAPE_FN(unsorted_segment_sum_bp){ - Nd4jLong* in = inputShape->at(0); - Nd4jLong* inIdx = inputShape->at(1); + auto in = inputShape->at(0); + auto inIdx = inputShape->at(1); Nd4jLong* outShape; Nd4jLong* outIndex; diff --git a/libnd4j/include/ops/declarable/generic/random/bernoulli.cpp b/libnd4j/include/ops/declarable/generic/random/bernoulli.cpp index 1441448c9..f0b2b587b 100644 --- a/libnd4j/include/ops/declarable/generic/random/bernoulli.cpp +++ b/libnd4j/include/ops/declarable/generic/random/bernoulli.cpp @@ -38,7 +38,7 @@ namespace sd { T f = T_ARG(0); - functions::random::RandomFunction::template execTransform>(block.getRNG(), z->getBuffer(), z->getShapeInfo(), &f); + functions::random::RandomFunction::template execTransform>(block.getRNG(), z->buffer(), z->shapeInfo(), &f); */ auto z = OUTPUT_VARIABLE(0); diff --git a/libnd4j/include/ops/declarable/generic/random/gamma.cpp b/libnd4j/include/ops/declarable/generic/random/gamma.cpp index d508e1929..e21458530 100644 --- a/libnd4j/include/ops/declarable/generic/random/gamma.cpp +++ b/libnd4j/include/ops/declarable/generic/random/gamma.cpp @@ -60,7 +60,9 @@ namespace sd { if (inputShape->size() > 2) { auto rest = inputShape->at(2); additionalShape = nullptr; REQUIRE_TRUE(ShapeUtils::areShapesBroadcastable(alphaShape, rest), 0, "random_gamma: alpha and beta shapes should be broadcastable."); - ShapeUtils::evalBroadcastShapeInfo(alphaShape, rest, true, additionalShape, block.workspace()); + const Nd4jLong* additionalShapeBroadcasted = nullptr; + ShapeUtils::evalBroadcastShapeInfo(alphaShape, rest, true, 
additionalShapeBroadcasted, block.workspace()); + additionalShape = additionalShapeBroadcasted; } auto lastDim = shape::sizeAt(alphaShape, 0); auto dtype = ArrayOptions::dataType(alphaShape); @@ -80,4 +82,4 @@ namespace sd { } } -#endif \ No newline at end of file +#endif diff --git a/libnd4j/include/ops/declarable/generic/random/normal.cpp b/libnd4j/include/ops/declarable/generic/random/normal.cpp index 8bfbd8db6..f81a06786 100644 --- a/libnd4j/include/ops/declarable/generic/random/normal.cpp +++ b/libnd4j/include/ops/declarable/generic/random/normal.cpp @@ -36,7 +36,7 @@ namespace sd { auto x = INPUT_VARIABLE(0); auto z = OUTPUT_VARIABLE(0); - functions::random::RandomFunction::template execTransform>(block.getRNG(), z->getBuffer(), z->getShapeInfo(), z->getBuffer(), z->getShapeInfo(), z->getBuffer(), z->getShapeInfo(), block.getTArguments()->data()); + functions::random::RandomFunction::template execTransform>(block.getRNG(), z->buffer(), z->shapeInfo(), z->buffer(), z->shapeInfo(), z->buffer(), z->shapeInfo(), block.getTArguments()->data()); */ RandomLauncher::fillGaussian(block.launchContext(), rng, OUTPUT_VARIABLE(0), T_ARG(0), T_ARG(1)); diff --git a/libnd4j/include/ops/declarable/generic/reduce/reduceStDev.cpp b/libnd4j/include/ops/declarable/generic/reduce/reduceStDev.cpp index 1682b9d72..d101a6a79 100644 --- a/libnd4j/include/ops/declarable/generic/reduce/reduceStDev.cpp +++ b/libnd4j/include/ops/declarable/generic/reduce/reduceStDev.cpp @@ -83,7 +83,7 @@ DECLARE_SHAPE_FN(reduce_stdev) { for(const auto& item : dimensions) REQUIRE_TRUE(item >= -inputShape->at(0)[0] && item < inputShape->at(0)[0], 0, "REDUCE_STDEV OP: the input dimension to reduce along must be in range [-%i, %i), but got %i instead !" 
, inputShape->at(0)[0], inputShape->at(0)[0], item); - Nd4jLong* outShapeInfo = ShapeUtils::evalReduceShapeInfo(shape::order(in), dimensions, in, keepDims, false, block.getWorkspace()); + auto outShapeInfo = ShapeUtils::evalReduceShapeInfo(shape::order(in), dimensions, in, keepDims, false, block.getWorkspace()); return SHAPELIST(outShapeInfo); } @@ -132,7 +132,7 @@ CUSTOM_OP_IMPL(reduce_stdev_bp, 2, 1, false, 0, 0) { auto mean = input->reduceAlongDimension(reduce::Mean, dimensions, true); - NDArray variance(mean.getShapeInfo(), true, block.launchContext()); // create empty array with shape matching shape of mean array + NDArray variance(mean.shapeInfo(), true, block.launchContext()); // create empty array with shape matching shape of mean array input->varianceAlongDimension(variance::SummaryStatsStandardDeviation, variance, biasCorrected, dimensions); gradI->assign( (*input - mean) / (variance * NminusOne)); // automatic broadcasting happens here @@ -165,6 +165,7 @@ DECLARE_SHAPE_FN(reduce_stdev_bp) { COPY_SHAPE(in, gradIshapeInfo); return SHAPELIST(CONSTANT(gradIshapeInfo)); +// return SHAPELIST(in); } DECLARE_TYPES(reduce_stdev_bp) { diff --git a/libnd4j/include/ops/declarable/generic/reduce/reduce_logsumexp.cpp b/libnd4j/include/ops/declarable/generic/reduce/reduce_logsumexp.cpp index 805db1883..556ad2a7c 100644 --- a/libnd4j/include/ops/declarable/generic/reduce/reduce_logsumexp.cpp +++ b/libnd4j/include/ops/declarable/generic/reduce/reduce_logsumexp.cpp @@ -70,7 +70,7 @@ namespace ops { axes = *block.getIArguments(); } - Nd4jLong* outShapeInfo = ShapeUtils::evalReduceShapeInfo(shape::order(inputShape->at(0)), axes, inputShape->at(0), keepDims, false, block.getWorkspace()); + auto outShapeInfo = ShapeUtils::evalReduceShapeInfo(shape::order(inputShape->at(0)), axes, inputShape->at(0), keepDims, false, block.getWorkspace()); return SHAPELIST(outShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/reduce/reduce_max.cpp 
b/libnd4j/include/ops/declarable/generic/reduce/reduce_max.cpp index 3d2dbe57e..bea1e7ecc 100644 --- a/libnd4j/include/ops/declarable/generic/reduce/reduce_max.cpp +++ b/libnd4j/include/ops/declarable/generic/reduce/reduce_max.cpp @@ -77,7 +77,7 @@ DECLARE_SHAPE_FN(reduce_max) { for(const auto& item : dimensions) REQUIRE_TRUE(item >= -inputShape->at(0)[0] && item < inputShape->at(0)[0], 0, "REDUCE_MAX OP: the input dimension to reduce along must be in range [-%i, %i), but got %i instead !" , inputShape->at(0)[0], inputShape->at(0)[0], item); - Nd4jLong* outShapeInfo = ShapeUtils::evalReduceShapeInfo(shape::order(inputShape->at(0)), dimensions, inputShape->at(0), keepDims, false, block.getWorkspace()); + auto outShapeInfo = ShapeUtils::evalReduceShapeInfo(shape::order(inputShape->at(0)), dimensions, inputShape->at(0), keepDims, false, block.getWorkspace()); return SHAPELIST(outShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/reduce/reduce_min.cpp b/libnd4j/include/ops/declarable/generic/reduce/reduce_min.cpp index 254cfe021..d4b470b8e 100644 --- a/libnd4j/include/ops/declarable/generic/reduce/reduce_min.cpp +++ b/libnd4j/include/ops/declarable/generic/reduce/reduce_min.cpp @@ -77,7 +77,7 @@ DECLARE_SHAPE_FN(reduce_min) { for(const auto& item : dimensions) REQUIRE_TRUE(item >= -inputShape->at(0)[0] && item < inputShape->at(0)[0], 0, "REDUCE_MIN OP: the input dimension to reduce along must be in range [-%i, %i), but got %i instead !" 
, inputShape->at(0)[0], inputShape->at(0)[0], item); - Nd4jLong* outShapeInfo = ShapeUtils::evalReduceShapeInfo(shape::order(inputShape->at(0)), dimensions, inputShape->at(0), keepDims, false, block.getWorkspace()); + auto outShapeInfo = ShapeUtils::evalReduceShapeInfo(shape::order(inputShape->at(0)), dimensions, inputShape->at(0), keepDims, false, block.getWorkspace()); return SHAPELIST(outShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/reduce/reduce_sqnorm.cpp b/libnd4j/include/ops/declarable/generic/reduce/reduce_sqnorm.cpp index 0c53a261b..22d2c6e1b 100644 --- a/libnd4j/include/ops/declarable/generic/reduce/reduce_sqnorm.cpp +++ b/libnd4j/include/ops/declarable/generic/reduce/reduce_sqnorm.cpp @@ -75,7 +75,7 @@ DECLARE_SHAPE_FN(reduce_sqnorm) { for(const auto& item : dimensions) REQUIRE_TRUE(item >= -inputShape->at(0)[0] && item < inputShape->at(0)[0], 0, "REDUCE_SQNORM OP: the input dimension to reduce along must be in range [-%i, %i), but got %i instead !" , inputShape->at(0)[0], inputShape->at(0)[0], item); - Nd4jLong* outShapeInfo = ShapeUtils::evalReduceShapeInfo(shape::order(inputShape->at(0)), dimensions, inputShape->at(0), keepDims, false, block.getWorkspace()); + auto outShapeInfo = ShapeUtils::evalReduceShapeInfo(shape::order(inputShape->at(0)), dimensions, inputShape->at(0), keepDims, false, block.getWorkspace()); return SHAPELIST(outShapeInfo); } diff --git a/libnd4j/include/ops/declarable/generic/shape/reshape_as.cpp b/libnd4j/include/ops/declarable/generic/shape/reshape_as.cpp index 90e2ff398..9a8dc00c2 100644 --- a/libnd4j/include/ops/declarable/generic/shape/reshape_as.cpp +++ b/libnd4j/include/ops/declarable/generic/shape/reshape_as.cpp @@ -47,7 +47,7 @@ namespace sd { DECLARE_SHAPE_FN(reshapeas) { - return SHAPELIST(ShapeBuilders::copyShapeInfo(INPUT_VARIABLE(1)->getShapeInfo(), false, block.workspace())); + return SHAPELIST(ShapeBuilders::copyShapeInfo(INPUT_VARIABLE(1)->shapeInfo(), false, block.workspace())); } 
DECLARE_TYPES(reshapeas) { diff --git a/libnd4j/include/ops/declarable/generic/shape/squeeze.cpp b/libnd4j/include/ops/declarable/generic/shape/squeeze.cpp index 812947422..0b71dae52 100644 --- a/libnd4j/include/ops/declarable/generic/shape/squeeze.cpp +++ b/libnd4j/include/ops/declarable/generic/shape/squeeze.cpp @@ -93,7 +93,7 @@ namespace sd { DECLARE_SHAPE_FN(squeeze) { auto shapeList = SHAPELIST(); - Nd4jLong* newShape; +// Nd4jLong* newShape; auto in = inputShape->at(0); auto rank = shape::rank(in); auto length = shape::length(in); @@ -148,7 +148,7 @@ namespace sd { return shapeList; } - newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(in), order, shape); + auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(in), order, shape); shapeList->push_back(newShape); return shapeList; } diff --git a/libnd4j/include/ops/declarable/generic/tensor/strided_slice.cpp b/libnd4j/include/ops/declarable/generic/tensor/strided_slice.cpp index 747331ef0..88b06a631 100644 --- a/libnd4j/include/ops/declarable/generic/tensor/strided_slice.cpp +++ b/libnd4j/include/ops/declarable/generic/tensor/strided_slice.cpp @@ -415,7 +415,7 @@ namespace sd { ALLOCATE(subArrShapeInfo, block.getWorkspace(), shape::shapeInfoLength(x->rankOf()), Nd4jLong); Nd4jLong offset; - shape::calcSubArrShapeInfoAndOffset(indices.data(), x->getShapeInfo(), subArrShapeInfo, offset, true, true); + shape::calcSubArrShapeInfoAndOffset(indices.data(), x->shapeInfo(), subArrShapeInfo, offset, true, true); auto subArrShapeInfoPack = ConstantShapeHelper::getInstance()->bufferForShapeInfo(subArrShapeInfo); NDArray::prepareSpecialUse({z}, {x}); @@ -502,7 +502,6 @@ namespace sd { ++e; } - Nd4jLong *newShape; std::vector input_shape; //(shape::rank(inShape)); auto inputLen = shape::length(inShape); std::vector shape; @@ -519,7 +518,7 @@ namespace sd { std::vector indices; bool result = _preprocess_strided_slice(&indices, &shape, input_shape, 
begin, end, strides, begin_mask, ellipsis_mask, end_mask, new_axis_mask, shrink_axis_mask, &is_identity, &is_simple_slice, &is_dim0); if (indices.size()) { - newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), 'c', + auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), 'c', shape); // if (inputLen > 1) { // newShape = ConstantShapeHelper::getInstance()->createShapeInfo(ArrayOptions::dataType(inShape), 'c', @@ -527,10 +526,10 @@ namespace sd { // } else { // newShape = ConstantShapeHelper::getInstance()->scalarShapeInfo(ArrayOptions::dataType(inShape)); // } - } else - newShape = ConstantShapeHelper::getInstance()->emptyShapeInfo(ArrayOptions::dataType(inShape)); + return SHAPELIST(newShape); + } - return SHAPELIST(newShape); + return SHAPELIST(ConstantShapeHelper::getInstance()->emptyShapeInfo(ArrayOptions::dataType(inShape))); } diff --git a/libnd4j/include/ops/declarable/generic/transforms/concat.cpp b/libnd4j/include/ops/declarable/generic/transforms/concat.cpp index 0b171b36f..fb1fd2e87 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/concat.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/concat.cpp @@ -132,7 +132,7 @@ DECLARE_SHAPE_FN(concat) { // first of all take into account possible presence of empty arrays // also if scalar is present -> use the shape of vector with length=1 instead - std::vector arrShapes; + ShapeList arrShapes; std::vector shapesToDelete; int index = 0; for(int i = 0; i < numOfInArrs; ++i) { @@ -151,7 +151,7 @@ DECLARE_SHAPE_FN(concat) { const int numOfNonEmptyArrs = arrShapes.size(); - const int rank = arrShapes[0][0]; + const int rank = shape::rank(arrShapes.at(0)); int axis = isAxisInLastArr ? 
INPUT_VARIABLE(block.width() - 1)->e(0) : INT_ARG(0); if(axis < 0){ @@ -162,33 +162,33 @@ DECLARE_SHAPE_FN(concat) { REQUIRE_TRUE(0 <= axis && axis < rank, 0, "CONCAT op: input axis must be in range [0, %i], but got %i instead!", rank-1, axis); for(int i = 1; i < numOfNonEmptyArrs; ++i) - REQUIRE_TRUE(arrShapes[i][0] == rank, 0, "CONCAT op: all input arrays must have the same rank !"); + REQUIRE_TRUE(shape::rank(arrShapes.at(i)) == rank, 0, "CONCAT op: all input arrays must have the same rank !"); for(int i = 1; i < numOfNonEmptyArrs; ++i) { for(int dim = 0; dim < rank; ++dim) if(dim != axis) - REQUIRE_TRUE(arrShapes[i][dim+1] == arrShapes[0][dim+1], 0, "CONCAT op: all input arrays must have the same dimensions (except those on input axis) !"); + REQUIRE_TRUE(arrShapes.at(i)[dim+1] == arrShapes.at(0)[dim+1], 0, "CONCAT op: all input arrays must have the same dimensions (except those on input axis) !"); } // ******** end of input validation ******** // Nd4jLong* outShapeInfo(nullptr); - COPY_SHAPE(arrShapes[0], outShapeInfo); + COPY_SHAPE(arrShapes.at(0), outShapeInfo); // case when we have only one input array if(numOfNonEmptyArrs == 1) { - ShapeUtils::updateStridesAndType(outShapeInfo, arrShapes[0], shape::order(arrShapes[0])); + ShapeUtils::updateStridesAndType(outShapeInfo, arrShapes.at(0), shape::order(arrShapes.at(0))); return SHAPELIST(CONSTANT(outShapeInfo)); } for(int i = 1; i < numOfNonEmptyArrs; ++i) - outShapeInfo[axis + 1] += arrShapes[i][axis + 1]; + outShapeInfo[axis + 1] += arrShapes.at(i)[axis + 1]; - ShapeUtils::updateStridesAndType(outShapeInfo, arrShapes[0], shape::order(arrShapes[0])); + ShapeUtils::updateStridesAndType(outShapeInfo, arrShapes.at(0), shape::order(arrShapes.at(0))); // delete dynamically allocated vectors shapes with length=1 - for(int index : shapesToDelete) - RELEASE(arrShapes[index], block.getWorkspace()); +// for(int index : shapesToDelete) +// RELEASE(arrShapes[index], block.getWorkspace()); auto result = 
ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(outShapeInfo)); RELEASE(outShapeInfo, block.getWorkspace()); @@ -237,8 +237,8 @@ DECLARE_SHAPE_FN(concat) { // auto buffers = new Nd4jPointer[elements]; // auto shapes = new Nd4jPointer[elements]; - // buffers[0] = (Nd4jPointer) first->getBuffer(); - // shapes[0] = (Nd4jPointer) first->getShapeInfo(); + // buffers[0] = (Nd4jPointer) first->buffer(); + // shapes[0] = (Nd4jPointer) first->shapeInfo(); // if (_dimension < 0) // _dimension += first->rankOf(); @@ -256,8 +256,8 @@ DECLARE_SHAPE_FN(concat) { // if (array->isEmpty()) // continue; - // buffers[er] = reinterpret_cast(array->getBuffer()); - // shapes[er++] = reinterpret_cast(array->getShapeInfo()); + // buffers[er] = reinterpret_cast(array->buffer()); + // shapes[er++] = reinterpret_cast(array->shapeInfo()); // oldScalars &= array->rankOf() == 2 && array->isScalar(); @@ -274,7 +274,7 @@ DECLARE_SHAPE_FN(concat) { // _dimension = 1; // } - // sd::SpecialMethods::concatCpuGeneric(_dimension, elements, buffers, shapes, output->getBuffer(), output->getShapeInfo()); + // sd::SpecialMethods::concatCpuGeneric(_dimension, elements, buffers, shapes, output->buffer(), output->shapeInfo()); // STORE_RESULT(*output); diff --git a/libnd4j/include/ops/declarable/generic/transforms/mirrorPad.cpp b/libnd4j/include/ops/declarable/generic/transforms/mirrorPad.cpp index a4b934853..143e57a80 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/mirrorPad.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/mirrorPad.cpp @@ -79,23 +79,20 @@ DECLARE_SHAPE_FN(mirror_pad) { REQUIRE_TRUE( (paddings->e(i,0) <= (input->sizeAt(i) - includeBorder)) && (paddings->e(i,1) <= (input->sizeAt(i) - includeBorder)), 0, "MIRROR_PAD OP: wrong content of paddings array, its elements must be no grater then corresponding dimension of input array for symmetric mode (or dimension-1 for reflect mode) !"); } - Nd4jLong* outShapeInfo(nullptr); - if(rank == 1) { Nd4jLong 
len = input->lengthOf() + paddings->e(0) + paddings->e(1); - outShapeInfo = ConstantShapeHelper::getInstance()->vectorShapeInfo(len, input->dataType()); - } - else { - ALLOCATE(outShapeInfo, block.getWorkspace(), shape::shapeInfoLength(rank), Nd4jLong); - outShapeInfo[0] = rank; - for(int i = 0; i < rank; ++i) - outShapeInfo[i+1] = input->sizeAt(i) + paddings->e(i,0) + paddings->e(i,1); - ShapeUtils::updateStridesAndType(outShapeInfo, input->shapeInfo(), input->ordering()); - - outShapeInfo = CONSTANT(outShapeInfo); + return SHAPELIST(ConstantShapeHelper::getInstance()->vectorShapeInfo(len, input->dataType())); } - return SHAPELIST(outShapeInfo); + Nd4jLong* outShapeInfo(nullptr); + + ALLOCATE(outShapeInfo, block.getWorkspace(), shape::shapeInfoLength(rank), Nd4jLong); + outShapeInfo[0] = rank; + for(int i = 0; i < rank; ++i) + outShapeInfo[i+1] = input->sizeAt(i) + paddings->e(i,0) + paddings->e(i,1); + ShapeUtils::updateStridesAndType(outShapeInfo, input->shapeInfo(), input->ordering()); + + return SHAPELIST(CONSTANT(outShapeInfo)); } diff --git a/libnd4j/include/ops/declarable/generic/transforms/parallelStack.cpp b/libnd4j/include/ops/declarable/generic/transforms/parallelStack.cpp index b6a2ba1e1..46572d88e 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/parallelStack.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/parallelStack.cpp @@ -34,7 +34,7 @@ CUSTOM_OP_IMPL(parallel_stack, -1, 1, false, 0, 0) { // check whether shapes of all input array are the same for (int i = 0; i < (int) block.width() - 1; ++i) - REQUIRE_TRUE(shape::equalsSoft((INPUT_VARIABLE(i))->getShapeInfo(), (INPUT_VARIABLE(i+1))->getShapeInfo()), 0, "PARALLEL_STACK op: the shapes of all input arrays must be the same !"); + REQUIRE_TRUE(shape::equalsSoft((INPUT_VARIABLE(i))->shapeInfo(), (INPUT_VARIABLE(i+1))->shapeInfo()), 0, "PARALLEL_STACK op: the shapes of all input arrays must be the same !"); std::vector inArrs(block.width()); for(int i = 0; i < block.width(); 
++i) diff --git a/libnd4j/include/ops/declarable/generic/transforms/slice.cpp b/libnd4j/include/ops/declarable/generic/transforms/slice.cpp index dc4671ef7..96e7fe6b3 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/slice.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/slice.cpp @@ -85,7 +85,7 @@ namespace sd { Nd4jLong offset; - shape::calcSubArrShapeInfoAndOffset(indices.data(), input->getShapeInfo(), subArrShapeInfo, offset, true); + shape::calcSubArrShapeInfoAndOffset(indices.data(), input->shapeInfo(), subArrShapeInfo, offset, true); auto subArrShapeInfoPack = ConstantShapeHelper::getInstance()->bufferForShapeInfo(subArrShapeInfo); diff --git a/libnd4j/include/ops/declarable/generic/transforms/split.cpp b/libnd4j/include/ops/declarable/generic/transforms/split.cpp index 60a80378e..462f2c77e 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/split.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/split.cpp @@ -84,17 +84,14 @@ namespace ops { DECLARE_SHAPE_FN(split) { int num_splits = INT_ARG(0); - Nd4jLong *input = nullptr; - sd::DataType dataType; + auto input = inputShape->at(0); + sd::DataType dataType = ArrayOptions::dataType(input); // axis is 0 by default int axis = 0; int inputVar = 0; - if (inputShape->size() == 1) { - input = inputShape->at(0); - dataType = ArrayOptions::dataType(input); - } else { + if (inputShape->size() != 1) { auto shape0 = inputShape->at(0); auto shape1 = inputShape->at(1); diff --git a/libnd4j/include/ops/declarable/generic/transforms/stack.cpp b/libnd4j/include/ops/declarable/generic/transforms/stack.cpp index a78442b03..65cd41a3a 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/stack.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/stack.cpp @@ -41,7 +41,7 @@ CUSTOM_OP_IMPL(stack, -1, 1, false, 0, 0) { // input validation // check whether shapes of all input array are the same for (int i = 0; i < (int) block.width() - 1; ++i) - 
REQUIRE_TRUE(shape::equalsSoft((INPUT_VARIABLE(i))->getShapeInfo(), (INPUT_VARIABLE(i+1))->getShapeInfo()), 0, "STACK op: the shapes of all input arrays must be the same !"); + REQUIRE_TRUE(shape::equalsSoft((INPUT_VARIABLE(i))->shapeInfo(), (INPUT_VARIABLE(i+1))->shapeInfo()), 0, "STACK op: the shapes of all input arrays must be the same !"); REQUIRE_TRUE(dim <= input->rankOf(), 0, "STACK op: the input dimension parameter must be <= rank of input arrays shapes (rank=%i), but got %i instead !", input->shapeOf(), dim); diff --git a/libnd4j/include/ops/declarable/generic/transforms/tile.cpp b/libnd4j/include/ops/declarable/generic/transforms/tile.cpp index 6041d1c41..4dc259bba 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/tile.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/tile.cpp @@ -68,7 +68,7 @@ CUSTOM_OP_IMPL(tile, 1, 1, false, 0, -2) { DECLARE_SHAPE_FN(tile) { - Nd4jLong* inShape = inputShape->at(0); + auto inShape = inputShape->at(0); const int inRank = inShape[0]; std::vector reps; @@ -145,8 +145,8 @@ CUSTOM_OP_IMPL(tile_bp, 2, 1, false, 0, -2) { DECLARE_SHAPE_FN(tile_bp) { - Nd4jLong* inShape = inputShape->at(0); - Nd4jLong* gradOShape = inputShape->at(1); + auto inShape = inputShape->at(0); + auto gradOShape = inputShape->at(1); const int inRank = inShape[0]; std::vector reps; diff --git a/libnd4j/include/ops/declarable/generic/updaters/adaDeltaUpdater.cpp b/libnd4j/include/ops/declarable/generic/updaters/adaDeltaUpdater.cpp index bab205543..93f01ae1f 100644 --- a/libnd4j/include/ops/declarable/generic/updaters/adaDeltaUpdater.cpp +++ b/libnd4j/include/ops/declarable/generic/updaters/adaDeltaUpdater.cpp @@ -41,11 +41,11 @@ namespace sd { return Status::OK(); REQUIRE_TRUE(gradient->isSameShape(initStateMsg), 0, "ADA_DELTA UPDATER OP: input state Msg must have the same shape as gradient," - " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->getShapeInfo()).c_str(), - 
ShapeUtils::shapeAsString(initStateMsg->getShapeInfo()).c_str()); + " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->shapeInfo()).c_str(), + ShapeUtils::shapeAsString(initStateMsg->shapeInfo()).c_str()); REQUIRE_TRUE(gradient->isSameShape(initStateMsdx), 0, "ADA_DELTA UPDATER OP: input state Msdx must have the same shape as gradient," - " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->getShapeInfo()).c_str(), - ShapeUtils::shapeAsString(initStateMsdx->getShapeInfo()).c_str()); + " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->shapeInfo()).c_str(), + ShapeUtils::shapeAsString(initStateMsdx->shapeInfo()).c_str()); bool bParamsSupply = 5 == block.width() || 2 == block.getTArguments()->size(); diff --git a/libnd4j/include/ops/declarable/generic/updaters/adaGradUpdater.cpp b/libnd4j/include/ops/declarable/generic/updaters/adaGradUpdater.cpp index a7a92b410..4cd5b0504 100644 --- a/libnd4j/include/ops/declarable/generic/updaters/adaGradUpdater.cpp +++ b/libnd4j/include/ops/declarable/generic/updaters/adaGradUpdater.cpp @@ -39,8 +39,8 @@ namespace sd { return Status::OK(); REQUIRE_TRUE(gradient->isSameShape(initState), 0, "ADA_GRAD UPDATER OP: input state must have the same shape as gradient," - " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->getShapeInfo()).c_str(), - ShapeUtils::shapeAsString(initState->getShapeInfo()).c_str()); + " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->shapeInfo()).c_str(), + ShapeUtils::shapeAsString(initState->shapeInfo()).c_str()); bool bParamsSupply = 4 == block.width() || 2 == block.getTArguments()->size(); diff --git a/libnd4j/include/ops/declarable/generic/updaters/adaMaxUpdater.cpp b/libnd4j/include/ops/declarable/generic/updaters/adaMaxUpdater.cpp index 4e34c24f6..9f4bb574b 100644 --- a/libnd4j/include/ops/declarable/generic/updaters/adaMaxUpdater.cpp +++ b/libnd4j/include/ops/declarable/generic/updaters/adaMaxUpdater.cpp @@ 
-42,11 +42,11 @@ namespace sd { return Status::OK(); REQUIRE_TRUE(gradient->isSameShape(initStateU), 0, "ADA_MAX UPDATER OP: input state V must have the same shape as gradient," - " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->getShapeInfo()).c_str(), - ShapeUtils::shapeAsString(initStateU->getShapeInfo()).c_str()); + " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->shapeInfo()).c_str(), + ShapeUtils::shapeAsString(initStateU->shapeInfo()).c_str()); REQUIRE_TRUE(gradient->isSameShape(initStateM), 0, "ADA_MAX UPDATER OP: input state M must have the same shape as gradient," - " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->getShapeInfo()).c_str(), - ShapeUtils::shapeAsString(initStateM->getShapeInfo()).c_str()); + " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->shapeInfo()).c_str(), + ShapeUtils::shapeAsString(initStateM->shapeInfo()).c_str()); bool bParamsSupply = 7 == block.width() || 4 == block.getTArguments()->size(); diff --git a/libnd4j/include/ops/declarable/generic/updaters/adamUpdater.cpp b/libnd4j/include/ops/declarable/generic/updaters/adamUpdater.cpp index a696d2388..96386c45b 100644 --- a/libnd4j/include/ops/declarable/generic/updaters/adamUpdater.cpp +++ b/libnd4j/include/ops/declarable/generic/updaters/adamUpdater.cpp @@ -42,11 +42,11 @@ namespace sd { return Status::OK(); REQUIRE_TRUE(gradient->isSameShape(initStateU), 0, "ADAM UPDATER OP: input state V must have the same shape as gradient," - " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->getShapeInfo()).c_str(), - ShapeUtils::shapeAsString(initStateU->getShapeInfo()).c_str()); + " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->shapeInfo()).c_str(), + ShapeUtils::shapeAsString(initStateU->shapeInfo()).c_str()); REQUIRE_TRUE(gradient->isSameShape(initStateM), 0, "ADAM UPDATER OP: input state M must have the same shape as gradient," - " expected shape %s, but got 
%s!", ShapeUtils::shapeAsString(gradient->getShapeInfo()).c_str(), - ShapeUtils::shapeAsString(initStateM->getShapeInfo()).c_str()); + " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->shapeInfo()).c_str(), + ShapeUtils::shapeAsString(initStateM->shapeInfo()).c_str()); bool bParamsSupply = 7 == block.width() || 4 == block.getTArguments()->size(); diff --git a/libnd4j/include/ops/declarable/generic/updaters/amsGradUpdater.cpp b/libnd4j/include/ops/declarable/generic/updaters/amsGradUpdater.cpp index bc0f4beac..32084d970 100644 --- a/libnd4j/include/ops/declarable/generic/updaters/amsGradUpdater.cpp +++ b/libnd4j/include/ops/declarable/generic/updaters/amsGradUpdater.cpp @@ -44,14 +44,14 @@ namespace sd { return Status::OK(); REQUIRE_TRUE(gradient->isSameShape(initStateV), 0, "AMSGRAD UPDATER OP: input state Msg must have the same shape as gradient," - " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->getShapeInfo()).c_str(), - ShapeUtils::shapeAsString(initStateV->getShapeInfo()).c_str()); + " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->shapeInfo()).c_str(), + ShapeUtils::shapeAsString(initStateV->shapeInfo()).c_str()); REQUIRE_TRUE(gradient->isSameShape(initStateM), 0, "AMSGRAD UPDATER OP: input state Msdx must have the same shape as gradient," - " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->getShapeInfo()).c_str(), - ShapeUtils::shapeAsString(initStateM->getShapeInfo()).c_str()); + " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->shapeInfo()).c_str(), + ShapeUtils::shapeAsString(initStateM->shapeInfo()).c_str()); REQUIRE_TRUE(gradient->isSameShape(initStateH), 0, "AMSGRAD UPDATER OP: input state Msdx must have the same shape as gradient!," - " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->getShapeInfo()).c_str(), - ShapeUtils::shapeAsString(initStateH->getShapeInfo()).c_str()); + " expected shape %s, but got %s!", 
ShapeUtils::shapeAsString(gradient->shapeInfo()).c_str(), + ShapeUtils::shapeAsString(initStateH->shapeInfo()).c_str()); bool bParamsSupply = 8 == block.width() || 4 == block.getTArguments()->size(); diff --git a/libnd4j/include/ops/declarable/generic/updaters/nadamUpdater.cpp b/libnd4j/include/ops/declarable/generic/updaters/nadamUpdater.cpp index c6af0686b..4d5e4e12e 100644 --- a/libnd4j/include/ops/declarable/generic/updaters/nadamUpdater.cpp +++ b/libnd4j/include/ops/declarable/generic/updaters/nadamUpdater.cpp @@ -42,11 +42,11 @@ namespace sd { return Status::OK(); REQUIRE_TRUE(gradient->isSameShape(initStateM), 0, "NADAM UPDATER OP: input state M must have the same shape as gradient," - " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->getShapeInfo()).c_str(), - ShapeUtils::shapeAsString(initStateM->getShapeInfo()).c_str()); + " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->shapeInfo()).c_str(), + ShapeUtils::shapeAsString(initStateM->shapeInfo()).c_str()); REQUIRE_TRUE(gradient->isSameShape(initStateV), 0, "NADAM UPDATER OP: input state V must have the same shape as gradient," - " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->getShapeInfo()).c_str(), - ShapeUtils::shapeAsString(initStateV->getShapeInfo()).c_str()); + " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->shapeInfo()).c_str(), + ShapeUtils::shapeAsString(initStateV->shapeInfo()).c_str()); bool bParamsSupply = 7 == block.width() || 4 == block.getTArguments()->size(); diff --git a/libnd4j/include/ops/declarable/generic/updaters/nesterovsUpdater.cpp b/libnd4j/include/ops/declarable/generic/updaters/nesterovsUpdater.cpp index c77abd448..bcbefe36b 100644 --- a/libnd4j/include/ops/declarable/generic/updaters/nesterovsUpdater.cpp +++ b/libnd4j/include/ops/declarable/generic/updaters/nesterovsUpdater.cpp @@ -39,8 +39,8 @@ namespace sd { return Status::OK(); REQUIRE_TRUE(gradient->isSameShape(initState), 0, "NESTEROVS 
UPDATER OP: input state Msg must have the same shape as gradient," - " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->getShapeInfo()).c_str(), - ShapeUtils::shapeAsString(initState->getShapeInfo()).c_str()); + " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->shapeInfo()).c_str(), + ShapeUtils::shapeAsString(initState->shapeInfo()).c_str()); bool bParamsSupply = 4 == block.width() || 2 == block.getTArguments()->size(); diff --git a/libnd4j/include/ops/declarable/generic/updaters/rmsPropUpdater.cpp b/libnd4j/include/ops/declarable/generic/updaters/rmsPropUpdater.cpp index 1ca318e26..a611a4fbe 100644 --- a/libnd4j/include/ops/declarable/generic/updaters/rmsPropUpdater.cpp +++ b/libnd4j/include/ops/declarable/generic/updaters/rmsPropUpdater.cpp @@ -39,8 +39,8 @@ namespace sd { return Status::OK(); REQUIRE_TRUE(gradient->isSameShape(initState), 0, "RMS_PROB UPDATER OP: input state must have the same shape as gradient," - " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->getShapeInfo()).c_str(), - ShapeUtils::shapeAsString(initState->getShapeInfo()).c_str()); + " expected shape %s, but got %s!", ShapeUtils::shapeAsString(gradient->shapeInfo()).c_str(), + ShapeUtils::shapeAsString(initState->shapeInfo()).c_str()); bool bParamsSupply = 5 == block.width() || 3 == block.getTArguments()->size(); diff --git a/libnd4j/include/ops/declarable/generic/util/print_affinity.cpp b/libnd4j/include/ops/declarable/generic/util/print_affinity.cpp index 0103e8672..5518588e4 100644 --- a/libnd4j/include/ops/declarable/generic/util/print_affinity.cpp +++ b/libnd4j/include/ops/declarable/generic/util/print_affinity.cpp @@ -31,7 +31,7 @@ namespace sd { auto input = INPUT_VARIABLE(0); auto output = OUTPUT_VARIABLE(0); - nd4j_printf(": Actuality: [HOST: %s; DEVICE: %s]; affinity: [%i]; Pointers: [HOST: %p; DEVICE: %p]; DataBuffer length: %lld\n", block.nodeId(), input->isActualOnHostSide() ? 
"true" : "false", input->isActualOnDeviceSide() ? "true" : "false", input->dataBuffer()->deviceId(), input->getBuffer(), input->getSpecialBuffer(), input->dataBuffer()->getLenInBytes()); + nd4j_printf(": Actuality: [HOST: %s; DEVICE: %s]; affinity: [%i]; Pointers: [HOST: %p; DEVICE: %p]; DataBuffer length: %lld\n", block.nodeId(), input->isActualOnHostSide() ? "true" : "false", input->isActualOnDeviceSide() ? "true" : "false", input->dataBuffer()->deviceId(), input->buffer(), input->specialBuffer(), input->dataBuffer()->getLenInBytes()); return Status::OK(); } diff --git a/libnd4j/include/ops/declarable/headers/compression.h b/libnd4j/include/ops/declarable/headers/compression.h new file mode 100644 index 000000000..9c177f8a4 --- /dev/null +++ b/libnd4j/include/ops/declarable/headers/compression.h @@ -0,0 +1,62 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author sgazeos@gmail.com +// +#ifndef SD_HEADERS_COMPRESSION_H +#define SD_HEADERS_COMPRESSION_H + +#include + +namespace sd { + namespace ops { + + /** + * encode_bitmap - reinterpret 3D float tensor into uint8_t vector with length N. 
+ * + * Input: + * 0 - 3D float tensor with shape {height, width, channels} + * + * Output: + * 0 - 1D uint8_t tensor with shape {N} + */ + #if NOT_EXCLUDED(OP_encode_bitmap) + DECLARE_CUSTOM_OP(encode_bitmap, 1, 3, true, 1, 0); + #endif + + /** + * decode_bitmap - reinterpret uint8_t linear tensor as data to float tensor with shape + * + * Input: + * 0 - uint8_t vector with length N ( shape {N}) + * + * Output: + * 0 - 3D tensor with shape {height, width, channels} + * + */ + #if NOT_EXCLUDED(OP_decode_bitmap) + DECLARE_CUSTOM_OP(decode_bitmap, 2, 1, true, 0, 0); + #endif + + + DECLARE_CUSTOM_OP(encode_threshold, 2, 1, true, 1, 0); + DECLARE_CUSTOM_OP(decode_threshold, 2, 1, true, 0, 0); + } +} + +#endif // SD_HEADERS_COMPRESSION_H \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/compression.h b/libnd4j/include/ops/declarable/helpers/compression.h new file mode 100644 index 000000000..b9c70a91b --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/compression.h @@ -0,0 +1,34 @@ +/******************************************************************************* + * Copyright (c) 2020 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author sgazeos@gmail.com +// +#ifndef __COMPRESSION_H_HELPERS__ +#define __COMPRESSION_H_HELPERS__ +#include +#include + +namespace sd { +namespace ops { +namespace helpers { + + void decodeBitmap(sd::LaunchContext* context, const NDArray* input, NDArray* output); + Nd4jLong encodeBitmap(sd::LaunchContext* context, NDArray* input, NDArray* output, float threshold); +} +} +} +#endif diff --git a/libnd4j/include/ops/declarable/helpers/convolutions.h b/libnd4j/include/ops/declarable/helpers/convolutions.h index f38692a35..eb41ae637 100644 --- a/libnd4j/include/ops/declarable/helpers/convolutions.h +++ b/libnd4j/include/ops/declarable/helpers/convolutions.h @@ -155,7 +155,7 @@ namespace sd { // evaluates sizes values and indexes using input and output arrays depending on data format static inline void getSizesAndIndexesConv2d(const bool isNCHW, const int wFormat, const NDArray& input, const NDArray& output, int& bS, int& iC, int& iH, int& iW, int& oC, int& oH, int& oW, int& indIOioC, int& indIiH, int& indWiC, int& indWoC, int& indWkH, int& indOoH) { - getSizesAndIndexesConv2d(isNCHW, wFormat, input.getShapeInfo(), output.getShapeInfo(), bS, iC, iH, iW, oC, oH, oW, indIOioC, indIiH, indWiC, indWoC, indWkH, indOoH); + getSizesAndIndexesConv2d(isNCHW, wFormat, input.shapeInfo(), output.shapeInfo(), bS, iC, iH, iW, oC, oH, oW, indIOioC, indIiH, indWiC, indWoC, indWkH, indOoH); } static inline void getSizesAndIndexesConv2d(const bool isNCHW, const int wFormat, const Nd4jLong* inShapeInfo, const Nd4jLong* outShapeInfo, int& bS, int& iC, int& iH, int& iW, int& oC, int& oH, int& oW, int& indIOioC, int& indIiH, int& indWiC, int& indWoC, int& indWkH, int& indOoH) { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/BarnesHutTsne.cpp b/libnd4j/include/ops/declarable/helpers/cpu/BarnesHutTsne.cpp index 5ac61964c..97bdd5c89 100644 --- 
a/libnd4j/include/ops/declarable/helpers/cpu/BarnesHutTsne.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/BarnesHutTsne.cpp @@ -28,8 +28,8 @@ namespace helpers { Nd4jLong barnes_row_count(const NDArray* rowP, const NDArray* colP, Nd4jLong N, NDArray& rowCounts) { int* pRowCounts = reinterpret_cast(rowCounts.buffer()); - int const* pRows = reinterpret_cast(rowP->getBuffer()); - int const* pCols = reinterpret_cast(colP->getBuffer()); + int const* pRows = reinterpret_cast(rowP->buffer()); + int const* pCols = reinterpret_cast(colP->buffer()); for (Nd4jLong n = 0; n < N; n++) { int begin = pRows[n];//->e(n); int end = pRows[n + 1];//rowP->e(n + 1); @@ -69,7 +69,7 @@ namespace helpers { //NDArray symValP = NDArrayFactory::create('c', {numElements}); //symRowP.insert(symRowP.begin(),0); //symRowP(1, {0}) = *rowCounts; - int const* pRows = reinterpret_cast(rowP->getBuffer()); + int const* pRows = reinterpret_cast(rowP->buffer()); int* symRowP = reinterpret_cast(outputRows->buffer()); symRowP[0] = 0; for (Nd4jLong n = 0; n < N; n++) @@ -79,8 +79,8 @@ namespace helpers { int* symColP = reinterpret_cast(outputCols->buffer()); // symRowP.p(n + 1, symRowP.e(n) + rowCounts.e(n)) // outputRows->printBuffer("SymRows are"); - int const* pCols = reinterpret_cast(colP->getBuffer()); - T const* pVals = reinterpret_cast(valP->getBuffer()); + int const* pCols = reinterpret_cast(colP->buffer()); + T const* pVals = reinterpret_cast(valP->buffer()); T* pOutput = reinterpret_cast(outputVals->buffer()); //std::vector rowCountsV = rowCounts->getBufferAsVector(); std::vector offset(N);// = NDArrayFactory::create('c', {N}); @@ -143,8 +143,8 @@ namespace helpers { template static void barnes_edge_forces_(const NDArray* rowP, NDArray const* colP, NDArray const* valP, int N, NDArray const* data, NDArray* output) { - T const* dataP = reinterpret_cast(data->getBuffer()); - T const* vals = reinterpret_cast(valP->getBuffer()); + T const* dataP = reinterpret_cast(data->buffer()); + T const* vals 
= reinterpret_cast(valP->buffer()); T* outputP = reinterpret_cast(output->buffer()); int colCount = data->columns(); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/activations.cpp b/libnd4j/include/ops/declarable/helpers/cpu/activations.cpp index de56650c8..ccc4d676a 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/activations.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/activations.cpp @@ -64,9 +64,9 @@ namespace helpers { const int rank = input.rankOf(); int temp; - if(shape::isCommonVector(input.getShapeInfo(), temp)) { + if(shape::isCommonVector(input.shapeInfo(), temp)) { - BUILD_SINGLE_SELECTOR(input.dataType(), _softMaxDerivForVector, (context, input.getBuffer(), input.getShapeInfo(), output.buffer()), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), _softMaxDerivForVector, (context, input.buffer(), input.shapeInfo(), output.buffer()), FLOAT_TYPES); } else { auto maxAlongDim = const_cast(input).reduceAlongDimension(reduce::Max, {dimension}, true); @@ -79,8 +79,8 @@ namespace helpers { /////////////////////////////////////////////////////////////////// template - void logSoftMaxForVector_(void *input, Nd4jLong *inShapeInfo, void *output, Nd4jLong *outShapeInfo) { - auto inBuff = reinterpret_cast(input); + void logSoftMaxForVector_(void const* input, Nd4jLong const* inShapeInfo, void *output, Nd4jLong const* outShapeInfo) { + auto inBuff = reinterpret_cast(input); auto outBuff = reinterpret_cast(output); T max = -DataTypeUtils::max(); @@ -126,21 +126,21 @@ namespace helpers { } /////////////////////////////////////////////////////////////////// - void logSoftMaxForVector(sd::LaunchContext * context, const NDArray& input, NDArray& output) { + void logSoftMaxForVector(sd::LaunchContext* context, const NDArray& input, NDArray& output) { if(!input.isVector() || !output.isVector()) throw std::runtime_error("ops::helpers::logSoftMaxForVector function input and output arrays must be vectors !"); auto xType = input.dataType(); - 
BUILD_SINGLE_SELECTOR(xType, logSoftMaxForVector_, (input.getBuffer(), input.getShapeInfo(), output.buffer(), output.shapeInfo()), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(xType, logSoftMaxForVector_, (input.buffer(), input.shapeInfo(), output.buffer(), output.shapeInfo()), FLOAT_TYPES); } ////////////////////////////////////////////////////////////////////////// void prelu(sd::LaunchContext * context, const NDArray& input, const NDArray& alpha, NDArray& output) { const Nd4jLong inputLen = input.lengthOf(); - const Nd4jLong* inputShapeInfo = input.getShapeInfo(); - const Nd4jLong* alphaShapeInfo = alpha.getShapeInfo(); + const Nd4jLong* inputShapeInfo = input.shapeInfo(); + const Nd4jLong* alphaShapeInfo = alpha.shapeInfo(); auto func = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i++) { @@ -161,8 +161,8 @@ void prelu(sd::LaunchContext * context, const NDArray& input, const NDArray& alp void preluBP(sd::LaunchContext * context, const NDArray& input, const NDArray& alpha, const NDArray& dLdO, NDArray& dLdI, NDArray& dLdA) { const Nd4jLong inputLen = input.lengthOf(); - const Nd4jLong* inputShapeInfo = input.getShapeInfo(); - const Nd4jLong* alphaShapeInfo = alpha.getShapeInfo(); + const Nd4jLong* inputShapeInfo = input.shapeInfo(); + const Nd4jLong* alphaShapeInfo = alpha.shapeInfo(); dLdA.assign(0.0f); @@ -219,7 +219,7 @@ void preluBP(sd::LaunchContext * context, const NDArray& input, const NDArray& a if(input.isVector()) { if(rank == 1 || input.sizeAt(dimension) != 1) { - BUILD_SINGLE_SELECTOR(input.dataType(), logSoftMaxForVector_, (input.getBuffer(), input.getShapeInfo(), output.buffer(), output.shapeInfo()), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), logSoftMaxForVector_, (input.buffer(), input.shapeInfo(), output.buffer(), output.shapeInfo()), FLOAT_TYPES); } else output = 0.; @@ -235,7 +235,7 @@ void preluBP(sd::LaunchContext * context, const NDArray& input, const NDArray& a } BUILD_SINGLE_TEMPLATE(template void thresholdReluDerivative_, 
(sd::LaunchContext * context, NDArray* input, double threshold, NDArray* dLdO, NDArray* output), FLOAT_TYPES); - BUILD_SINGLE_TEMPLATE(template void logSoftMaxForVector_, (void *input, Nd4jLong *inShapeInfo, void *output, Nd4jLong *outShapeInfo), FLOAT_TYPES); + BUILD_SINGLE_TEMPLATE(template void logSoftMaxForVector_, (void const* input, Nd4jLong const* inShapeInfo, void *output, Nd4jLong const* outShapeInfo), FLOAT_TYPES); BUILD_SINGLE_TEMPLATE(template void _softMaxDerivForVector, (sd::LaunchContext * context, const void *input, const Nd4jLong *inShapeInfo, void *output), FLOAT_TYPES); } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/addBias.cpp b/libnd4j/include/ops/declarable/helpers/cpu/addBias.cpp index 68b8c6955..a03b4504f 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/addBias.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/addBias.cpp @@ -396,15 +396,15 @@ namespace sd { } */ - Nd4jLong* x_shapeInfo = input.getShapeInfo(); - Nd4jLong* z_shapeInfo = output.getShapeInfo(); - X* x = input.bufferAsT(); - X* z = output.bufferAsT(); - const Y* b = bias.bufferAsT(); + auto x_shapeInfo = input.shapeInfo(); + auto z_shapeInfo = output.shapeInfo(); + auto x = input.bufferAsT(); + auto z = output.bufferAsT(); + auto b = bias.bufferAsT(); const Nd4jLong rank = x_shapeInfo[0]; - const Nd4jLong* bases = &(x_shapeInfo[1]); - const Nd4jLong* x_strides = &(x_shapeInfo[rank + 1]); - const Nd4jLong* z_strides = &(z_shapeInfo[rank + 1]); + auto bases = &(x_shapeInfo[1]); + auto x_strides = &(x_shapeInfo[rank + 1]); + auto z_strides = &(z_shapeInfo[rank + 1]); const bool inplaceOp = (x == z); const bool same_order = inplaceOp || (input.ordering() == output.ordering()); const bool channel_atTheEnd = !isNCHW; @@ -502,27 +502,27 @@ namespace sd { FUNC_1D func = [order, isContinuous, rank, x, b, bias_new, z, x_shapeInfo, z_shapeInfo, same_stride, same_order, yStrideC, rank_skip] (uint64_t thread_id, int64_t start, int64_t stop, int64_t increment) -> void { 
const Nd4jLong rank = x_shapeInfo[0]; - const Nd4jLong* bases = &(x_shapeInfo[1]); - const Nd4jLong* x_strides = &(x_shapeInfo[rank + 1]); - const Nd4jLong* z_strides = &(z_shapeInfo[rank + 1]); + auto bases = &(x_shapeInfo[1]); + auto x_strides = &(x_shapeInfo[rank + 1]); + auto z_strides = &(z_shapeInfo[rank + 1]); const bool inplaceOp = (x == z); if (order == 'c') { if (isContinuous) { - channel_atTheEnd_continous_C(x, bias_new, z, inplaceOp, start, stop, increment); + channel_atTheEnd_continous_C(const_cast(x), bias_new, z, inplaceOp, start, stop, increment); } // rank is in [2,5] else if (rank == 4) { - channel_atTheEnd_generic_C(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, x, bias_new, z, start, stop, increment); + channel_atTheEnd_generic_C(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, const_cast(x), bias_new, z, start, stop, increment); } else if (rank == 5) { - channel_atTheEnd_generic_C(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, x, bias_new, z, start, stop, increment); + channel_atTheEnd_generic_C(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, const_cast(x), bias_new, z, start, stop, increment); } else if (rank == 2) { - channel_atTheEnd_generic_C(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, x, bias_new, z, start, stop, increment); + channel_atTheEnd_generic_C(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, const_cast(x), bias_new, z, start, stop, increment); } else if (rank == 3) { - channel_atTheEnd_generic_C(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, x, bias_new, z, start, stop, increment); + channel_atTheEnd_generic_C(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, const_cast(x), bias_new, z, start, stop, increment); } } else { @@ -530,36 +530,36 @@ namespace sd { if (isContinuous) { if (rank == 4) { if (rank_skip == rank - 2) { - channel_generic_stride_skip_F(x_strides, bases, x, b, z, inplaceOp, 
yStrideC, start, stop, increment); + channel_generic_stride_skip_F(x_strides, bases, const_cast(x), b, z, inplaceOp, yStrideC, start, stop, increment); } else { - channel_generic_stride_skip_F(x_strides, bases, x, b, z, inplaceOp, yStrideC, start, stop, increment); + channel_generic_stride_skip_F(x_strides, bases, const_cast(x), b, z, inplaceOp, yStrideC, start, stop, increment); } } else if (rank == 5) { if (rank_skip == rank - 2) { //skip==3 - channel_generic_stride_skip_F(x_strides, bases, x, b, z, inplaceOp, yStrideC, start, stop, increment); + channel_generic_stride_skip_F(x_strides, bases, const_cast(x), b, z, inplaceOp, yStrideC, start, stop, increment); } else { - channel_generic_stride_skip_F(x_strides, bases, x, b, z, inplaceOp, yStrideC, start, stop, increment); + channel_generic_stride_skip_F(x_strides, bases, const_cast(x), b, z, inplaceOp, yStrideC, start, stop, increment); } } else if (rank == 3) { - channel_generic_stride_skip_F(x_strides, bases, x, b, z, inplaceOp, yStrideC, start, stop, increment); + channel_generic_stride_skip_F(x_strides, bases, const_cast(x), b, z, inplaceOp, yStrideC, start, stop, increment); } } else if (rank == 4) { - channel_generic_F(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, x, b, z, start, stop, increment); + channel_generic_F(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, const_cast(x), b, z, start, stop, increment); } else if (rank == 5) { - channel_generic_F(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, x, b, z, start, stop, increment); + channel_generic_F(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, const_cast(x), b, z, start, stop, increment); } else if (rank == 2) { - channel_generic_F(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, x, b, z, start, stop, increment); + channel_generic_F(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, 
const_cast(x), b, z, start, stop, increment); } else if (rank == 3) { - channel_generic_F(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, x, b, z, start, stop, increment); + channel_generic_F(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, const_cast(x), b, z, start, stop, increment); } } @@ -600,18 +600,18 @@ namespace sd { const bool inplaceOp = (x == z); if (order == 'c') { if (isContinuous) { - channel_NC_continous_numHW_C(rank, bases, x_strides, x, b, z, inplaceOp, yStrideC, start, stop, increment); + channel_NC_continous_numHW_C(rank, bases, x_strides, const_cast(x), b, z, inplaceOp, yStrideC, start, stop, increment); } // rank is in [3,5] else if (rank == 4) { - channel_NC_generic_C(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, x, b, z, start, stop, increment); + channel_NC_generic_C(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, const_cast(x), b, z, start, stop, increment); } else if (rank == 5) { - channel_NC_generic_C(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, x, b, z, start, stop, increment); + channel_NC_generic_C(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, const_cast(x), b, z, start, stop, increment); } else if (rank == 3) { - channel_NC_generic_C(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, x, b, z, start, stop, increment); + channel_NC_generic_C(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, const_cast(x), b, z, start, stop, increment); } } else { @@ -620,13 +620,13 @@ namespace sd { //continous case is missing if (rank == 4) { - channel_generic_F(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, x, b, z, start, stop, increment); + channel_generic_F(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, const_cast(x), b, z, start, stop, increment); } else if (rank == 5) { - 
channel_generic_F(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, x, b, z, start, stop, increment); + channel_generic_F(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, const_cast(x), b, z, start, stop, increment); } else if (rank == 3) { - channel_generic_F(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, x, b, z, start, stop, increment); + channel_generic_F(bases, x_strides, z_strides, inplaceOp, same_stride, same_order, yStrideC, const_cast(x), b, z, start, stop, increment); } } }; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/adjust_hue.cpp b/libnd4j/include/ops/declarable/helpers/cpu/adjust_hue.cpp index 078ebda10..20d91ee8b 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/adjust_hue.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/adjust_hue.cpp @@ -59,8 +59,8 @@ static void adjustHue_(const NDArray *input, const NDArray* deltaScalarArr, NDAr } else { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimC); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimC); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimC); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimC); const Nd4jLong numOfTads = packX.numberOfTads(); const Nd4jLong xDimCstride = input->stridesOf()[dimC]; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/adjust_saturation.cpp b/libnd4j/include/ops/declarable/helpers/cpu/adjust_saturation.cpp index c5c5cf9c6..6610b69ac 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/adjust_saturation.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/adjust_saturation.cpp @@ -58,8 +58,8 @@ static void adjustSaturation_(const NDArray *input, const NDArray* factorScalarA samediff::Threads::parallel_for(func, 0, input->lengthOf(), 3); } else { - auto packX = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimC); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimC); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimC); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimC); const Nd4jLong numOfTads = packX.numberOfTads(); const Nd4jLong xDimCstride = input->stridesOf()[dimC]; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/batched_gemm.cpp b/libnd4j/include/ops/declarable/helpers/cpu/batched_gemm.cpp index daaf4f71a..ec8f040a9 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/batched_gemm.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/batched_gemm.cpp @@ -72,9 +72,9 @@ void bgemm_(const std::vector& vA, const std::vector& vB, st } if (std::is_same::value) { - BlasHelper::getInstance()->dgemmBatched()(CblasColMajor, tA, tB, tM, tN, tK, (double *) alphas->getBuffer(), (double **) buffersA.data(), tldA, (double **) buffersB.data(), tldB, (double *) betas->getBuffer(),(double **) buffersC.data(), tldC, vA.size(), tsize); + BlasHelper::getInstance()->dgemmBatched()(CblasColMajor, tA, tB, tM, tN, tK, (double *) alphas->buffer(), (double **) buffersA.data(), tldA, (double **) buffersB.data(), tldB, (double *) betas->buffer(),(double **) buffersC.data(), tldC, vA.size(), tsize); } else if (std::is_same::value) { - BlasHelper::getInstance()->sgemmBatched()(CblasColMajor, tA, tB, tM, tN, tK, (float *) alphas->getBuffer(), (float **) buffersA.data(), tldA, (float **) buffersB.data(), tldB, (float *) betas->getBuffer(), (float **) buffersC.data(), tldC, vA.size(), tsize); + BlasHelper::getInstance()->sgemmBatched()(CblasColMajor, tA, tB, tM, tN, tK, (float *) alphas->buffer(), (float **) buffersA.data(), tldA, (float **) buffersB.data(), tldB, (float *) betas->buffer(), (float **) buffersC.data(), tldC, vA.size(), tsize); } // release temporary arrays 
diff --git a/libnd4j/include/ops/declarable/helpers/cpu/batchnorm.cpp b/libnd4j/include/ops/declarable/helpers/cpu/batchnorm.cpp index 2293fe843..65c342d9c 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/batchnorm.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/batchnorm.cpp @@ -44,13 +44,13 @@ static void batchnorm_(const NDArray* input, const NDArray* mean, const NDArray* const T* g = gamma == nullptr ? nullptr : gamma->bufferAsT(); const T* b = beta == nullptr ? nullptr : beta->bufferAsT(); - const bool xzSameOffset = shape::haveSameShapeAndStrides(input->getShapeInfo(), output->getShapeInfo()); + const bool xzSameOffset = shape::haveSameShapeAndStrides(input->shapeInfo(), output->shapeInfo()); - bool paramSameOffset = shape::haveSameShapeAndStrides(mean->getShapeInfo(), variance->getShapeInfo()); + bool paramSameOffset = shape::haveSameShapeAndStrides(mean->shapeInfo(), variance->shapeInfo()); if(paramSameOffset && gamma != nullptr) - paramSameOffset &= shape::haveSameShapeAndStrides(mean->getShapeInfo(), gamma->getShapeInfo()); + paramSameOffset &= shape::haveSameShapeAndStrides(mean->shapeInfo(), gamma->shapeInfo()); if(paramSameOffset && beta != nullptr) - paramSameOffset &= shape::haveSameShapeAndStrides(mean->getShapeInfo(), beta->getShapeInfo()); + paramSameOffset &= shape::haveSameShapeAndStrides(mean->shapeInfo(), beta->shapeInfo()); const Nd4jLong lenBig = input->lengthOf(); const Nd4jLong lenSmall = mean->lengthOf(); @@ -73,27 +73,27 @@ static void batchnorm_(const NDArray* input, const NDArray* mean, const NDArray* if(!isOwner) continue; - const auto meanOffset = shape::getIndexOffset(j, mean->getShapeInfo()); - const auto varOffset = paramSameOffset ? meanOffset : shape::getIndexOffset(j, variance->getShapeInfo()); + const auto meanOffset = shape::getIndexOffset(j, mean->shapeInfo()); + const auto varOffset = paramSameOffset ? 
meanOffset : shape::getIndexOffset(j, variance->shapeInfo()); const auto meanVal = m[meanOffset]; auto sigmaInvGam = static_cast(1) / sd::math::nd4j_sqrt(v[varOffset] + epsilon); if(g != nullptr) { - const auto gammaOffset = paramSameOffset ? meanOffset : shape::getIndexOffset(j, gamma->getShapeInfo()); + const auto gammaOffset = paramSameOffset ? meanOffset : shape::getIndexOffset(j, gamma->shapeInfo()); sigmaInvGam *= g[gammaOffset]; } T betaVal = static_cast(0); if(b != nullptr) { - const auto betaOffset = paramSameOffset ? meanOffset : shape::getIndexOffset(j, beta->getShapeInfo()); + const auto betaOffset = paramSameOffset ? meanOffset : shape::getIndexOffset(j, beta->shapeInfo()); betaVal = b[betaOffset]; } // calculate offsets for input and output - shape::outerArrayOffsets(xOffsets, j, input->getShapeInfo(), mean->getShapeInfo(), auxBuff, dimsToExclude.data()); + shape::outerArrayOffsets(xOffsets, j, input->shapeInfo(), mean->shapeInfo(), auxBuff, dimsToExclude.data()); if(!xzSameOffset) - shape::outerArrayOffsets(zOffsets, j, output->getShapeInfo(), mean->getShapeInfo(), auxBuff, dimsToExclude.data()); + shape::outerArrayOffsets(zOffsets, j, output->shapeInfo(), mean->shapeInfo(), auxBuff, dimsToExclude.data()); PRAGMA_OMP_SIMD for (Nd4jLong i = 0; i < steps; ++i) @@ -129,13 +129,13 @@ static void batchnorm2_(const NDArray* input, const NDArray* mean, const NDArray const uint minRank = mean->rankOf(); const uint numAxes = axes.size(); - const bool xzSameOffset = shape::haveSameShapeAndStrides(input->getShapeInfo(), output->getShapeInfo()); + const bool xzSameOffset = shape::haveSameShapeAndStrides(input->shapeInfo(), output->shapeInfo()); - bool paramSameOffset = shape::haveSameShapeAndStrides(mean->getShapeInfo(), variance->getShapeInfo()); + bool paramSameOffset = shape::haveSameShapeAndStrides(mean->shapeInfo(), variance->shapeInfo()); if(paramSameOffset && gamma != nullptr) - paramSameOffset &= shape::haveSameShapeAndStrides(mean->getShapeInfo(), 
gamma->getShapeInfo()); + paramSameOffset &= shape::haveSameShapeAndStrides(mean->shapeInfo(), gamma->shapeInfo()); if(paramSameOffset && beta != nullptr) - paramSameOffset &= shape::haveSameShapeAndStrides(mean->getShapeInfo(), beta->getShapeInfo()); + paramSameOffset &= shape::haveSameShapeAndStrides(mean->shapeInfo(), beta->shapeInfo()); auto func = PRAGMA_THREADS_FOR { @@ -149,10 +149,10 @@ static void batchnorm2_(const NDArray* input, const NDArray* mean, const NDArray for (auto i = start; i < stop; i++) { - shape::index2coordsCPU(start, i, input->getShapeInfo(), xzCoords); + shape::index2coordsCPU(start, i, input->shapeInfo(), xzCoords); - const auto xOffset = shape::getOffset(input->getShapeInfo(), xzCoords); - const auto zOffset = xzSameOffset ? xOffset : shape::getOffset(output->getShapeInfo(), xzCoords); + const auto xOffset = shape::getOffset(input->shapeInfo(), xzCoords); + const auto zOffset = xzSameOffset ? xOffset : shape::getOffset(output->shapeInfo(), xzCoords); if(minRank == xRank) { for (uint j = 0; j < numAxes; ++j) @@ -161,20 +161,20 @@ static void batchnorm2_(const NDArray* input, const NDArray* mean, const NDArray else // minRank = numAxes = 1 in this case minCoords[0] = xzCoords[axes[0]]; - const auto meanOffset = shape::getOffset(mean->getShapeInfo(), minCoords); - const auto varianceOffset = paramSameOffset ? meanOffset : shape::getOffset(variance->getShapeInfo(), minCoords); + const auto meanOffset = shape::getOffset(mean->shapeInfo(), minCoords); + const auto varianceOffset = paramSameOffset ? meanOffset : shape::getOffset(variance->shapeInfo(), minCoords); T sigmaInvGam = 1. / sd::math::nd4j_sqrt(v[varianceOffset] + epsilon); if(g != nullptr) { - const auto gammaOffset = paramSameOffset ? meanOffset : shape::getOffset(gamma->getShapeInfo(), minCoords); + const auto gammaOffset = paramSameOffset ? 
meanOffset : shape::getOffset(gamma->shapeInfo(), minCoords); sigmaInvGam *= g[gammaOffset]; } z[zOffset] = (x[xOffset] - m[meanOffset]) * sigmaInvGam; if(b != nullptr) { - const auto betaOffset = paramSameOffset ? meanOffset : shape::getOffset(beta->getShapeInfo(), minCoords); + const auto betaOffset = paramSameOffset ? meanOffset : shape::getOffset(beta->shapeInfo(), minCoords); z[zOffset] += b[betaOffset]; } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/col2im.cpp b/libnd4j/include/ops/declarable/helpers/cpu/col2im.cpp index cf46df2db..42d4af529 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/col2im.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/col2im.cpp @@ -31,8 +31,8 @@ void col2im_(sd::LaunchContext & context, const NDArray& input, NDArray& output auto imBuff = output.bufferAsT(); auto colBuff = input.bufferAsT(); - auto imShapeBuffer = output.getShapeInfo(); - auto colShapeBuffer = input.getShapeInfo(); + auto imShapeBuffer = output.shapeInfo(); + auto colShapeBuffer = input.shapeInfo(); auto colShape = shape::shapeOf(colShapeBuffer); auto colStride = shape::stride(colShapeBuffer); auto imShape = shape::shapeOf(imShapeBuffer); @@ -60,7 +60,9 @@ void col2im_(sd::LaunchContext & context, const NDArray& input, NDArray& output if (false) { auto func = PRAGMA_THREADS_FOR_2D { - T *col, *im; + T const* col; + T* im; + int imRow, imCol; for (auto b = start_x; b < stop_x; b += inc_x) { @@ -96,20 +98,20 @@ void col2im_(sd::LaunchContext & context, const NDArray& input, NDArray& output for (auto b = start; b < stop; b++) { T *im0 = imBuff + b * imStride0; - T *col4 = colBuff + b * colStride0; + T const* col4 = colBuff + b * colStride0; for (int colH = 0; colH < oH; ++colH, col4 += colStride4) { - T *col5 = col4; + T const* col5 = col4; for (int colW = 0; colW < oW; ++colW, col5 += colStride5) { - T *col1 = col5; + T const* col1 = col5; T *im1 = im0; for (int c = 0; c < iC; ++c, col1 += colStride1, im1 += imStride1) { int imRow = (-pH + colH 
* sH); - T *col2 = col1; + T const* col2 = col1; T *im2 = im1 + imRow * imStride2; for (int kRow = 0; kRow < kH; ++kRow, col2 += colStride2, imRow += dH, im2 += dH * imStride2) { int imCol = -pW + colW * sW; - T *col3 = col2; + T const* col3 = col2; T *im3 = im2 + imCol * imStride3; for (int kCol = 0; kCol < kW; ++kCol, col3 += colStride3, imCol += dW, im3 += dW * imStride3) { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compare_elem.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compare_elem.cpp index 12961fe92..32dc3d7c7 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/compare_elem.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/compare_elem.cpp @@ -22,7 +22,7 @@ namespace sd { namespace helpers { template static void _compare_elem(NDArray *input, bool isStrictlyIncreasing, bool& output) { - auto length = shape::length(input->getShapeInfo()); + auto length = shape::length(input->shapeInfo()); int elementsPerThread = length / ELEMENT_THRESHOLD; int num_threads = sd::math::nd4j_max(1, elementsPerThread); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compression/compression.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compression/compression.cpp new file mode 100644 index 000000000..0911b0619 --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/cpu/compression/compression.cpp @@ -0,0 +1,37 @@ +/******************************************************************************* + * Copyright (c) 2020 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author sgazeos@gmail.com +// +#include +#include + +namespace sd { +namespace ops { +namespace helpers { + + void decodeBitmap(sd::LaunchContext* context, const NDArray* input, NDArray* output) { + NativeOpExecutioner::decodeBitmap(input->buffer(), output->lengthOf(), output->buffer(), output->shapeInfo()); + } + + + Nd4jLong encodeBitmap(sd::LaunchContext* context, NDArray* input, NDArray* output, float threshold) { + return NativeOpExecutioner::encodeBitmap(input->buffer(), input->shapeInfo(), input->lengthOf(), output->bufferAsT(), threshold); + } +} +} +} diff --git a/libnd4j/include/ops/declarable/helpers/cpu/compression/threshold.cpp b/libnd4j/include/ops/declarable/helpers/cpu/compression/threshold.cpp new file mode 100644 index 000000000..bac3812d1 --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/cpu/compression/threshold.cpp @@ -0,0 +1,62 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include +#include +#include + +namespace sd { + namespace ops { + namespace helpers { + template + static int32_t thresholdEstimate_(const NDArray &updates, const float threshold) { + auto N = updates.lengthOf(); + const auto buffer = updates.bufferAsT(); + + auto func = PRAGMA_REDUCE_LONG { + int64_t cnt = 0; + for (auto e = start; e < stop; e++) { + auto v = sd::math::nd4j_abs(buffer[e]); + if (v >= threshold) + cnt++; + } + + return cnt; + }; + + return samediff::Threads::parallel_long(func, LAMBDA_AL { return _old + _new; }, 0, N); + } + + int32_t thresholdEstimate(const NDArray &updates, const float threshold) { + BUILD_SINGLE_SELECTOR(updates.dataType(), return thresholdEstimate_, (updates, threshold), FLOAT_TYPES); + + return 0; + } + + void thresholdEncode(NDArray &updates, NDArray &encoded, float threshold) { + BUILD_SINGLE_SELECTOR(updates.dataType(), sd::TypeCast::convertToThreshold, (nullptr, updates.buffer(), updates.lengthOf(), encoded.buffer()), FLOAT_TYPES); + } + + void thresholdDecode(const NDArray &encoded, NDArray &updates) { + BUILD_SINGLE_SELECTOR(updates.dataType(), sd::TypeCast::convertFromThreshold, (nullptr, encoded.buffer(), updates.lengthOf(), updates.buffer()), FLOAT_TYPES); + } + } + } +} diff --git a/libnd4j/include/ops/declarable/helpers/cpu/convolutions_col2vol.cpp b/libnd4j/include/ops/declarable/helpers/cpu/convolutions_col2vol.cpp index c9cae504a..b12064cac 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/convolutions_col2vol.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/convolutions_col2vol.cpp @@ -61,7 +61,7 @@ static void col2vol_(const NDArray& columns, NDArray& volume, const int sD, cons T* colBuff = const_cast(columns).bufferAsT(); - if (volume.ordering() == 'c' && columns.ordering() == 'c' && 
shape::strideDescendingCAscendingF(volume.getShapeInfo()) && shape::strideDescendingCAscendingF(columns.getShapeInfo())) { + if (volume.ordering() == 'c' && columns.ordering() == 'c' && shape::strideDescendingCAscendingF(volume.shapeInfo()) && shape::strideDescendingCAscendingF(columns.shapeInfo())) { auto func = PRAGMA_THREADS_FOR { T* col, *vol; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/convolutions_vol2col.cpp b/libnd4j/include/ops/declarable/helpers/cpu/convolutions_vol2col.cpp index 552dceb6a..4c8b5bad1 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/convolutions_vol2col.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/convolutions_vol2col.cpp @@ -59,7 +59,7 @@ static void vol2col_(const NDArray& volume, NDArray& columns, const int sD, cons T* volBuff = const_cast(volume).bufferAsT(); - if (volume.ordering() == 'c' && columns.ordering() == 'c' && shape::strideDescendingCAscendingF(volume.getShapeInfo()) && shape::strideDescendingCAscendingF(columns.getShapeInfo())) { + if (volume.ordering() == 'c' && columns.ordering() == 'c' && shape::strideDescendingCAscendingF(volume.shapeInfo()) && shape::strideDescendingCAscendingF(columns.shapeInfo())) { auto func = PRAGMA_THREADS_FOR_3D { T *col, *vol; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/d_t_s.cpp b/libnd4j/include/ops/declarable/helpers/cpu/d_t_s.cpp index 598b3dc30..27b73d001 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/d_t_s.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/d_t_s.cpp @@ -27,7 +27,7 @@ namespace helpers { template static void __depthToSpace(const NDArray &input, NDArray *output, int block_size, bool isNHWC) { - T *input_ptr = reinterpret_cast(input.getBuffer()); + T const*input_ptr = reinterpret_cast(input.buffer()); T *output_ptr = reinterpret_cast(output->buffer()); const int batch_size = input.sizeAt(0); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/dilation2d.cpp b/libnd4j/include/ops/declarable/helpers/cpu/dilation2d.cpp index 
fbf071e28..1688dcbc4 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/dilation2d.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/dilation2d.cpp @@ -38,9 +38,9 @@ static void dilation2d_(NDArray *input, NDArray *weights, NDArray *output, const const X* y = weights->bufferAsT(); Z* z = output->bufferAsT(); - const Nd4jLong* xShapeInfo = input->getShapeInfo(); - const Nd4jLong* yShapeInfo = weights->getShapeInfo(); - const Nd4jLong* zShapeInfo = output->getShapeInfo(); + const Nd4jLong* xShapeInfo = input->shapeInfo(); + const Nd4jLong* yShapeInfo = weights->shapeInfo(); + const Nd4jLong* zShapeInfo = output->shapeInfo(); const uint bS = input->sizeAt(0); const uint iH = input->sizeAt(1); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/gather.cpp b/libnd4j/include/ops/declarable/helpers/cpu/gather.cpp index fb715a5e5..1deb12752 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/gather.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/gather.cpp @@ -78,20 +78,19 @@ void gather(sd::LaunchContext * context, const NDArray* input, const NDArray* in const Nd4jLong numOfSubArrs = indices->lengthOf(); - auto inTadPack = ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimsIn); - auto outTadPack = ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimsOut); + auto inTadPack = ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimsIn); + auto outTadPack = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimsOut); - Nd4jLong* inTadShapeInfo = inTadPack.primaryShapeInfo(); - Nd4jLong* outTadShapeInfo = outTadPack.primaryShapeInfo(); + auto inTadShapeInfo = inTadPack.primaryShapeInfo(); + auto outTadShapeInfo = outTadPack.primaryShapeInfo(); if (shape::order(inTadShapeInfo) == shape::order(outTadShapeInfo) && shape::order(inTadShapeInfo) == 'c' && input->dataType() == output->dataType() && shape::elementWiseStride(inTadShapeInfo) == 1 && 
shape::elementWiseStride(outTadShapeInfo) == 1) { auto func = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i++) { - - void* inBuff = input->bufferWithOffset(inTadPack.primaryOffsets()[indices->e(i)]); - void* outBuff = output->bufferWithOffset(outTadPack.primaryOffsets()[i]); + auto inBuff = input->bufferWithOffset(inTadPack.primaryOffsets()[indices->e(i)]); + auto outBuff = output->bufferWithOffset(outTadPack.primaryOffsets()[i]); memcpy(outBuff, inBuff, shape::length(inTadShapeInfo) * input->sizeOfT()); } @@ -102,8 +101,8 @@ void gather(sd::LaunchContext * context, const NDArray* input, const NDArray* in auto func = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i++) { - void* inBuff = input->bufferWithOffset(inTadPack.primaryOffsets()[indices->e(i)]); - void* outBuff = output->bufferWithOffset(outTadPack.primaryOffsets()[i]); + auto inBuff = input->bufferWithOffset(inTadPack.primaryOffsets()[indices->e(i)]); + auto outBuff = output->bufferWithOffset(outTadPack.primaryOffsets()[i]); NativeOpExecutioner::execTransformAny(input->getContext(), transform::Assign, inBuff, inTadShapeInfo, nullptr/*input specialBuffer*/, nullptr/*input specialShapeInfo*/, @@ -130,19 +129,18 @@ void gather(sd::LaunchContext * context, const NDArray* input, const NDArray* in std::vector dims = ShapeUtils::evalDimsToExclude(input->rankOf(), {axis}); - auto inTadPack = ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dims); - auto outTadPack = ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dims); + auto inTadPack = ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dims); + auto outTadPack = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dims); - Nd4jLong* inTadShapeInfo = inTadPack.primaryShapeInfo(); - Nd4jLong* outTadShapeInfo = outTadPack.primaryShapeInfo(); + auto inTadShapeInfo = inTadPack.primaryShapeInfo(); + auto outTadShapeInfo = outTadPack.primaryShapeInfo(); if 
(shape::order(inTadShapeInfo) == shape::order(outTadShapeInfo) && shape::order(inTadShapeInfo) == 'c' && input->dataType() == output->dataType() && shape::elementWiseStride(inTadShapeInfo) == 1 && shape::elementWiseStride(outTadShapeInfo) == 1) { auto func = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i++) { - - void* inBuff = input->bufferWithOffset(inTadPack.primaryOffsets()[intArgs[i + 1]]); + auto inBuff = input->bufferWithOffset(inTadPack.primaryOffsets()[intArgs[i + 1]]); void* outBuff = output->bufferWithOffset(outTadPack.primaryOffsets()[i]); std::memcpy(outBuff, inBuff, shape::length(inTadShapeInfo) * input->sizeOfT()); @@ -156,9 +154,8 @@ void gather(sd::LaunchContext * context, const NDArray* input, const NDArray* in auto func = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i++) { - - void* inBuff = input->bufferWithOffset(inTadPack.primaryOffsets()[intArgs[i + 1]]); - void* outBuff = output->bufferWithOffset(outTadPack.primaryOffsets()[i]); + auto inBuff = input->bufferWithOffset(inTadPack.primaryOffsets()[intArgs[i + 1]]); + auto outBuff = output->bufferWithOffset(outTadPack.primaryOffsets()[i]); NativeOpExecutioner::execTransformAny(input->getContext(), transform::Assign, inBuff, inTadShapeInfo, nullptr/*input specialBuffer*/, nullptr/*input specialShapeInfo*/, diff --git a/libnd4j/include/ops/declarable/helpers/cpu/gatherTransforms.cpp b/libnd4j/include/ops/declarable/helpers/cpu/gatherTransforms.cpp index f7cb1cf59..db62c4b4f 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/gatherTransforms.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/gatherTransforms.cpp @@ -34,9 +34,9 @@ namespace helpers { template static void gatherND_(NDArray& input, NDArray& indices, NDArray& output) { - const X* x = reinterpret_cast(input.getBuffer()); - const Y* y = reinterpret_cast(indices.getBuffer()); - X* z = reinterpret_cast(output.getBuffer()); + const X* x = reinterpret_cast(input.buffer()); + const Y* y = 
reinterpret_cast(indices.buffer()); + X* z = reinterpret_cast(output.buffer()); const int xRank = input.rankOf(); const int yRank = indices.rankOf(); @@ -56,13 +56,13 @@ static void gatherND_(NDArray& input, NDArray& indices, NDArray& output) { for (auto i = start; i < stop; i++) { - shape::index2coordsCPU(start, i, output.getShapeInfo(), zCoords); + shape::index2coordsCPU(start, i, output.shapeInfo(), zCoords); - const auto zOffset = shape::getOffset(output.getShapeInfo(), zCoords); + const auto zOffset = shape::getOffset(output.shapeInfo(), zCoords); temp = zCoords[yRank - 1]; zCoords[yRank - 1] = 0; - const auto yOffset = shape::getOffset(indices.getShapeInfo(), zCoords); + const auto yOffset = shape::getOffset(indices.shapeInfo(), zCoords); zCoords[yRank - 1] = temp; if(bEqual) @@ -75,7 +75,7 @@ static void gatherND_(NDArray& input, NDArray& indices, NDArray& output) { for (uint j = 0; j < yLastDim; ++j) xCoords[j] = y[yOffset + j * indices.stridesOf()[yRank - 1]]; // last stride - const auto xOffset = shape::getOffset(input.getShapeInfo(), xCoords); + const auto xOffset = shape::getOffset(input.shapeInfo(), xCoords); z[zOffset] = x[xOffset]; } @@ -116,9 +116,9 @@ static void gather_(NDArray* input, const NDArray* indices, NDArray* output, con output->assign(scalarNDArray); } else { auto dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {axis}); - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); + auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); - auto tadArr = NDArray(reinterpret_cast(reinterpret_cast(input->getBuffer()) + tadPack.primaryOffsets()[indices->e(0)]), tadPack.primaryShapeInfo(), output->getContext()); + auto tadArr = NDArray(reinterpret_cast(reinterpret_cast(input->buffer()) + tadPack.primaryOffsets()[indices->e(0)]), tadPack.primaryShapeInfo(), output->getContext()); output->assign(&tadArr); } } @@ -135,7 +135,7 @@ static void 
gather_(NDArray* input, const NDArray* indices, NDArray* output, con std::vector dimsOut(indices->rankOf()); std::iota(dimsOut.begin(), dimsOut.end(), axis); // fill with axis, axis+1, ... indices->rankOf()-1 - const Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(output->getShapeInfo(), dimsOut); + const Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(output->shapeInfo(), dimsOut); auto func = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i++) { @@ -159,7 +159,7 @@ static void gather_(NDArray* input, const NDArray* indices, NDArray* output, con output->assign((*input)(intArgs[1], {axis})); } else { // vector case - const Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(output->getShapeInfo(), {axis}); + const Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(output->shapeInfo(), {axis}); auto func = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i++) { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/histogram.cpp b/libnd4j/include/ops/declarable/helpers/cpu/histogram.cpp index 9fc6ddefb..cb815110d 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/histogram.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/histogram.cpp @@ -24,8 +24,8 @@ namespace sd { namespace ops { namespace helpers { template - static void histogram_(void *xBuffer, Nd4jLong *xShapeInfo, void *zBuffer, Nd4jLong *zShapeInfo, Nd4jLong numBins, double min_val, double max_val) { - auto dx = reinterpret_cast(xBuffer); + static void histogram_(void const* xBuffer, Nd4jLong const* xShapeInfo, void *zBuffer, Nd4jLong const* zShapeInfo, Nd4jLong numBins, double min_val, double max_val) { + auto dx = reinterpret_cast(xBuffer); auto result = reinterpret_cast(zBuffer); int length = shape::length(xShapeInfo); @@ -63,7 +63,7 @@ namespace sd { double min_val = input.reduceNumber(reduce::SameOps::Min).e(0); double max_val = input.reduceNumber(reduce::SameOps::Max).e(0); - BUILD_DOUBLE_SELECTOR(input.dataType(), output.dataType(), histogram_, (input.buffer(), 
input.shapeInfo(), output.getBuffer(), output.getShapeInfo(), numBins, min_val, max_val), LIBND4J_TYPES, INDEXING_TYPES); + BUILD_DOUBLE_SELECTOR(input.dataType(), output.dataType(), histogram_, (input.buffer(), input.shapeInfo(), output.buffer(), output.shapeInfo(), numBins, min_val, max_val), LIBND4J_TYPES, INDEXING_TYPES); } } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/im2col.cpp b/libnd4j/include/ops/declarable/helpers/cpu/im2col.cpp index 2129b4bee..2434fddcc 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/im2col.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/im2col.cpp @@ -32,10 +32,10 @@ static void im2col_(sd::LaunchContext & context, const NDArray& input, NDArray& // input [bS, iC, iH, iW] is convoluted to output [bS, iC, kH, kW, oH, oW] - auto imBuff = static_cast(input.getBuffer()); - auto colBuff = static_cast(output.getBuffer()); - auto imShapeBuffer = input.getShapeInfo(); - auto colShapeBuffer = output.getShapeInfo(); + auto imBuff = static_cast(input.buffer()); + auto colBuff = static_cast(output.buffer()); + auto imShapeBuffer = input.shapeInfo(); + auto colShapeBuffer = output.shapeInfo(); auto colShape = shape::shapeOf(colShapeBuffer); auto colStride = shape::stride(colShapeBuffer); auto imShape = shape::shapeOf(imShapeBuffer); @@ -95,7 +95,8 @@ static void im2col_(sd::LaunchContext & context, const NDArray& input, NDArray& else { auto func = PRAGMA_THREADS_FOR_2D { - T *col, *im; + T *col; + T const* im; int imRow, imCol; for (auto b = start_x; b < stop_x; b += inc_x) { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/imagesHelpers.cpp b/libnd4j/include/ops/declarable/helpers/cpu/imagesHelpers.cpp index 682677ef3..2183b7d5a 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/imagesHelpers.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/imagesHelpers.cpp @@ -53,9 +53,9 @@ static void rgbToGrs_(const NDArray& input, NDArray& output, const int dimC) { int coords[MAX_RANK]; for (auto i = start; i < stop; 
i++) { - shape::index2coordsCPU(start, i, output.getShapeInfo(), coords); - const auto zOffset = shape::getOffset(output.getShapeInfo(), coords); - const auto xOffset0 = shape::getOffset(input.getShapeInfo(), coords); + shape::index2coordsCPU(start, i, output.shapeInfo(), coords); + const auto zOffset = shape::getOffset(output.shapeInfo(), coords); + const auto xOffset0 = shape::getOffset(input.shapeInfo(), coords); const auto xOffset1 = xOffset0 + input.strideAt(dimC); const auto xOffset2 = xOffset1 + input.strideAt(dimC); z[zOffset] = 0.2989f*x[xOffset0] + 0.5870f*x[xOffset1] + 0.1140f*x[xOffset2]; @@ -91,8 +91,8 @@ FORCEINLINE static void rgbToFromYuv_(const NDArray& input, NDArray& output, con return; } - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), dimC); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.getShapeInfo(), dimC); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimC); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), dimC); const Nd4jLong numOfTads = packX.numberOfTads(); const Nd4jLong xDimCstride = input.stridesOf()[dimC]; @@ -149,8 +149,8 @@ FORCEINLINE static void tripleTransformer(const NDArray* input, NDArray* output, samediff::Threads::parallel_for(func, 0, input->lengthOf(), 3); } else { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimC); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimC); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimC); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimC); const Nd4jLong numOfTads = packX.numberOfTads(); const Nd4jLong xDimCstride = input->stridesOf()[dimC]; @@ -199,8 +199,8 @@ FORCEINLINE static void tripleTransformer(const NDArray* input, NDArray* output, 
samediff::Threads::parallel_for(func, 0, input->lengthOf(), 3); } else { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimC); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimC); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimC); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimC); const Nd4jLong numOfTads = packX.numberOfTads(); const Nd4jLong xDimCstride = input->stridesOf()[dimC]; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/ismax.cpp b/libnd4j/include/ops/declarable/helpers/cpu/ismax.cpp index 5a4bb28cc..687153f99 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/ismax.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/ismax.cpp @@ -40,7 +40,7 @@ static void ismax_(const NDArray* input, NDArray* output, const std::vector output->p(i, 1); } else { - int eleStride = shape::elementWiseStride(input->getShapeInfo()); + int eleStride = shape::elementWiseStride(input->shapeInfo()); if (eleStride == 1) { int maxIdx = 0; auto currMax = input->e(0); @@ -125,8 +125,8 @@ static void ismax_(const NDArray* input, NDArray* output, const std::vector //moving all dimensions (in sorted order) //to the back. 
//permuted version of the input shape info for setting up the tad problem - auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), const_cast(dimensions.data()), dimensionsLength); - auto tadPackZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), const_cast(dimensions.data()), dimensionsLength); + auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), const_cast(dimensions.data()), dimensionsLength); + auto tadPackZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), const_cast(dimensions.data()), dimensionsLength); auto tadShapeShapeInfo = tadPack.primaryShapeInfo(); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/lrn.cpp b/libnd4j/include/ops/declarable/helpers/cpu/lrn.cpp index 31235d737..8dc31d8c0 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/lrn.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/lrn.cpp @@ -35,13 +35,13 @@ static int lrnFunctor_(sd::graph::Context& block, NDArray* input, NDArray* outpu const int rank = input->rankOf(); - TadPack inTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), {rank - 1}); + TadPack inTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {rank - 1}); TadPack outTadPack; - if(shape::haveSameShapeAndStrides(input->getShapeInfo(), output->getShapeInfo())) + if(shape::haveSameShapeAndStrides(input->shapeInfo(), output->shapeInfo())) outTadPack = inTadPack; else - outTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), {rank - 1}); + outTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {rank - 1}); const Nd4jLong numOfTads = inTadPack.numberOfTads(); const Nd4jLong tadLen = input->sizeAt(-1); @@ -52,8 +52,8 @@ static int lrnFunctor_(sd::graph::Context& block, NDArray* input, NDArray* outpu const Nd4jLong inTadEws = 
shape::elementWiseStride(inTadPack.primaryShapeInfo()); const Nd4jLong outTadEws = shape::elementWiseStride(outTadPack.primaryShapeInfo()); - const T* inBuff = reinterpret_cast(input->getBuffer()); - T* outBuff = reinterpret_cast(output->getBuffer()); + const T* inBuff = reinterpret_cast(input->buffer()); + T* outBuff = reinterpret_cast(output->buffer()); const T tbias = static_cast(bias); const T tbeta = static_cast(beta); @@ -151,13 +151,13 @@ static void lrnBP_(const NDArray& input, const NDArray& gradO, NDArray& gradI, c const int rank = input.rankOf(); - TadPack inTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), {rank - 1}); + TadPack inTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), {rank - 1}); TadPack gradITadPack; - if(shape::haveSameShapeAndStrides(input.getShapeInfo(), gradI.getShapeInfo())) + if(shape::haveSameShapeAndStrides(input.shapeInfo(), gradI.shapeInfo())) gradITadPack = inTadPack; else - gradITadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradI.getShapeInfo(), {rank - 1}); + gradITadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradI.shapeInfo(), {rank - 1}); const Nd4jLong numOfTads = inTadPack.numberOfTads(); const Nd4jLong tadLen = input.sizeAt(-1); @@ -168,8 +168,8 @@ static void lrnBP_(const NDArray& input, const NDArray& gradO, NDArray& gradI, c const Nd4jLong inTadEws = shape::elementWiseStride(inTadPack.primaryShapeInfo()); const Nd4jLong gradITadEws = shape::elementWiseStride(gradITadPack.primaryShapeInfo()); - const X* inBuff = reinterpret_cast(input.getBuffer()); - Y* gradIBuff = reinterpret_cast(gradI.getBuffer()); + const X* inBuff = reinterpret_cast(input.buffer()); + Y* gradIBuff = reinterpret_cast(gradI.buffer()); const Y tbias = static_cast(bias); const Y tbeta = static_cast(beta); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/lstsq.cpp b/libnd4j/include/ops/declarable/helpers/cpu/lstsq.cpp index 
554486bbf..675fb2794 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/lstsq.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/lstsq.cpp @@ -52,7 +52,7 @@ namespace helpers { if (fast) { // Cholesky decomposition approach // Equation for solve A^T * Ax = A^T * b, so // 1. Computing A2: - auto tAtShape = ShapeUtils::evalShapeForMatmul(leftInput->getShapeInfo(), leftInput->getShapeInfo(), true, false); + auto tAtShape = ShapeUtils::evalShapeForMatmul(leftInput->shapeInfo(), leftInput->shapeInfo(), true, false); //tAtShape[tAtShape.size() - 2] = output->sizeAt(-2); NDArray leftOutput('c', tAtShape, output->dataType(), context); MmulHelper::matmul(leftInput, leftInput, &leftOutput, true, false); // Computing A2 = A^T * A diff --git a/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp b/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp index 8938a98f9..0f435cfdb 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp @@ -40,7 +40,7 @@ namespace helpers { BUILD_SINGLE_TEMPLATE(template void swapRows_, (NDArray* matrix, int theFirst, int theSecond), FLOAT_TYPES); template - static void swapRows(T* matrixBuf, Nd4jLong* matrixShape, Nd4jLong theFirst, Nd4jLong theSecond) { + static void swapRows(T* matrixBuf, Nd4jLong const* matrixShape, Nd4jLong theFirst, Nd4jLong theSecond) { if (theFirst != theSecond) { auto n = shape::sizeAt(matrixShape, -1); @@ -208,7 +208,7 @@ namespace helpers { * lu decomposition with naive algorithm with partial pivoting * */ template - static I argmaxCol(I column, T* compoundBuffer, Nd4jLong* compoundShape) { + static I argmaxCol(I column, T* compoundBuffer, Nd4jLong const* compoundShape) { auto rowNum = shape::sizeAt(compoundShape, 0); Nd4jLong xInitial[] = {column, column}; auto xInitialIndex = shape::getOffset(compoundShape, xInitial, 0); @@ -230,7 +230,7 @@ namespace helpers { } template - void processColumns(int currentRow, int rowNum, T* compoundBuf, Nd4jLong* 
compoundShape) { + void processColumns(int currentRow, int rowNum, T* compoundBuf, Nd4jLong const* compoundShape) { Nd4jLong xDiag[] = {currentRow, currentRow}; auto diagIndex = shape::getOffset(compoundShape, xDiag, 0); auto loop = PRAGMA_THREADS_FOR { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/matrixSetDiag.cpp b/libnd4j/include/ops/declarable/helpers/cpu/matrixSetDiag.cpp index 60df150a9..443048c56 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/matrixSetDiag.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/matrixSetDiag.cpp @@ -39,9 +39,9 @@ void matrixSetDiag_(const NDArray& input, const NDArray& diagonal, NDArray& outp const T* y = diagonal.bufferAsT(); T* z = output.bufferAsT(); - const Nd4jLong* xShapeInfo = input.getShapeInfo(); - const Nd4jLong* yShapeInfo = diagonal.getShapeInfo(); - const Nd4jLong* zShapeInfo = output.getShapeInfo(); + const Nd4jLong* xShapeInfo = input.shapeInfo(); + const Nd4jLong* yShapeInfo = diagonal.shapeInfo(); + const Nd4jLong* zShapeInfo = output.shapeInfo(); const bool areSameOffsets = shape::haveSameShapeAndStrides(xShapeInfo, zShapeInfo); // shapes are definitely the same, but strides might not diff --git a/libnd4j/include/ops/declarable/helpers/cpu/merge.cpp b/libnd4j/include/ops/declarable/helpers/cpu/merge.cpp index 74007635f..7874d6d67 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/merge.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/merge.cpp @@ -131,10 +131,10 @@ static void mergeMaxBp_(const std::vector& inArrs, std::vectorgetShapeInfo(); + auto gradShape = inArrs[numArgs]->shapeInfo(); std::vector vbSameShaepeAndStrides(numArgs); for (int i = 0; i < numArgs; ++i) { - vbSameShaepeAndStrides[i] = shape::haveSameShapeAndStrides(gradShape, inArrs[i]->getShapeInfo()); + vbSameShaepeAndStrides[i] = shape::haveSameShapeAndStrides(gradShape, inArrs[i]->shapeInfo()); } auto func = PRAGMA_THREADS_FOR{ @@ -151,7 +151,7 @@ static void mergeMaxBp_(const std::vector& inArrs, 
std::vectorgetShapeInfo(), coords); + const auto xOffset = vbSameShaepeAndStrides[i] ? gradOffset : shape::getOffset(inArrs[i]->shapeInfo(), coords); const T* v = inArrs[i]->bufferAsT(); if (v[xOffset] > max) { max = v[xOffset]; @@ -159,7 +159,7 @@ static void mergeMaxBp_(const std::vector& inArrs, std::vectorgetShapeInfo(), coords); + const auto zOffset = vbSameShaepeAndStrides[nMaxIndex] ? gradOffset : shape::getOffset(outArrs[nMaxIndex]->shapeInfo(), coords); T* z = outArrs[nMaxIndex]->bufferAsT(); z[zOffset] = gradient[gradOffset]; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/nth_element.cpp b/libnd4j/include/ops/declarable/helpers/cpu/nth_element.cpp index 2730d9e88..53565f3c1 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/nth_element.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/nth_element.cpp @@ -34,7 +34,7 @@ namespace helpers { NDArray sortedVals(*input); if (input->isVector()) { //std::vector data(input->lengthOf()); - //memcpy(&data[0], input->getBuffer(), sizeof(T) * data.size()); + //memcpy(&data[0], input->buffer(), sizeof(T) * data.size()); //size_t l = 0; //for (size_t l = 0; l < data.size(); ++l) // data[l] = input->e(l); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/one_hot.cpp b/libnd4j/include/ops/declarable/helpers/cpu/one_hot.cpp index d3f7add49..2aa14585b 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/one_hot.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/one_hot.cpp @@ -27,9 +27,9 @@ namespace sd { namespace ops { namespace helpers { template - static void onehot_(void *voutput, Nd4jLong *zShapeInfo, void *vindices, Nd4jLong *iShapeInfo, int axis, double on, double off) { + static void onehot_(void *voutput, Nd4jLong const* zShapeInfo, void const* vindices, Nd4jLong const* iShapeInfo, int axis, double on, double off) { auto output = reinterpret_cast(voutput); - auto indices = reinterpret_cast(vindices); + auto indices = reinterpret_cast(vindices); auto tadPack = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(zShapeInfo, {axis}); @@ -96,7 +96,7 @@ namespace sd { auto zType = output->dataType(); auto iType = indices->dataType(); - BUILD_DOUBLE_SELECTOR(zType, iType, onehot_, (output->buffer(), output->shapeInfo(), indices->getBuffer(), indices->getShapeInfo(), axis, on, off), LIBND4J_TYPES, LIBND4J_TYPES); + BUILD_DOUBLE_SELECTOR(zType, iType, onehot_, (output->buffer(), output->shapeInfo(), indices->buffer(), indices->shapeInfo(), axis, on, off), LIBND4J_TYPES, LIBND4J_TYPES); } } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/pad.cpp b/libnd4j/include/ops/declarable/helpers/cpu/pad.cpp index b303d95ae..a0efd44c1 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/pad.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/pad.cpp @@ -52,8 +52,8 @@ void pad_(const int mode, const NDArray& input, const NDArray& paddings, NDArray for (auto i = start; i < stop; i++) { - shape::index2coordsCPU(start, i, output.getShapeInfo(), zCoords); - const auto zOffset = shape::getOffset(output.getShapeInfo(), zCoords); + shape::index2coordsCPU(start, i, output.shapeInfo(), zCoords); + const auto zOffset = shape::getOffset(output.shapeInfo(), zCoords); memcpy(xCoords, zCoords, rank * sizeof(int)); @@ -75,7 +75,7 @@ void pad_(const int mode, const NDArray& input, const NDArray& paddings, NDArray } if (within) - z[zOffset] = x[shape::getOffset(input.getShapeInfo(), xCoords)]; + z[zOffset] = x[shape::getOffset(input.shapeInfo(), xCoords)]; else z[zOffset] = padVal; } @@ -94,8 +94,8 @@ void pad_(const int mode, const NDArray& input, const NDArray& paddings, NDArray for (auto i = start; i < stop; i++) { - shape::index2coordsCPU(start, i, output.getShapeInfo(), zCoords); - const auto zOffset = shape::getOffset(output.getShapeInfo(), zCoords); + shape::index2coordsCPU(start, i, output.shapeInfo(), zCoords); + const auto zOffset = shape::getOffset(output.shapeInfo(), zCoords); memcpy(xCoords, zCoords, rank * sizeof(int)); @@ 
-112,7 +112,7 @@ void pad_(const int mode, const NDArray& input, const NDArray& paddings, NDArray xCoords[j] = 2 * xShape[j] - xCoords[j] - shift2; // means fill from right } - const auto xOffset = shape::getOffset(input.getShapeInfo(), xCoords); + const auto xOffset = shape::getOffset(input.shapeInfo(), xCoords); z[zOffset] = x[xOffset]; } }; @@ -148,7 +148,7 @@ void pad_(const int mode, const NDArray& input, const NDArray& paddings, NDArray // Nd4jLong startL = mode == 1 ? 1 : 0; // REFLECT or SYMMETRIC // Nd4jLong startR = mode == 1 ? inDimSize-2 : inDimSize-1; // REFLECT or SYMMETRIC -// Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(input.getShapeInfo(), dimsToExclude); +// Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(input.shapeInfo(), dimsToExclude); // NDArray outSubArr0 = output(outIdx[0], true); @@ -209,7 +209,7 @@ void pad_(const int mode, const NDArray& input, const NDArray& paddings, NDArray // startL = mode == 1 ? numLeft + 1 : numLeft; // REFLECT or SYMMETRIC // startR = mode == 1 ? 
numLeft + inDimSize - 2 : numLeft + inDimSize-1; // REFLECT or SYMMETRIC -// numOfSubArrs = ShapeUtils::getNumOfSubArrs(output.getShapeInfo(), dimsToExclude); +// numOfSubArrs = ShapeUtils::getNumOfSubArrs(output.shapeInfo(), dimsToExclude); // PRAGMA_OMP_PARALLEL_FOR_ARGS(firstprivate(outIdxOuter, outIdxInner)) // for(Nd4jLong j = 0; j < numOfSubArrs; ++j) { @@ -294,7 +294,7 @@ static void mirrorPad_(const NDArray& input, const NDArray& paddings, NDArray& o for (auto i = start; i < stop; i++) { - shape::index2coordsCPU(start, i, output.getShapeInfo(), outIdx); + shape::index2coordsCPU(start, i, output.shapeInfo(), outIdx); for (int j = 0; j < rank; ++j) { const Nd4jLong inLen = input.sizeAt(j); @@ -312,9 +312,9 @@ static void mirrorPad_(const NDArray& input, const NDArray& paddings, NDArray& o inIdx[j] = len - outIdx[j]; } - auto outOffset = shape::getOffset(output.getShapeInfo(), outIdx); - auto inOffset = shape::getOffset(input.getShapeInfo(), inIdx); - reinterpret_cast(output.buffer())[outOffset] = reinterpret_cast(input.getBuffer())[inOffset]; + auto outOffset = shape::getOffset(output.shapeInfo(), outIdx); + auto inOffset = shape::getOffset(input.shapeInfo(), inIdx); + reinterpret_cast(output.buffer())[outOffset] = reinterpret_cast(input.buffer())[inOffset]; } }; @@ -340,13 +340,13 @@ static void recursiveLoopForPad_(const int mode, NDArray& input, const NDArray& // then we use this array for tads building, every time while recursion the number of built tads becomes bigger dimensions.erase(dimensions.begin()); // build tad basing on output array, also create auxiliary arrays pointing on required output array ranges - shape::TAD tadOut(output.getShapeInfo(), dimensions.data(), dimensions.size()); + shape::TAD tadOut(output.shapeInfo(), dimensions.data(), dimensions.size()); tadOut.createTadOnlyShapeInfo(); tadOut.createOffsets(); auto subArrOut = NDArray(output.getBuffer(), tadOut.tadOnlyShapeInfo, output.getContext()); auto subArr = 
NDArray(output.getBuffer(), tadOut.tadOnlyShapeInfo, output.getContext()); // build tad basing on input array, also create auxiliary array pointing on required input array range - shape::TAD tadIn(input.getShapeInfo(), dimensions.data(), dimensions.size()); + shape::TAD tadIn(input.shapeInfo(), dimensions.data(), dimensions.size()); tadIn.createTadOnlyShapeInfo(); tadIn.createOffsets(); auto subArrIn = NDArray(input.getBuffer(), tadIn.tadOnlyShapeInfo, output.getContext()); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/percentile.cpp b/libnd4j/include/ops/declarable/helpers/cpu/percentile.cpp index 3ffa4dd82..dea46cd69 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/percentile.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/percentile.cpp @@ -69,7 +69,7 @@ static void _percentile(const NDArray& input, NDArray& output, std::vector& // FIXME: parallelism ! for(int i=0; i(flattenedArr.getBuffer()); + auto buff = reinterpret_cast(flattenedArr.buffer()); flattenedArr.assign(listOfSubArrs.at(i)); std::sort(buff, buff + len); output.p(i, flattenedArr.e(position)); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/prefix.cpp b/libnd4j/include/ops/declarable/helpers/cpu/prefix.cpp index 5307f841e..1afe03556 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/prefix.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/prefix.cpp @@ -27,7 +27,7 @@ namespace sd { namespace ops { namespace helpers { template - static void prefix_(scalar::Ops op, const void* vx, Nd4jLong* xShapeInfo, void* vz, Nd4jLong* zShapeInfo, bool exclusive, bool reverse) { + static void prefix_(scalar::Ops op, const void* vx, Nd4jLong const* xShapeInfo, void* vz, Nd4jLong const* zShapeInfo, bool exclusive, bool reverse) { const auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto length = shape::length(xShapeInfo); @@ -113,7 +113,7 @@ namespace sd { template static void prefix_(scalar::Ops op, const NDArray* x, NDArray* z, bool exclusive, bool reverse) { - 
prefix_(op, x->getBuffer(), x->getShapeInfo(), z->buffer(), z->shapeInfo(), exclusive, reverse); + prefix_(op, x->buffer(), x->shapeInfo(), z->buffer(), z->shapeInfo(), exclusive, reverse); }; void prefix(sd::LaunchContext * context, scalar::Ops op, const NDArray* x, NDArray* z, bool exclusive, bool reverse) { @@ -124,7 +124,7 @@ namespace sd { BUILD_SINGLE_SELECTOR(x->dataType(), prefix_, (op, x, z, dims, exclusive, reverse), LIBND4J_TYPES); } - BUILD_SINGLE_TEMPLATE(template void prefix_, (scalar::Ops op, const void* vx, Nd4jLong* xShapeInfo, void* vz, Nd4jLong* zShapeInfo, bool exclusive, bool reverse), LIBND4J_TYPES); + BUILD_SINGLE_TEMPLATE(template void prefix_, (scalar::Ops op, const void* vx, Nd4jLong const* xShapeInfo, void* vz, Nd4jLong const* zShapeInfo, bool exclusive, bool reverse), LIBND4J_TYPES); BUILD_SINGLE_TEMPLATE(template void prefix_, (scalar::Ops op, const NDArray* x, NDArray* z, const std::vector& dims, bool exclusive, bool reverse), LIBND4J_TYPES); BUILD_SINGLE_TEMPLATE(template void prefix_, (scalar::Ops op, const NDArray* x, NDArray* z, bool exclusive, bool reverse), LIBND4J_TYPES); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/random.cpp b/libnd4j/include/ops/declarable/helpers/cpu/random.cpp index b38101feb..1e96211b3 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/random.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/random.cpp @@ -34,11 +34,13 @@ namespace helpers { template void fillRandomGamma_(LaunchContext* context, graph::RandomGenerator& rng, NDArray* alpha, NDArray* beta, NDArray* output) { - Nd4jLong* broadcasted = nullptr; - if (beta != nullptr) - ShapeUtils::evalBroadcastShapeInfo(*alpha, *beta, true, broadcasted, context->getWorkspace()); - else - broadcasted = alpha->shapeInfo(); + auto broadcasted = alpha->shapeInfo(); + if (beta != nullptr) { + const Nd4jLong* broadcastedShape = nullptr; + ShapeUtils::evalBroadcastShapeInfo(*alpha, *beta, true, broadcastedShape, context->getWorkspace()); + 
broadcasted = broadcastedShape; + } + auto step = shape::length(broadcasted); auto shift = output->lengthOf() / step; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/randomShuffle.cpp b/libnd4j/include/ops/declarable/helpers/cpu/randomShuffle.cpp index 7323c3937..2e336da23 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/randomShuffle.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/randomShuffle.cpp @@ -42,7 +42,7 @@ void randomShuffle_(NDArray& input, NDArray& output, sd::graph::RandomGenerator& if(!isInplace) output.assign(input); } - else if (input.isVector() || shape::isLikeVector(input.getShapeInfo(), temp)) { + else if (input.isVector() || shape::isLikeVector(input.shapeInfo(), temp)) { // apply Fisher-Yates shuffle if(isInplace) { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/random_crop.cpp b/libnd4j/include/ops/declarable/helpers/cpu/random_crop.cpp index 365465f64..34be299b7 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/random_crop.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/random_crop.cpp @@ -30,12 +30,12 @@ namespace helpers { template static int _randomCropFunctor(graph::Context& context, NDArray* input, NDArray* shape, NDArray* output, int seed) { graph::RandomGenerator rngX(context.getRng()); - //functions::random::RandomFunction::template execTransform>(rng, output->getBuffer(), output->getShapeInfo(), std::vector({T(0.), shape->e(last)}).data()); + //functions::random::RandomFunction::template execTransform>(rng, output->buffer(), output->shapeInfo(), std::vector({T(0.), shape->e(last)}).data()); //NativeOpExecutioner::execRandom(random::UniformDistribution, rng, output->buffer(), output->shapeInfo(), std::vector({T(0.), shape->e(last)}).data()); Nd4jLong last = shape->lengthOf() - 1; rngX.setSeed(seed); - //functions::random::RandomFunction::template execTransform>(rng, output->getBuffer(), output->getShapeInfo(), std::vector({T(0.), shape->getScalar(last)}).data()); + 
//functions::random::RandomFunction::template execTransform>(rng, output->buffer(), output->shapeInfo(), std::vector({T(0.), shape->getScalar(last)}).data()); for (Nd4jLong e = 0; e < output->lengthOf(); ++e) { output->p(e, rngX.relativeT(e, 0, shape->e(last))); } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/range.cpp b/libnd4j/include/ops/declarable/helpers/cpu/range.cpp index e4349ac8a..eb2cbd760 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/range.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/range.cpp @@ -34,7 +34,7 @@ static void _range(const NDArray& start, const NDArray& delta, NDArray& outVecto const Nd4jLong len = outVector.lengthOf(); - auto buff = reinterpret_cast(outVector.getBuffer()); + auto buff = reinterpret_cast(outVector.buffer()); auto s = start.e(0); auto d = delta.e(0); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/reverse.cpp b/libnd4j/include/ops/declarable/helpers/cpu/reverse.cpp index 3d17fb62a..95417dade 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/reverse.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/reverse.cpp @@ -38,8 +38,8 @@ inline void swap(T* arr, Nd4jLong from, Nd4jLong to) { // this legacy op is written by raver119@gmail.com template -static void reverseArray(sd::LaunchContext * context, void *vinArr, Nd4jLong *inShapeBuffer, void *voutArr, Nd4jLong *outShapeBuffer, int numOfElemsToReverse = 0) { - auto inArr = reinterpret_cast(vinArr); +static void reverseArray(sd::LaunchContext * context, void const* vinArr, Nd4jLong const*inShapeBuffer, void *voutArr, Nd4jLong const*outShapeBuffer, int numOfElemsToReverse = 0) { + auto inArr = reinterpret_cast(vinArr); auto outArr = reinterpret_cast(voutArr); Nd4jLong inLength = shape::length(inShapeBuffer); @@ -56,7 +56,7 @@ static void reverseArray(sd::LaunchContext * context, void *vinArr, Nd4jLong *in auto func = PRAGMA_THREADS_FOR { for (auto e = start; e < stop; e++) { auto idx = sLength - e; - swap(inArr, e, idx); + 
swap(const_cast(inArr), e, idx); } }; samediff::Threads::parallel_for(func, 0, numOfElemsToReverse / 2); @@ -66,7 +66,7 @@ static void reverseArray(sd::LaunchContext * context, void *vinArr, Nd4jLong *in for (auto e = start; e < stop; e++) { auto idx1 = (sLength - e) * inEWS; Nd4jLong idx2 = e * inEWS; - swap(inArr, idx1, idx2); + swap(const_cast(inArr), idx1, idx2); } }; @@ -154,12 +154,12 @@ template static void reverseSequence_(sd::LaunchContext * context, const NDArray* input, const NDArray* seqLengths, NDArray* output, int seqDim, const int batchDim){ int posOfNonUnityDim = -1; - if(input->isVector() || shape::isLikeVector(input->getShapeInfo(), posOfNonUnityDim)) { + if(input->isVector() || shape::isLikeVector(input->shapeInfo(), posOfNonUnityDim)) { if((seqDim == 0 && input->sizeAt(0) == 1) || (batchDim == posOfNonUnityDim)) output->assign(input); else - helpers::reverseArray(context, const_cast(input)->getBuffer(), const_cast(input)->getShapeInfo(), output->getBuffer(), output->getShapeInfo(), seqLengths->e(0)); + helpers::reverseArray(context, const_cast(input)->buffer(), const_cast(input)->shapeInfo(), output->buffer(), output->shapeInfo(), seqLengths->e(0)); } else { @@ -182,7 +182,7 @@ static void reverseSequence_(sd::LaunchContext * context, const NDArray* input, auto inInnerSet = inSubArrsSet.at(i)->allTensorsAlongDimension({seqDim}); auto outInnerSet = outSubArrsSet.at(i)->allTensorsAlongDimension({seqDim}); for(int j = 0; j < inInnerSet.size(); ++j) - helpers::reverseArray(context, inInnerSet.at(j)->getBuffer(), inInnerSet.at(j)->getShapeInfo(), outInnerSet.at(j)->getBuffer(), outInnerSet.at(j)->getShapeInfo(), numOfElemsToReverse); + helpers::reverseArray(context, inInnerSet.at(j)->buffer(), inInnerSet.at(j)->shapeInfo(), outInnerSet.at(j)->buffer(), outInnerSet.at(j)->shapeInfo(), numOfElemsToReverse); } } } @@ -206,12 +206,12 @@ void reverse(sd::LaunchContext * context, const NDArray* input, NDArray* output, for(int i = 0; i < listIn.size(); ++i) 
{ // listIn.size() = listOut.size() subArrIn = listIn.at(i); subArrOut = listOut.at(i); - BUILD_SINGLE_SELECTOR(input->dataType(), helpers::reverseArray, (context, subArrIn->getBuffer(), subArrIn->getShapeInfo(), subArrOut->getBuffer(), subArrOut->getShapeInfo()), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(input->dataType(), helpers::reverseArray, (context, subArrIn->buffer(), subArrIn->shapeInfo(), subArrOut->buffer(), subArrOut->shapeInfo()), LIBND4J_TYPES); } } BUILD_SINGLE_TEMPLATE(template void reverseSequence_, (sd::LaunchContext * context, const NDArray* input, const NDArray* seqLengths, NDArray* output, int seqDim, const int batchDim), LIBND4J_TYPES); -BUILD_SINGLE_TEMPLATE(template void reverseArray, (sd::LaunchContext * context, void *inArr, Nd4jLong *inShapeBuffer, void *outArr, Nd4jLong *outShapeBuffer, int numOfElemsToReverse), LIBND4J_TYPES); +BUILD_SINGLE_TEMPLATE(template void reverseArray, (sd::LaunchContext * context, void const*inArr, Nd4jLong const*inShapeBuffer, void* outArr, Nd4jLong const* outShapeBuffer, int numOfElemsToReverse), LIBND4J_TYPES); } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/s_t_b.cpp b/libnd4j/include/ops/declarable/helpers/cpu/s_t_b.cpp index 6a854bba8..99a172c02 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/s_t_b.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/s_t_b.cpp @@ -45,8 +45,8 @@ static void batchToSpace_(const NDArray& input, NDArray& output, const uint crop const int rank = 4; - const Nd4jLong* xShapeInfo = input.getShapeInfo(); - const Nd4jLong* zShapeInfo = output.getShapeInfo(); + const Nd4jLong* xShapeInfo = input.shapeInfo(); + const Nd4jLong* zShapeInfo = output.shapeInfo(); const uint bS = xShapeInfo[1]; const uint iH = xShapeInfo[2]; @@ -118,7 +118,7 @@ static void batchToSpaceND_(const NDArray& input, const NDArray& crop, NDArray& for (auto i = start; i < stop; i++) { - shape::index2coordsCPU(start, i, output.getShapeInfo(), zCoords); + shape::index2coordsCPU(start, i, 
output.shapeInfo(), zCoords); memcpy(xCoords, zCoords, rank * sizeof(int)); @@ -126,8 +126,8 @@ static void batchToSpaceND_(const NDArray& input, const NDArray& crop, NDArray& for (uint j = 1; j <= numOfSpatialDims; ++j) xCoords[j] += crop.e(j - 1, 0); // add crop left - const auto zOffset = shape::getOffset(output.getShapeInfo(), zCoords); - const auto xOffset = shape::getOffset(input.getShapeInfo(), xCoords); + const auto zOffset = shape::getOffset(output.shapeInfo(), zCoords); + const auto xOffset = shape::getOffset(input.shapeInfo(), xCoords); z[zOffset] = x[xOffset]; } @@ -211,8 +211,8 @@ static void spaceToBatch_(const NDArray& input, NDArray& output, const uint padB const int rank = 4; - const Nd4jLong* xShapeInfo = input.getShapeInfo(); - const Nd4jLong* zShapeInfo = output.getShapeInfo(); + const Nd4jLong* xShapeInfo = input.shapeInfo(); + const Nd4jLong* zShapeInfo = output.shapeInfo(); const uint bS = zShapeInfo[1]; const uint oH = zShapeInfo[2]; @@ -259,7 +259,7 @@ void spaceToBatch(sd::LaunchContext* context, const NDArray& input, NDArray& out NDArray outputRearranged1 = outputRearranged0.reshape(output.ordering(), {input.sizeAt(0), output.sizeAt(1) * blockSize, output.sizeAt(2) * blockSize, output.sizeAt(3)}, false); BUILD_SINGLE_SELECTOR(input.dataType(), spaceToBatch_, (input, outputRearranged1, padBottom, padTop, padLeft, padRight), LIBND4J_TYPES); - if(output.getBuffer() != outputRearranged1.getBuffer()) + if(output.buffer() != outputRearranged1.buffer()) outputRearranged0.assign(outputRearranged1); } } @@ -309,9 +309,9 @@ static void spaceToBatchND_(const NDArray& input, const NDArray& padding, NDArra for (auto i = start; i < stop; i++) { - shape::index2coordsCPU(start, i, output.getShapeInfo(), zCoords); + shape::index2coordsCPU(start, i, output.shapeInfo(), zCoords); - const auto zOffset = shape::getOffset(output.getShapeInfo(), zCoords); + const auto zOffset = shape::getOffset(output.shapeInfo(), zCoords); memcpy(xCoords, zCoords, rank * 
sizeof(int)); @@ -331,7 +331,7 @@ static void spaceToBatchND_(const NDArray& input, const NDArray& padding, NDArra } if (within) - z[zOffset] = x[shape::getOffset(input.getShapeInfo(), xCoords)]; + z[zOffset] = x[shape::getOffset(input.shapeInfo(), xCoords)]; else z[zOffset] = 0.f; } @@ -396,7 +396,7 @@ void spaceToBatchND(sd::LaunchContext* context, const NDArray& input, const NDAr BUILD_SINGLE_SELECTOR(input.dataType(), spaceToBatchND_, (input, padding, outputRearranged1, numOfSpatialDims), LIBND4J_TYPES); - if(output.getBuffer() != outputRearranged1.getBuffer()) + if(output.buffer() != outputRearranged1.buffer()) outputRearranged0.assign(outputRearranged1); } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/s_t_d.cpp b/libnd4j/include/ops/declarable/helpers/cpu/s_t_d.cpp index 5668ea422..b51a4adc9 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/s_t_d.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/s_t_d.cpp @@ -26,7 +26,7 @@ namespace ops { namespace helpers { template static void _spaceTodepth_(const NDArray &input, NDArray *output, int block_size, bool isNHWC) { - auto input_ptr = reinterpret_cast(input.getBuffer()); + auto input_ptr = reinterpret_cast(input.buffer()); auto output_ptr = reinterpret_cast(output->buffer()); const int batch_size = input.sizeAt(0); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/scatter.cpp b/libnd4j/include/ops/declarable/helpers/cpu/scatter.cpp index dd83a8618..e19eb5dea 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/scatter.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/scatter.cpp @@ -36,8 +36,8 @@ Nd4jLong checkIndices_(const NDArray& indices, const NDArray& output, const int const auto x = indices.bufferAsT(); - const auto xShapeInfo = indices.getShapeInfo(); - const auto zShapeInfo = output.getShapeInfo(); + const auto xShapeInfo = indices.shapeInfo(); + const auto zShapeInfo = output.shapeInfo(); const auto xRank = indices.rankOf(); diff --git 
a/libnd4j/include/ops/declarable/helpers/cpu/softmax.cpp b/libnd4j/include/ops/declarable/helpers/cpu/softmax.cpp index e2c0f5183..bfd44629c 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/softmax.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/softmax.cpp @@ -30,10 +30,10 @@ namespace sd { namespace helpers { template - static void softMaxForVector_(void *input, Nd4jLong *inShapeInfo, void *output, Nd4jLong *outShapeInfo) { + static void softMaxForVector_(void const* input, Nd4jLong const* inShapeInfo, void *output, Nd4jLong const* outShapeInfo) { - T* inBuff = reinterpret_cast(input); - T* outBuff = reinterpret_cast(output); + auto inBuff = reinterpret_cast(input); + auto outBuff = reinterpret_cast(output); T max = -DataTypeUtils::max(); T sum = 0.; @@ -80,15 +80,16 @@ namespace sd { throw std::runtime_error("ops::helpers::softMaxForVector function: input and output arrays must be vectors !"); auto xType = input.dataType(); - BUILD_SINGLE_SELECTOR(xType, softMaxForVector_, (input.getBuffer(), input.getShapeInfo(), output.buffer(), output.shapeInfo()), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(xType, softMaxForVector_, (input.buffer(), input.shapeInfo(), output.buffer(), output.shapeInfo()), FLOAT_TYPES); } template - void softmax_loop(T *input, T *output, Nd4jLong *offsets, Nd4jLong numOfSubArrs, uint32_t tadLen); + void softmax_loop(const T* input, T *output, const Nd4jLong * offsets, Nd4jLong numOfSubArrs, uint32_t tadLen); + #ifdef _OPENMP template <> - FORCEINLINE void softmax_loop(float *input, float *output, Nd4jLong *offsets, Nd4jLong numOfSubArrs, uint32_t tadLen) { -#pragma omp parallel for + FORCEINLINE void softmax_loop(const float* input, float *output, const Nd4jLong * offsets, Nd4jLong numOfSubArrs, uint32_t tadLen) { +#pragma omp parallel for default(shared) for (Nd4jLong i = 0; i < numOfSubArrs; i++) { auto inBuff = input + offsets[i]; auto outBuff = output + offsets[i]; @@ -113,7 +114,7 @@ namespace sd { } #else template <> - FORCEINLINE 
void softmax_loop(float *input, float *output, Nd4jLong *offsets, Nd4jLong numOfSubArrs, uint32_t tadLen) { + FORCEINLINE void softmax_loop(const float *input, float *output, const Nd4jLong *offsets, Nd4jLong numOfSubArrs, uint32_t tadLen) { auto func = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i++) { auto inBuff = input + offsets[i]; @@ -143,7 +144,7 @@ namespace sd { template - FORCEINLINE void softmax_loop(T *input, T *output, Nd4jLong *offsets, Nd4jLong numOfSubArrs, uint32_t tadLen) { + FORCEINLINE void softmax_loop(const T *input, T *output, const Nd4jLong *offsets, Nd4jLong numOfSubArrs, uint32_t tadLen) { auto func = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i++) { auto inBuff = input + offsets[i]; @@ -180,20 +181,20 @@ namespace sd { if(input.isVector()) { if(rank == 1 || input.sizeAt(dimension) != 1) - softMaxForVector_(input.getBuffer(), input.getShapeInfo(), output.buffer(), output.getShapeInfo()); + softMaxForVector_(input.buffer(), input.shapeInfo(), output.buffer(), output.shapeInfo()); else output = 1.; } else if(input.isSameShapeStrict(output)) { - TadPack tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), dimension); - Nd4jLong* tadShapeInfo = tadPack.primaryShapeInfo(); - Nd4jLong* tadOffsets = tadPack.primaryOffsets(); + TadPack tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimension); + auto tadShapeInfo = tadPack.primaryShapeInfo(); + auto tadOffsets = tadPack.primaryOffsets(); const uint numOfSubArrs = tadPack.numberOfTads(); const uint tadLen = shape::length(tadShapeInfo); if(shape::elementWiseStride(tadShapeInfo) == 1){ - T *inBuff = input.bufferAsT(); + auto inBuff = input.bufferAsT(); T *outBuff = output.bufferAsT(); softmax_loop(inBuff, outBuff, tadOffsets, numOfSubArrs, tadLen); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/split.cpp b/libnd4j/include/ops/declarable/helpers/cpu/split.cpp index 2e30cdf0a..48c6c4903 100644 --- 
a/libnd4j/include/ops/declarable/helpers/cpu/split.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/split.cpp @@ -33,7 +33,7 @@ namespace helpers { const auto sizeofT = input.sizeOfT(); - T* xBuff = input.bufferAsT(); + auto xBuff = input.bufferAsT(); bool luckCase1 = ((axis == 0 && input.ordering() == 'c') || (axis == input.rankOf() - 1 && input.ordering() == 'f')) && input.ews() == 1; @@ -77,7 +77,7 @@ namespace helpers { for (Nd4jLong i = 0; i < input.lengthOf() / xDim; ++i) { - T* x = xBuff + xDim * i; + auto x = xBuff + xDim * i; for (uint j = 0; j < numSplits; ++j) { const auto zDim = outArrs[j]->sizeAt(axis); @@ -100,8 +100,8 @@ namespace helpers { for (auto i = start; i < stop; i += increment) { - shape::index2coordsCPU(start, i, input.getShapeInfo(), coords); - const auto xOffset = shape::getOffset(input.getShapeInfo(), coords); + shape::index2coordsCPU(start, i, input.shapeInfo(), coords); + const auto xOffset = shape::getOffset(input.shapeInfo(), coords); uint outArrIdx = 0; @@ -113,7 +113,7 @@ namespace helpers { } T* z = outArrs[outArrIdx]->bufferAsT(); - const auto zOffset = shape::getOffset(outArrs[outArrIdx]->getShapeInfo(), coords); + const auto zOffset = shape::getOffset(outArrs[outArrIdx]->shapeInfo(), coords); z[zOffset] = xBuff[xOffset]; coords[axis] = temp; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/stack.cpp b/libnd4j/include/ops/declarable/helpers/cpu/stack.cpp index f8fc07201..694ced4cb 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/stack.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/stack.cpp @@ -47,8 +47,8 @@ static void stack_(const std::vector& inArrs, NDArray& output, c } else { - auto zTadPack = ConstantTadHelper::getInstance()->tadForDimensions(output.getShapeInfo(), ShapeUtils::evalDimsToExclude(output.rankOf(), {dim})); - Nd4jLong* zTadShapeInfo = zTadPack.primaryShapeInfo(); + auto zTadPack = ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), 
ShapeUtils::evalDimsToExclude(output.rankOf(), {dim})); + auto zTadShapeInfo = zTadPack.primaryShapeInfo(); auto func = PRAGMA_THREADS_FOR { @@ -57,7 +57,7 @@ static void stack_(const std::vector& inArrs, NDArray& output, c void* zBuff = output.bufferWithOffset(zTadPack.primaryOffsets()[i]); NativeOpExecutioner::execTransformAny(inArrs[0]->getContext(), transform::Assign, - inArrs[i]->getBuffer(), inArrs[i]->getShapeInfo(), nullptr/*input specialBuffer*/, nullptr/*input specialShapeInfo*/, + inArrs[i]->buffer(), inArrs[i]->shapeInfo(), nullptr/*input specialBuffer*/, nullptr/*input specialShapeInfo*/, zBuff, zTadShapeInfo, nullptr/*output specialBuffer*/, nullptr/*output specialShapeInfo*/, nullptr, nullptr, nullptr, false/*allowParallelism*/); } @@ -92,17 +92,16 @@ static void unstack_(const NDArray& input, const std::vector& outArrs, } else { - auto xTadPack = ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), ShapeUtils::evalDimsToExclude(input.rankOf(), {dim})); - Nd4jLong* xTadShapeInfo = xTadPack.primaryShapeInfo(); + auto xTadPack = ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), ShapeUtils::evalDimsToExclude(input.rankOf(), {dim})); + auto xTadShapeInfo = xTadPack.primaryShapeInfo(); auto func = PRAGMA_THREADS_FOR { for (auto i = start; i < stop; i++) { - - void* xBuff = input.bufferWithOffset(xTadPack.primaryOffsets()[i]); + auto xBuff = input.bufferWithOffset(xTadPack.primaryOffsets()[i]); NativeOpExecutioner::execTransformAny(input.getContext(), transform::Assign, xBuff, xTadShapeInfo, nullptr/*input specialBuffer*/, nullptr/*input specialShapeInfo*/, - outArrs[i]->getBuffer(), outArrs[i]->getShapeInfo(), nullptr/*output specialBuffer*/, nullptr/*output specialShapeInfo*/, + outArrs[i]->buffer(), outArrs[i]->shapeInfo(), nullptr/*output specialBuffer*/, nullptr/*output specialShapeInfo*/, nullptr, nullptr, nullptr, false/*allowParallelism*/); } }; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/tile.cpp 
b/libnd4j/include/ops/declarable/helpers/cpu/tile.cpp index 8f2a10bc9..4edb9e2a0 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/tile.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/tile.cpp @@ -31,8 +31,8 @@ namespace helpers { template static void tileBP_(const NDArray& gradO /*input*/, NDArray& gradI /*output*/, const std::vector reps) { - T* gradIBuff = reinterpret_cast(gradI.getBuffer()); - const T* gradOBuff = reinterpret_cast(gradO.getBuffer()); + T* gradIBuff = reinterpret_cast(gradI.buffer()); + auto gradOBuff = reinterpret_cast(gradO.buffer()); const Nd4jLong gradILen = gradI.lengthOf(); const Nd4jLong gradOLen = gradO.lengthOf(); // gradOLen >= gradILen const Nd4jLong gradIEWS = sd::math::nd4j_abs(gradI.ews()); @@ -52,7 +52,7 @@ static void tileBP_(const NDArray& gradO /*input*/, NDArray& gradI /*output*/, c //PRAGMA_OMP_PARALLEL_FOR_SIMD for(Nd4jLong i=0; i(idx) + gradOBuff[i]); } } @@ -60,7 +60,7 @@ static void tileBP_(const NDArray& gradO /*input*/, NDArray& gradI /*output*/, c //PRAGMA_OMP_PARALLEL_FOR_SIMD for(Nd4jLong i=0; i(idx) + gradOBuff[i * gradOEWS]); } } @@ -69,8 +69,8 @@ static void tileBP_(const NDArray& gradO /*input*/, NDArray& gradI /*output*/, c //PRAGMA_OMP_PARALLEL_FOR_SIMD for(Nd4jLong i=0; i(fidx) + gradOBuff[shape::getIndexOffset(i, gradO.getShapeInfo())]); + auto fidx = shape::subArrayIndex(i, gradO.shapeInfo(), gradI.shapeInfo()); + gradI.p(fidx, gradI.e(fidx) + gradOBuff[shape::getIndexOffset(i, gradO.shapeInfo())]); } } } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/top_k.cpp b/libnd4j/include/ops/declarable/helpers/cpu/top_k.cpp index 78b06d71e..fdab43261 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/top_k.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/top_k.cpp @@ -42,7 +42,7 @@ namespace helpers { for (size_t d = 0; d < dimsToExclude.size(); ++d) dimsToExclude[d] = d; - const Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(input->getShapeInfo(), dimsToExclude); + const Nd4jLong 
numOfSubArrs = ShapeUtils::getNumOfSubArrs(input->shapeInfo(), dimsToExclude); if (k == 1) { for (Nd4jLong e = 0; e < numOfSubArrs; ++e) { diff --git a/libnd4j/include/ops/declarable/helpers/cpu/updaterAdaDelta.cpp b/libnd4j/include/ops/declarable/helpers/cpu/updaterAdaDelta.cpp index e80018348..78268b2dc 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/updaterAdaDelta.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/updaterAdaDelta.cpp @@ -67,23 +67,23 @@ static void adaDeltaUpdater_(const NDArray& gradient, const NDArray& initStateMs } - bool bXZsame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), update.getShapeInfo()); - bool bXInMsgSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), initStateMsg.getShapeInfo()); - bool bXStMsgSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), stateMsg.getShapeInfo()); - bool bXInMsdxSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), initStateMsdx.getShapeInfo()); - bool bXStMsdxSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), stateMsdx.getShapeInfo()); + bool bXZsame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), update.shapeInfo()); + bool bXInMsgSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), initStateMsg.shapeInfo()); + bool bXStMsgSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), stateMsg.shapeInfo()); + bool bXInMsdxSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), initStateMsdx.shapeInfo()); + bool bXStMsdxSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), stateMsdx.shapeInfo()); auto func = PRAGMA_THREADS_FOR{ int coords[MAX_RANK]; for (auto i = start; i < gradient.lengthOf(); i++) { - shape::index2coordsCPU(start, i, gradient.getShapeInfo(), coords); - const auto xOffset = shape::getOffset(gradient.getShapeInfo(), coords); - const auto zOffset = bXZsame ? xOffset : shape::getOffset(update.getShapeInfo(), coords); - const auto initMsgOffset = bXInMsgSame ? 
xOffset : shape::getOffset(initStateMsg.getShapeInfo(), coords); - const auto stMsgOffset = bXStMsgSame ? xOffset : shape::getOffset(stateMsg.getShapeInfo(), coords); - const auto initMsdxOffset = bXInMsdxSame ? xOffset : shape::getOffset(initStateMsdx.getShapeInfo(), coords); - const auto stMsdxOffset = bXStMsdxSame ? xOffset : shape::getOffset(stateMsdx.getShapeInfo(), coords); + shape::index2coordsCPU(start, i, gradient.shapeInfo(), coords); + const auto xOffset = shape::getOffset(gradient.shapeInfo(), coords); + const auto zOffset = bXZsame ? xOffset : shape::getOffset(update.shapeInfo(), coords); + const auto initMsgOffset = bXInMsgSame ? xOffset : shape::getOffset(initStateMsg.shapeInfo(), coords); + const auto stMsgOffset = bXStMsgSame ? xOffset : shape::getOffset(stateMsg.shapeInfo(), coords); + const auto initMsdxOffset = bXInMsdxSame ? xOffset : shape::getOffset(initStateMsdx.shapeInfo(), coords); + const auto stMsdxOffset = bXStMsdxSame ? xOffset : shape::getOffset(stateMsdx.shapeInfo(), coords); stMsg[stMsgOffset] = rho * initMsg[initMsgOffset] + grad[xOffset] * grad[xOffset] * rhoT; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/updaterAdaGrad.cpp b/libnd4j/include/ops/declarable/helpers/cpu/updaterAdaGrad.cpp index 280597d31..e65f34e72 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/updaterAdaGrad.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/updaterAdaGrad.cpp @@ -56,21 +56,21 @@ static void adaGradUpdater_(const NDArray& gradient, const NDArray& initState, N return; } - bool bXZsame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), update.getShapeInfo()); - bool bXInSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), initState.getShapeInfo()); - bool bXStSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), stateH.getShapeInfo()); + bool bXZsame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), update.shapeInfo()); + bool bXInSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), 
initState.shapeInfo()); + bool bXStSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), stateH.shapeInfo()); auto func = PRAGMA_THREADS_FOR{ int coords[MAX_RANK]; for (auto i = start; i < stop; i++) { - shape::index2coordsCPU(start, i, gradient.getShapeInfo(), coords); + shape::index2coordsCPU(start, i, gradient.shapeInfo(), coords); - const auto xOffset = shape::getOffset(gradient.getShapeInfo(), coords); + const auto xOffset = shape::getOffset(gradient.shapeInfo(), coords); - const auto zOffset = bXZsame ? xOffset : shape::getOffset(update.getShapeInfo(), coords); - const auto initOffset = bXInSame ? xOffset : shape::getOffset(initState.getShapeInfo(), coords); - const auto stOffset = bXStSame ? xOffset : shape::getOffset(stateH.getShapeInfo(), coords); + const auto zOffset = bXZsame ? xOffset : shape::getOffset(update.shapeInfo(), coords); + const auto initOffset = bXInSame ? xOffset : shape::getOffset(initState.shapeInfo(), coords); + const auto stOffset = bXStSame ? xOffset : shape::getOffset(stateH.shapeInfo(), coords); st[stOffset] = init[initOffset] + grad[xOffset] * grad[xOffset]; up[zOffset] = (lr * grad[xOffset]) / (math::nd4j_sqrt(st[stOffset]) + epsilon); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/updaterAdaMax.cpp b/libnd4j/include/ops/declarable/helpers/cpu/updaterAdaMax.cpp index ae986f901..6c7d0d322 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/updaterAdaMax.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/updaterAdaMax.cpp @@ -73,23 +73,23 @@ static void adaMaxUpdater_(const NDArray& gradient, const NDArray& initStateU, c return; } - bool bXZsame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), update.getShapeInfo()); - bool bXInVSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), initStateU.getShapeInfo()); - bool bXStVSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), stateU.getShapeInfo()); - bool bXInMSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), 
initStateM.getShapeInfo()); - bool bXStMSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), stateM.getShapeInfo()); + bool bXZsame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), update.shapeInfo()); + bool bXInVSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), initStateU.shapeInfo()); + bool bXStVSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), stateU.shapeInfo()); + bool bXInMSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), initStateM.shapeInfo()); + bool bXStMSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), stateM.shapeInfo()); auto func = PRAGMA_THREADS_FOR{ int coords[MAX_RANK]; for (auto i = start; i < stop; i++) { - shape::index2coordsCPU(start, i, gradient.getShapeInfo(), coords); - const auto xOffset = shape::getOffset(gradient.getShapeInfo(), coords); - const auto zOffset = bXZsame ? xOffset : shape::getOffset(update.getShapeInfo(), coords); - const auto initUOffset = bXInVSame ? xOffset : shape::getOffset(initStateU.getShapeInfo(), coords); - const auto stUOffset = bXStVSame ? xOffset : shape::getOffset(stateU.getShapeInfo(), coords); - const auto initMOffset = bXInMSame ? xOffset : shape::getOffset(initStateM.getShapeInfo(), coords); - const auto stMOffset = bXStMSame ? xOffset : shape::getOffset(stateM.getShapeInfo(), coords); + shape::index2coordsCPU(start, i, gradient.shapeInfo(), coords); + const auto xOffset = shape::getOffset(gradient.shapeInfo(), coords); + const auto zOffset = bXZsame ? xOffset : shape::getOffset(update.shapeInfo(), coords); + const auto initUOffset = bXInVSame ? xOffset : shape::getOffset(initStateU.shapeInfo(), coords); + const auto stUOffset = bXStVSame ? xOffset : shape::getOffset(stateU.shapeInfo(), coords); + const auto initMOffset = bXInMSame ? xOffset : shape::getOffset(initStateM.shapeInfo(), coords); + const auto stMOffset = bXStMSame ? 
xOffset : shape::getOffset(stateM.shapeInfo(), coords); //m = B_1 * m + (1-B_1)*grad stM[stMOffset] = beta1 * initM[initMOffset] + grad[xOffset] * (1 - beta1); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/updaterAdam.cpp b/libnd4j/include/ops/declarable/helpers/cpu/updaterAdam.cpp index b8eab1e6f..2d670949f 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/updaterAdam.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/updaterAdam.cpp @@ -75,23 +75,23 @@ static void adamUpdater_(const NDArray& gradient, const NDArray& initStateU, con return; } - bool bXZsame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), update.getShapeInfo()); - bool bXInVSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), initStateU.getShapeInfo()); - bool bXStVSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), stateU.getShapeInfo()); - bool bXInMSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), initStateM.getShapeInfo()); - bool bXStMSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), stateM.getShapeInfo()); + bool bXZsame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), update.shapeInfo()); + bool bXInVSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), initStateU.shapeInfo()); + bool bXStVSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), stateU.shapeInfo()); + bool bXInMSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), initStateM.shapeInfo()); + bool bXStMSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), stateM.shapeInfo()); auto func = PRAGMA_THREADS_FOR{ int coords[MAX_RANK]; for (auto i = start; i < stop; i++) { - shape::index2coordsCPU(start, i, gradient.getShapeInfo(), coords); - const auto xOffset = shape::getOffset(gradient.getShapeInfo(), coords); - const auto zOffset = bXZsame ? xOffset : shape::getOffset(update.getShapeInfo(), coords); - const auto initUOffset = bXInVSame ? 
xOffset : shape::getOffset(initStateU.getShapeInfo(), coords); - const auto stUOffset = bXStVSame ? xOffset : shape::getOffset(stateU.getShapeInfo(), coords); - const auto initMOffset = bXInVSame ? xOffset : shape::getOffset(initStateM.getShapeInfo(), coords); - const auto stMOffset = bXStMSame ? xOffset : shape::getOffset(stateM.getShapeInfo(), coords); + shape::index2coordsCPU(start, i, gradient.shapeInfo(), coords); + const auto xOffset = shape::getOffset(gradient.shapeInfo(), coords); + const auto zOffset = bXZsame ? xOffset : shape::getOffset(update.shapeInfo(), coords); + const auto initUOffset = bXInVSame ? xOffset : shape::getOffset(initStateU.shapeInfo(), coords); + const auto stUOffset = bXStVSame ? xOffset : shape::getOffset(stateU.shapeInfo(), coords); + const auto initMOffset = bXInVSame ? xOffset : shape::getOffset(initStateM.shapeInfo(), coords); + const auto stMOffset = bXStMSame ? xOffset : shape::getOffset(stateM.shapeInfo(), coords); stM[stMOffset] = beta1 * initM[initMOffset] + grad[xOffset] * (1 - beta1); stU[stUOffset] = beta2 * initU[initUOffset] + grad[xOffset] * grad[xOffset] * (1 - beta2); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/updaterAmsGrad.cpp b/libnd4j/include/ops/declarable/helpers/cpu/updaterAmsGrad.cpp index 686c22cbe..7cb05075c 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/updaterAmsGrad.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/updaterAmsGrad.cpp @@ -81,27 +81,27 @@ static void amsGradUpdater_(const NDArray& gradient, const NDArray& initStateV, return; } - bool bXZsame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), update.getShapeInfo()); - bool bXInVSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), initStateV.getShapeInfo()); - bool bXStVSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), stateV.getShapeInfo()); - bool bXInMSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), initStateM.getShapeInfo()); - bool bXStMSame = 
shape::haveSameShapeAndStrides(gradient.getShapeInfo(), stateM.getShapeInfo()); - bool bXInHSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), initStateH.getShapeInfo()); - bool bXStHSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), stateH.getShapeInfo()); + bool bXZsame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), update.shapeInfo()); + bool bXInVSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), initStateV.shapeInfo()); + bool bXStVSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), stateV.shapeInfo()); + bool bXInMSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), initStateM.shapeInfo()); + bool bXStMSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), stateM.shapeInfo()); + bool bXInHSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), initStateH.shapeInfo()); + bool bXStHSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), stateH.shapeInfo()); auto func = PRAGMA_THREADS_FOR{ int coords[MAX_RANK]; for (auto i = start; i < stop; i++) { - shape::index2coordsCPU(start, i, gradient.getShapeInfo(), coords); - const auto xOffset = shape::getOffset(gradient.getShapeInfo(), coords); - const auto zOffset = bXZsame ? xOffset : shape::getOffset(update.getShapeInfo(), coords); - const auto initVOffset = bXInVSame ? xOffset : shape::getOffset(initStateV.getShapeInfo(), coords); - const auto stVOffset = bXStVSame ? xOffset : shape::getOffset(stateV.getShapeInfo(), coords); - const auto initMOffset = bXInMSame ? xOffset : shape::getOffset(initStateM.getShapeInfo(), coords); - const auto stMOffset = bXStMSame ? xOffset : shape::getOffset(stateM.getShapeInfo(), coords); - const auto initHOffset = bXInHSame ? xOffset : shape::getOffset(initStateH.getShapeInfo(), coords); - const auto stHOffset = bXStHSame ? 
xOffset : shape::getOffset(stateH.getShapeInfo(), coords); + shape::index2coordsCPU(start, i, gradient.shapeInfo(), coords); + const auto xOffset = shape::getOffset(gradient.shapeInfo(), coords); + const auto zOffset = bXZsame ? xOffset : shape::getOffset(update.shapeInfo(), coords); + const auto initVOffset = bXInVSame ? xOffset : shape::getOffset(initStateV.shapeInfo(), coords); + const auto stVOffset = bXStVSame ? xOffset : shape::getOffset(stateV.shapeInfo(), coords); + const auto initMOffset = bXInMSame ? xOffset : shape::getOffset(initStateM.shapeInfo(), coords); + const auto stMOffset = bXStMSame ? xOffset : shape::getOffset(stateM.shapeInfo(), coords); + const auto initHOffset = bXInHSame ? xOffset : shape::getOffset(initStateH.shapeInfo(), coords); + const auto stHOffset = bXStHSame ? xOffset : shape::getOffset(stateH.shapeInfo(), coords); stM[stMOffset] = beta1 * initM[initMOffset] + grad[xOffset] * mbeta1; stV[stVOffset] = beta2 * initV[initVOffset] + grad[xOffset] * grad[xOffset] * mbeta2; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/updaterNadam.cpp b/libnd4j/include/ops/declarable/helpers/cpu/updaterNadam.cpp index 82ade0f16..40f9c9407 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/updaterNadam.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/updaterNadam.cpp @@ -74,23 +74,23 @@ static void nadamUpdater_(const NDArray& gradient, const NDArray& initStateV, co return; } - bool bXZsame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), update.getShapeInfo()); - bool bXInVSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), initStateV.getShapeInfo()); - bool bXStVSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), stateV.getShapeInfo()); - bool bXInMSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), initStateM.getShapeInfo()); - bool bXStMSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), stateM.getShapeInfo()); + bool bXZsame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), 
update.shapeInfo()); + bool bXInVSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), initStateV.shapeInfo()); + bool bXStVSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), stateV.shapeInfo()); + bool bXInMSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), initStateM.shapeInfo()); + bool bXStMSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), stateM.shapeInfo()); auto func = PRAGMA_THREADS_FOR{ int coords[MAX_RANK]; for (auto i = start; i < stop; i++) { - shape::index2coordsCPU(start, i, gradient.getShapeInfo(), coords); - const auto xOffset = shape::getOffset(gradient.getShapeInfo(), coords); - const auto zOffset = bXZsame ? xOffset : shape::getOffset(update.getShapeInfo(), coords); - const auto initVOffset = bXInVSame ? xOffset : shape::getOffset(initStateV.getShapeInfo(), coords); - const auto stVOffset = bXStVSame ? xOffset : shape::getOffset(stateV.getShapeInfo(), coords); - const auto initMOffset = bXInMSame ? xOffset : shape::getOffset(initStateM.getShapeInfo(), coords); - const auto stMOffset = bXStMSame ? xOffset : shape::getOffset(stateM.getShapeInfo(), coords); + shape::index2coordsCPU(start, i, gradient.shapeInfo(), coords); + const auto xOffset = shape::getOffset(gradient.shapeInfo(), coords); + const auto zOffset = bXZsame ? xOffset : shape::getOffset(update.shapeInfo(), coords); + const auto initVOffset = bXInVSame ? xOffset : shape::getOffset(initStateV.shapeInfo(), coords); + const auto stVOffset = bXStVSame ? xOffset : shape::getOffset(stateV.shapeInfo(), coords); + const auto initMOffset = bXInMSame ? xOffset : shape::getOffset(initStateM.shapeInfo(), coords); + const auto stMOffset = bXStMSame ? 
xOffset : shape::getOffset(stateM.shapeInfo(), coords); auto oneMinusBeta1Grad = grad[xOffset] * mbeta1; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/updaterNesterovs.cpp b/libnd4j/include/ops/declarable/helpers/cpu/updaterNesterovs.cpp index 82e21ace7..1d8bb8d45 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/updaterNesterovs.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/updaterNesterovs.cpp @@ -58,19 +58,19 @@ static void nesterovsUpdater_(const NDArray& gradient, const NDArray& initState, return; } - bool bXZsame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), update.getShapeInfo()); - bool bXInSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), initState.getShapeInfo()); - bool bXStSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), stateV.getShapeInfo()); + bool bXZsame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), update.shapeInfo()); + bool bXInSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), initState.shapeInfo()); + bool bXStSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), stateV.shapeInfo()); auto func = PRAGMA_THREADS_FOR{ int coords[MAX_RANK]; for (auto i = start; i < stop; i++) { - shape::index2coordsCPU(start, i, gradient.getShapeInfo(), coords); - const auto xOffset = shape::getOffset(gradient.getShapeInfo(), coords); - const auto zOffset = bXZsame ? xOffset : shape::getOffset(update.getShapeInfo(), coords); - const auto initOffset = bXInSame ? xOffset : shape::getOffset(initState.getShapeInfo(), coords); - const auto stOffset = bXStSame ? xOffset : shape::getOffset(stateV.getShapeInfo(), coords); + shape::index2coordsCPU(start, i, gradient.shapeInfo(), coords); + const auto xOffset = shape::getOffset(gradient.shapeInfo(), coords); + const auto zOffset = bXZsame ? xOffset : shape::getOffset(update.shapeInfo(), coords); + const auto initOffset = bXInSame ? xOffset : shape::getOffset(initState.shapeInfo(), coords); + const auto stOffset = bXStSame ? 
xOffset : shape::getOffset(stateV.shapeInfo(), coords); T prevState = momentum * init[initOffset]; st[stOffset] = prevState - lr * grad[xOffset]; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/updaterRmsProp.cpp b/libnd4j/include/ops/declarable/helpers/cpu/updaterRmsProp.cpp index a0b9f731e..473b43cf8 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/updaterRmsProp.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/updaterRmsProp.cpp @@ -57,19 +57,19 @@ static void rmsPropUpdater_(const NDArray& gradient, const NDArray& initState, N return; } - bool bXZsame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), update.getShapeInfo()); - bool bXInSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), initState.getShapeInfo()); - bool bXStSame = shape::haveSameShapeAndStrides(gradient.getShapeInfo(), stateG.getShapeInfo()); + bool bXZsame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), update.shapeInfo()); + bool bXInSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), initState.shapeInfo()); + bool bXStSame = shape::haveSameShapeAndStrides(gradient.shapeInfo(), stateG.shapeInfo()); auto func = PRAGMA_THREADS_FOR{ int coords[MAX_RANK]; for (auto i = start; i < stop; i++) { - shape::index2coordsCPU(start, i, gradient.getShapeInfo(), coords); - const auto xOffset = shape::getOffset(gradient.getShapeInfo(), coords); - const auto zOffset = bXZsame ? xOffset : shape::getOffset(update.getShapeInfo(), coords); - const auto initOffset = bXInSame ? xOffset : shape::getOffset(initState.getShapeInfo(), coords); - const auto stOffset = bXStSame ? xOffset : shape::getOffset(stateG.getShapeInfo(), coords); + shape::index2coordsCPU(start, i, gradient.shapeInfo(), coords); + const auto xOffset = shape::getOffset(gradient.shapeInfo(), coords); + const auto zOffset = bXZsame ? xOffset : shape::getOffset(update.shapeInfo(), coords); + const auto initOffset = bXInSame ? 
xOffset : shape::getOffset(initState.shapeInfo(), coords); + const auto stOffset = bXStSame ? xOffset : shape::getOffset(stateG.shapeInfo(), coords); st[stOffset] = init[initOffset] * rmsDecay + grad[xOffset] * grad[xOffset] * (1 - rmsDecay) ; up[zOffset] = (lr * grad[xOffset]) / ( math::nd4j_sqrt(st[stOffset]) + epsilon); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/BarnesHutTsne.cu b/libnd4j/include/ops/declarable/helpers/cuda/BarnesHutTsne.cu index 71eef3386..70ff75b96 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/BarnesHutTsne.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/BarnesHutTsne.cu @@ -56,8 +56,8 @@ namespace helpers { Nd4jLong barnes_row_count(const NDArray* rowP, const NDArray* colP, Nd4jLong N, NDArray& rowCounts) { int* pRowCounts = reinterpret_cast(rowCounts.specialBuffer()); - int const* pRows = reinterpret_cast(rowP->getSpecialBuffer()); - int const* pCols = reinterpret_cast(colP->getSpecialBuffer()); + int const* pRows = reinterpret_cast(rowP->specialBuffer()); + int const* pCols = reinterpret_cast(colP->specialBuffer()); auto stream = rowCounts.getContext()->getCudaStream(); countRowsKernel<<<1, 1, 128, *stream>>>(pRowCounts, pRows, pCols, N); NDArray numElementsArr = rowCounts.sumNumber(); //reduceAlongDimension(reduce::Sum, {}); @@ -146,7 +146,7 @@ namespace helpers { // template static void barnes_symmetrize_(const NDArray* rowP, const NDArray* colP, const NDArray* valP, Nd4jLong N, NDArray* outputRows, NDArray* outputCols, NDArray* outputVals, NDArray* rowCounts) { - int const* pRows = reinterpret_cast(rowP->getSpecialBuffer()); + int const* pRows = reinterpret_cast(rowP->specialBuffer()); int* symRowP = reinterpret_cast(outputRows->specialBuffer()); int* pRowCounts = reinterpret_cast(rowCounts->specialBuffer()); auto stream = outputCols->getContext()->getCudaStream(); @@ -156,8 +156,8 @@ namespace helpers { // outputRows->printBuffer("output rows"); int* symColP = reinterpret_cast(outputCols->specialBuffer()); 
// outputRows->printBuffer("SymRows are"); - int const* pCols = reinterpret_cast(colP->getSpecialBuffer()); - T const* pVals = reinterpret_cast(valP->getSpecialBuffer()); + int const* pCols = reinterpret_cast(colP->specialBuffer()); + T const* pVals = reinterpret_cast(valP->specialBuffer()); T* pOutput = reinterpret_cast(outputVals->specialBuffer()); //std::vector rowCountsV = rowCounts->getBufferAsVector(); auto offsetArr = NDArrayFactory::create('c', {N}); @@ -211,11 +211,11 @@ namespace helpers { template static void barnes_edge_forces_(const NDArray* rowP, NDArray const* colP, NDArray const* valP, int N, NDArray const* data, NDArray* output) { NDArray::prepareSpecialUse({output}, {data, rowP, colP, valP, valP}); - T const* dataP = reinterpret_cast(data->getSpecialBuffer()); - T const* vals = reinterpret_cast(valP->getSpecialBuffer()); + T const* dataP = reinterpret_cast(data->specialBuffer()); + T const* vals = reinterpret_cast(valP->specialBuffer()); T* outputP = reinterpret_cast(output->specialBuffer()); - int const* pRows = reinterpret_cast(rowP->getSpecialBuffer()); - int const* pCols = reinterpret_cast(colP->getSpecialBuffer()); + int const* pRows = reinterpret_cast(rowP->specialBuffer()); + int const* pCols = reinterpret_cast(colP->specialBuffer()); int colCount = data->columns(); //auto shift = 0; auto rowSize = sizeof(T) * colCount; diff --git a/libnd4j/include/ops/declarable/helpers/cuda/activations.cu b/libnd4j/include/ops/declarable/helpers/cuda/activations.cu index 4243c6e04..c8bc709a0 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/activations.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/activations.cu @@ -91,7 +91,7 @@ void prelu(sd::LaunchContext * context, const NDArray& input, const NDArray& alp const auto yType = alpha.dataType(); NDArray::prepareSpecialUse({&output}, {&input, &alpha}); - BUILD_SINGLE_SELECTOR_TWICE(xType, preluCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), 
input.getSpecialBuffer(), input.getSpecialShapeInfo(), alpha.getSpecialBuffer(), alpha.getSpecialShapeInfo(), output.getSpecialBuffer()), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR_TWICE(xType, preluCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), alpha.specialBuffer(), alpha.specialShapeInfo(), output.specialBuffer()), FLOAT_TYPES); NDArray::registerSpecialUse({&output}, {&input, &alpha}); manager.synchronize(); @@ -175,7 +175,7 @@ void preluBP(sd::LaunchContext* context, const NDArray& input, const NDArray& al const auto zType = alpha.dataType(); NDArray::prepareSpecialUse({&dLdI, &dLdA}, {&input, &alpha, &dLdO}); - BUILD_SINGLE_SELECTOR_TWICE(xType, preluBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), alpha.getSpecialBuffer(), alpha.getSpecialShapeInfo(), dLdO.getSpecialBuffer(), dLdO.getSpecialShapeInfo(), dLdI.getSpecialBuffer(), dLdI.getSpecialShapeInfo(), dLdA.getSpecialBuffer(), dLdA.getSpecialShapeInfo()), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR_TWICE(xType, preluBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), alpha.specialBuffer(), alpha.specialShapeInfo(), dLdO.specialBuffer(), dLdO.specialShapeInfo(), dLdI.specialBuffer(), dLdI.specialShapeInfo(), dLdA.specialBuffer(), dLdA.specialShapeInfo()), FLOAT_TYPES); NDArray::registerSpecialUse({&dLdI, &dLdA}, {&input, &alpha, &dLdO}); manager.synchronize(); @@ -313,7 +313,7 @@ void softmax(sd::LaunchContext * context, const NDArray& input, NDArray& output, if(rank == 1 || input.sizeAt(dimension) != 1) { NDArray::prepareSpecialUse({&output}, {&input}); - BUILD_SINGLE_SELECTOR(input.dataType(), softMaxForVectorCudaLauncher, (context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.getSpecialBuffer(), output.getSpecialShapeInfo()), 
FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), softMaxForVectorCudaLauncher, (context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo()), FLOAT_TYPES); NDArray::registerSpecialUse({&output}, {&input}); } else @@ -321,15 +321,15 @@ void softmax(sd::LaunchContext * context, const NDArray& input, NDArray& output, } else { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), {dimension}); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.getShapeInfo(), {dimension}); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), {dimension}); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), {dimension}); const int threadsPerBlock = MAX_NUM_THREADS / 4; const int blocksPerGrid = packZ.numberOfTads(); const int sharedMem = input.sizeOfT() * threadsPerBlock + 512; NDArray::prepareSpecialUse({&output}, {&input}); - BUILD_SINGLE_SELECTOR(input.dataType(), softMaxCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.getSpecialBuffer(), packX.specialShapeInfo(), packX.specialOffsets(), output.specialBuffer(), packZ.specialShapeInfo(), packZ.specialOffsets()), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), softMaxCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.specialBuffer(), packX.specialShapeInfo(), packX.specialOffsets(), output.specialBuffer(), packZ.specialShapeInfo(), packZ.specialOffsets()), FLOAT_TYPES); NDArray::registerSpecialUse({&output}, {&input}); // auto maxAlongDim = const_cast(input).reduceAlongDimension(reduce::Max, {dimension}, true); @@ -442,7 +442,7 @@ void logSoftmax(sd::LaunchContext * context, const NDArray& input, NDArray& outp if(input.isVector()) { if(rank == 1 || input.sizeAt(dimension) != 1) { - BUILD_SINGLE_SELECTOR(input.dataType(), 
logSoftMaxForVectorCudaLauncher, (context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.getSpecialBuffer()), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), logSoftMaxForVectorCudaLauncher, (context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), output.specialBuffer()), FLOAT_TYPES); input.tickReadDevice(); } else @@ -560,9 +560,9 @@ void softmaxDerivative(sd::LaunchContext * context, const NDArray& input, NDArra const int rank = input.rankOf(); int temp; - if(shape::isCommonVector(input.getShapeInfo(), temp)) { + if(shape::isCommonVector(input.shapeInfo(), temp)) { - BUILD_SINGLE_SELECTOR(input.dataType(), softMaxDerivForVectorCudaLauncher, (context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.getSpecialBuffer()), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), softMaxDerivForVectorCudaLauncher, (context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), output.specialBuffer()), FLOAT_TYPES); input.tickReadDevice(); } else { diff --git a/libnd4j/include/ops/declarable/helpers/cuda/addBias.cu b/libnd4j/include/ops/declarable/helpers/cuda/addBias.cu index 0878a1c77..18474f2c7 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/addBias.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/addBias.cu @@ -126,7 +126,7 @@ void addBias(sd::graph::Context& block, const NDArray& input, const NDArray& bia if (input.rankOf() == 2 && bias.rankOf() == 1 && input.ordering() == 'c' && output.ordering() == 'c' && input.ews() == 1 && bias.ews() == 1 && input.sizeAt(1) == bias.sizeAt(0)) { BUILD_DOUBLE_SELECTOR(input.dataType(), bias.dataType(), addBias2DCudaLauncher, - (block.launchContext()->getCudaStream(), input.getSpecialBuffer(), bias.getSpecialBuffer(), output.specialBuffer(), input.sizeAt(0), bias.sizeAt(0)), + (block.launchContext()->getCudaStream(), input.specialBuffer(), bias.specialBuffer(), output.specialBuffer(), input.sizeAt(0), 
bias.sizeAt(0)), FLOAT_TYPES, FLOAT_TYPES); } else { // default case @@ -136,7 +136,7 @@ void addBias(sd::graph::Context& block, const NDArray& input, const NDArray& bia BUILD_DOUBLE_SELECTOR(input.dataType(), bias.dataType(), addBiasCudaLauncher, - (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), bias.getSpecialBuffer(), bias.getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), isNCHW), + (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), bias.specialBuffer(), bias.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), isNCHW), FLOAT_TYPES, FLOAT_TYPES); } NDArray::registerSpecialUse({&output}, {&input, &bias}); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/adjust_hue.cu b/libnd4j/include/ops/declarable/helpers/cuda/adjust_hue.cu index 0b3681663..9ce00f318 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/adjust_hue.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/adjust_hue.cu @@ -81,8 +81,8 @@ static _CUDA_H void adjustHueCudaLauncher(const int blocksPerGrid, const int thr //////////////////////////////////////////////////////////////////////// void adjustHue(sd::LaunchContext* context, const NDArray *input, const NDArray* deltaScalarArr, NDArray *output, const int dimC) { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), {dimC}); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), {dimC}); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {dimC}); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {dimC}); const Nd4jLong numOfTads = packX.numberOfTads(); @@ -92,7 +92,7 @@ void adjustHue(sd::LaunchContext* context, const NDArray *input, const NDArray* PointersManager 
manager(context, "adjustHue"); NDArray::prepareSpecialUse({output}, {input, deltaScalarArr}); - BUILD_SINGLE_SELECTOR(input->dataType(), adjustHueCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input->getSpecialBuffer(), input->getSpecialShapeInfo(), packX.platformOffsets(), output->specialBuffer(), output->specialShapeInfo(), packZ.platformOffsets(), numOfTads, deltaScalarArr, dimC), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input->dataType(), adjustHueCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input->specialBuffer(), input->specialShapeInfo(), packX.platformOffsets(), output->specialBuffer(), output->specialShapeInfo(), packZ.platformOffsets(), numOfTads, deltaScalarArr, dimC), FLOAT_TYPES); NDArray::registerSpecialUse({output}, {input, deltaScalarArr}); manager.synchronize(); @@ -173,8 +173,8 @@ static void _adjust_hue_single(sd::LaunchContext * context, NDArray *array, NDAr adjustHueSingleNHWCKernel<<<256, 256, 1024, *context->getCudaStream()>>>(array->specialBuffer(), array->specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), tuples, delta); } else { // TODO: check this one - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(array->getShapeInfo(), {1, 2}); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), {1, 2}); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(array->shapeInfo(), {1, 2}); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {1, 2}); auto tadLength = shape::length(packX.primaryShapeInfo()); @@ -195,8 +195,8 @@ static void _adjust_hue_batch(sd::LaunchContext * context, NDArray *array, NDArr BUILD_SINGLE_SELECTOR(xType, _adjust_hue_single, (context, array, output, delta, isNHWC);, FLOAT_TYPES); } else { // TODO: check this one - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(array->getShapeInfo(), {0, 2, 3}); - auto packZ = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), {0, 2, 3}); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(array->shapeInfo(), {0, 2, 3}); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {0, 2, 3}); auto tadLength = shape::length(packX.primaryShapeInfo()); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/adjust_saturation.cu b/libnd4j/include/ops/declarable/helpers/cuda/adjust_saturation.cu index f2da480cb..fd413f8cd 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/adjust_saturation.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/adjust_saturation.cu @@ -83,8 +83,8 @@ static _CUDA_H void adjustSaturationCudaLauncher(const int blocksPerGrid, const //////////////////////////////////////////////////////////////////////// void adjustSaturation(sd::LaunchContext* context, const NDArray *input, const NDArray* factorScalarArr, NDArray *output, const int dimC) { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), {dimC}); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), {dimC}); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {dimC}); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {dimC}); const Nd4jLong numOfTads = packX.numberOfTads(); @@ -94,7 +94,7 @@ void adjustSaturation(sd::LaunchContext* context, const NDArray *input, const ND PointersManager manager(context, "adjustSaturation"); NDArray::prepareSpecialUse({output}, {input, factorScalarArr}); - BUILD_SINGLE_SELECTOR(input->dataType(), adjustSaturationCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input->getSpecialBuffer(), input->getSpecialShapeInfo(), packX.platformOffsets(), output->specialBuffer(), output->specialShapeInfo(), packZ.platformOffsets(), numOfTads, factorScalarArr, dimC), FLOAT_TYPES); + 
BUILD_SINGLE_SELECTOR(input->dataType(), adjustSaturationCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input->specialBuffer(), input->specialShapeInfo(), packX.platformOffsets(), output->specialBuffer(), output->specialShapeInfo(), packZ.platformOffsets(), numOfTads, factorScalarArr, dimC), FLOAT_TYPES); NDArray::registerSpecialUse({output}, {input, factorScalarArr}); manager.synchronize(); @@ -164,8 +164,8 @@ static void _adjust_saturation_single(sd::LaunchContext * context, NDArray *arra if (isNHWC) { adjustSaturationSingleNHWCKernel<<<256, 256, 1024, *context->getCudaStream()>>>(array->specialBuffer(), array->specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), tuples, delta); } else { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(array->getShapeInfo(), {1, 2}); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), {1, 2}); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(array->shapeInfo(), {1, 2}); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {1, 2}); auto tadLength = shape::length(packX.primaryShapeInfo()); @@ -185,8 +185,8 @@ static void _adjust_saturation_batch(sd::LaunchContext * context, NDArray *array BUILD_SINGLE_SELECTOR(xType, _adjust_saturation_single, (context, array, output, delta, isNHWC);, FLOAT_TYPES); } else { // TODO: check this one - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(array->getShapeInfo(), {0, 2, 3}); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), {0, 2, 3}); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(array->shapeInfo(), {0, 2, 3}); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {0, 2, 3}); auto tadLength = shape::length(packX.primaryShapeInfo()); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/batched_gemm.cu 
b/libnd4j/include/ops/declarable/helpers/cuda/batched_gemm.cu index b5447b411..40540f65d 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/batched_gemm.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/batched_gemm.cu @@ -87,9 +87,9 @@ void bgemm(const std::vector& vA, const std::vector& vB, std std::vector pAbuffs(bS), pBbuffs(bS), pCbuffs(bS); for(int i = 0; i < bS; ++i) { - pAbuffs[i] = pA[i]->getSpecialBuffer(); - pBbuffs[i] = pB[i]->getSpecialBuffer(); - pCbuffs[i] = pC[i]->getSpecialBuffer(); + pAbuffs[i] = pA[i]->specialBuffer(); + pBbuffs[i] = pB[i]->specialBuffer(); + pCbuffs[i] = pC[i]->specialBuffer(); } sd::LaunchContext* context = vA[0]->getContext(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/batchnorm.cu b/libnd4j/include/ops/declarable/helpers/cuda/batchnorm.cu index 2daac26c3..791953ab7 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/batchnorm.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/batchnorm.cu @@ -201,7 +201,7 @@ void batchnorm(const NDArray* input, const NDArray* mean, const NDArray* varianc // std::vector dimsToExclude = ShapeUtils::evalDimsToExclude(input->rankOf(), axes); - // auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimsToExclude); + // auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimsToExclude); // auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimsToExclude); // const int threadsPerBlock = MAX_NUM_THREADS / 2; @@ -210,7 +210,7 @@ void batchnorm(const NDArray* input, const NDArray* mean, const NDArray* varianc // PointersManager manager(input->getContext(), "batchnorm"); // NDArray::prepareSpecialUse({output}, {input, mean, variance, gamma, beta}); - // BUILD_SINGLE_SELECTOR(input->dataType(), batchnormCudaLauncher, (blocksPerGrid, threadsPerBlock, input->getContext()->getCudaStream(), input->getSpecialBuffer(), input->getSpecialShapeInfo(), 
mean->getSpecialBuffer(), mean->getSpecialShapeInfo(), variance->getSpecialBuffer(), variance->getSpecialShapeInfo(), gamma ? gamma->getSpecialBuffer() : nullptr, gamma ? gamma->getSpecialShapeInfo() : nullptr, beta ? beta->getSpecialBuffer() : nullptr, beta ? beta->getSpecialShapeInfo() : nullptr, output->specialBuffer(), output->specialShapeInfo(), packX.platformShapeInfo(), packX.platformOffsets(), packZ.platformShapeInfo(), packZ.platformOffsets(), epsilon), FLOAT_TYPES); + // BUILD_SINGLE_SELECTOR(input->dataType(), batchnormCudaLauncher, (blocksPerGrid, threadsPerBlock, input->getContext()->getCudaStream(), input->specialBuffer(), input->specialShapeInfo(), mean->specialBuffer(), mean->specialShapeInfo(), variance->specialBuffer(), variance->specialShapeInfo(), gamma ? gamma->specialBuffer() : nullptr, gamma ? gamma->specialShapeInfo() : nullptr, beta ? beta->specialBuffer() : nullptr, beta ? beta->specialShapeInfo() : nullptr, output->specialBuffer(), output->specialShapeInfo(), packX.platformShapeInfo(), packX.platformOffsets(), packZ.platformShapeInfo(), packZ.platformOffsets(), epsilon), FLOAT_TYPES); // NDArray::registerSpecialUse({output}, {input, mean, variance, gamma, beta}); // manager.synchronize(); @@ -224,7 +224,7 @@ void batchnorm(const NDArray* input, const NDArray* mean, const NDArray* varianc const int* dims = reinterpret_cast(manager.replicatePointer(axes.data(), axes.size() * sizeof(int))); NDArray::prepareSpecialUse({output}, {input, mean, variance, gamma, beta}); - BUILD_SINGLE_SELECTOR(input->dataType(), batchnormCudaLauncher2, (blocksPerGrid, threadsPerBlock, input->getContext()->getCudaStream(), input->getSpecialBuffer(), input->getSpecialShapeInfo(), mean->getSpecialBuffer(), mean->getSpecialShapeInfo(), variance->getSpecialBuffer(), variance->getSpecialShapeInfo(), gamma ? gamma->getSpecialBuffer() : nullptr, gamma ? gamma->getSpecialShapeInfo() : nullptr, beta ? beta->getSpecialBuffer() : nullptr, beta ? 
beta->getSpecialShapeInfo() : nullptr, output->specialBuffer(), output->specialShapeInfo(), axes.size(), dims, epsilon), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input->dataType(), batchnormCudaLauncher2, (blocksPerGrid, threadsPerBlock, input->getContext()->getCudaStream(), input->specialBuffer(), input->specialShapeInfo(), mean->specialBuffer(), mean->specialShapeInfo(), variance->specialBuffer(), variance->specialShapeInfo(), gamma ? gamma->specialBuffer() : nullptr, gamma ? gamma->specialShapeInfo() : nullptr, beta ? beta->specialBuffer() : nullptr, beta ? beta->specialShapeInfo() : nullptr, output->specialBuffer(), output->specialShapeInfo(), axes.size(), dims, epsilon), FLOAT_TYPES); NDArray::registerSpecialUse({output}, {input, mean, variance, gamma, beta}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/betaInc.cu b/libnd4j/include/ops/declarable/helpers/cuda/betaInc.cu index f1407f9e8..a18ec1fda 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/betaInc.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/betaInc.cu @@ -181,7 +181,7 @@ void betaInc(sd::LaunchContext* context, const NDArray& a, const NDArray& b, con PointersManager manager(context, "betaInc"); NDArray::prepareSpecialUse({&output}, {&a, &b, &x}); - BUILD_SINGLE_SELECTOR(xType, betaIncForArrayCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), a.getSpecialBuffer(), a.getSpecialShapeInfo(), b.getSpecialBuffer(), b.getSpecialShapeInfo(), x.getSpecialBuffer(), x.getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo()), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(xType, betaIncForArrayCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), a.specialBuffer(), a.specialShapeInfo(), b.specialBuffer(), b.specialShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo()), FLOAT_TYPES); NDArray::registerSpecialUse({&output}, {&a, &b, &x}); 
manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/col2im.cu b/libnd4j/include/ops/declarable/helpers/cuda/col2im.cu index 878ce3a6a..62f60cc73 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/col2im.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/col2im.cu @@ -193,7 +193,7 @@ void col2im(sd::LaunchContext& context, const NDArray& col, NDArray& im, const i const int sharedMem = col.rankOf() * sizeof(uint) * threadsPerBlock + 256; NDArray::prepareSpecialUse({&im}, {&col}); - BUILD_SINGLE_SELECTOR(im.dataType(), col2imCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context.getCudaStream(), col.getSpecialBuffer(), col.getSpecialShapeInfo(), im.specialBuffer(), im.specialShapeInfo(), sH, sW, pH, pW, dH, dW), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(im.dataType(), col2imCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context.getCudaStream(), col.specialBuffer(), col.specialShapeInfo(), im.specialBuffer(), im.specialShapeInfo(), sH, sW, pH, pW, dH, dW), FLOAT_TYPES); NDArray::registerSpecialUse({&im}, {&col}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/compression/compression.cu b/libnd4j/include/ops/declarable/helpers/cuda/compression/compression.cu new file mode 100644 index 000000000..5de20c57f --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/cuda/compression/compression.cu @@ -0,0 +1,66 @@ +/******************************************************************************* + * Copyright (c) 2020 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author sgazeos@gmail.com +// +#include +#include +#include + +namespace sd { +namespace ops { +namespace helpers { + void decodeBitmap(sd::LaunchContext* context, const NDArray* input, NDArray* output) { + auto stream = context->getCudaStream(); + NDArray::prepareSpecialUse({output}, {input}); + + dim3 launchDims(512, 512, 16384); + auto xType = output->dataType(); + BUILD_SINGLE_SELECTOR(xType, cudaDecodeBitmapGeneric, (launchDims, stream, input->specialBuffer(), output->lengthOf(), output->specialBuffer()), FLOAT_TYPES); + + sd::DebugHelper::checkErrorCode(stream, "decodeBitmapFloat(...) failed"); + + NDArray::registerSpecialUse({output}, {input}); + } + + Nd4jLong encodeBitmap(sd::LaunchContext* context, NDArray* input, NDArray* output, float threshold) { + auto stream = LaunchContext::defaultContext()->getCudaStream(); + int *resultPointer = reinterpret_cast(LaunchContext::defaultContext()->getScalarPointer()); + int *reductionPointer = reinterpret_cast(LaunchContext::defaultContext()->getReductionPointer()); + + // nullify result pointer before use + resultPointer[0] = 0; + + NDArray::prepareSpecialUse({},{output, input}); + + dim3 launchDims(512, 512, 32768); + auto xType = input->dataType(); + BUILD_SINGLE_SELECTOR(xType, cudaEncodeBitmapGeneric, + (launchDims, stream, input->specialBuffer(), input->lengthOf(), reinterpret_cast(output->specialBuffer()), resultPointer, reductionPointer, threshold), + FLOAT_TYPES); + + sd::DebugHelper::checkErrorCode(stream, "encodeBitmapFloat(...) 
failed"); + + Nd4jLong dZ = (Nd4jLong) resultPointer[0]; + resultPointer[0] = 0; + + NDArray::registerSpecialUse({output, input}, {}); + return dZ; + } +} +} +} diff --git a/libnd4j/include/ops/declarable/helpers/cuda/compression/threshold.cu b/libnd4j/include/ops/declarable/helpers/cuda/compression/threshold.cu new file mode 100644 index 000000000..6b5af0df4 --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/cuda/compression/threshold.cu @@ -0,0 +1,231 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include +#include +#include +#include + +namespace sd { + namespace ops { + namespace helpers { + void prescanArrayRecursive(int** g_scanBlockSums, int *dZ, int *dX, int numElements, int level) { + auto stream = LaunchContext::defaultContext()->getCudaStream(); + + + int blockSize = 512; // max size of the thread blocks + int numBlocks = sd::math::nd4j_max(1, static_cast(ceil(static_cast(numElements) / (2.f * blockSize)))); + int numThreads; + + if (numBlocks > 1) + numThreads = blockSize; + else if (sd::isPowerOfTwo(numElements)) + numThreads = numElements / 2; + else + numThreads = sd::floorPow2(numElements); + + int numEltsPerBlock = numThreads * 2; + + // if this is a non-power-of-2 array, the last block will be non-full + // compute the smallest power of 2 able to compute its scan. + int numEltsLastBlock = + numElements - (numBlocks-1) * numEltsPerBlock; + int numThreadsLastBlock = sd::math::nd4j_max(1, numEltsLastBlock / 2); + int np2LastBlock = 0; + int sharedMemLastBlock = 0; + + if (numEltsLastBlock != numEltsPerBlock) { + np2LastBlock = 1; + + if(!isPowerOfTwo(numEltsLastBlock)) + numThreadsLastBlock = floorPow2(numEltsLastBlock); + + unsigned int extraSpace = (2 * numThreadsLastBlock) / NUM_BANKS; + sharedMemLastBlock = sizeof(int) * (2 * numThreadsLastBlock + extraSpace); + } + + // padding space is used to avoid shared memory bank conflicts + int extraSpace = numEltsPerBlock / NUM_BANKS; + int sharedMemSize = sizeof(int) * (numEltsPerBlock + extraSpace); + + // setup execution parameters + // if NP2, we process the last block separately + dim3 grid(sd::math::nd4j_max(1, numBlocks - np2LastBlock), 1, 1); + dim3 threads(numThreads, 1, 1); + dim3 gridOnes(1, 1, 1); + dim3 threadsOnes(numThreadsLastBlock, 1, 1); + + if (sharedMemSize < 2048) + sharedMemSize = 2048; + + if 
(sharedMemLastBlock < 2048) + sharedMemLastBlock = 2048; + + // execute the scan + if (numBlocks > 1) { + sd::prescanLauncher(grid, threads, sharedMemSize, stream, dZ, dX, g_scanBlockSums[level], numThreads * 2, 0, 0); + if (np2LastBlock) { + sd::prescanLauncher(gridOnes, threadsOnes, sharedMemLastBlock, stream, dZ, dX, g_scanBlockSums[level], numEltsLastBlock, numBlocks - 1, numElements - numEltsLastBlock); + } + + // After scanning all the sub-blocks, we are mostly done. But now we + // need to take all of the last values of the sub-blocks and scan those. + // This will give us a new value that must be added to each block to + // get the final results. + // recursive (CPU) call + prescanArrayRecursive(g_scanBlockSums, g_scanBlockSums[level], g_scanBlockSums[level], numBlocks, level+1); + + sd::uniformAdd<<>>(dZ, g_scanBlockSums[level], numElements - numEltsLastBlock, 0, 0); + + if (np2LastBlock) { + sd::uniformAdd<<<1, numThreadsLastBlock, 1024, *stream>>>(dZ, g_scanBlockSums[level], numEltsLastBlock, numBlocks - 1, numElements - numEltsLastBlock); + } + } else if (isPowerOfTwo(numElements)) { + sd::prescanLauncher(grid, threads, sharedMemSize, stream, dZ, dX, 0, numThreads * 2, 0, 0); + } else { + sd::prescanLauncher(grid, threads, sharedMemSize, stream, dZ, dX, 0, numElements, 0, 0); + } + + sd::DebugHelper::checkErrorCode(stream, "prescanArray(...) failed"); + } + + static void encodeThresholdP2Int_(void **prs, int *dx, Nd4jLong N, int *dz) { + auto stream = LaunchContext::defaultContext()->getCudaStream(); + + prescanArrayRecursive(reinterpret_cast(prs), dz, dx + 1, (int) N, 0); + sd::DebugHelper::checkErrorCode(stream, "encodeThresholdP2Int(...) failed"); + } + + static void encodeThresholdP3_(void *dx, const Nd4jLong *hXShapeInfo, int *offsets, Nd4jLong N, int *dz){ + auto stream = LaunchContext::defaultContext()->getCudaStream(); + + int blockSize = 512; + int numBlocks = N / blockSize + (N % blockSize ?
1 : 0); + + dim3 launchDims(numBlocks, blockSize, 8192); + auto xType = sd::ArrayOptions::dataType(hXShapeInfo); + BUILD_SINGLE_SELECTOR(xType, encoderKernelP3Generic, (launchDims, stream, dx, offsets, N, dz), FLOAT_TYPES); + + sd::DebugHelper::checkErrorCode(stream, "encodeThresholdP3Float(...) failed"); + } + + + static NDArray thresholdEstimate_(const NDArray &updates, const float threshold) { + const int numThreads = 512; + const int numBlocks = updates.lengthOf() / numThreads + (updates.lengthOf() % numThreads ? 1 : 0); + + auto tmp = NDArrayFactory::create('c', {numBlocks + 1}); + + dim3 launchDims(numBlocks, numThreads, 1024); + auto xType = updates.dataType(); + + NDArray::prepareSpecialUse({&tmp}, {&updates}); + BUILD_SINGLE_SELECTOR(xType, encoderKernelP1Generic, (launchDims, LaunchContext::defaultContext()->getCudaStream(), updates.specialBuffer(), updates.lengthOf(), tmp.specialBuffer(), threshold), FLOAT_TYPES); + NDArray::registerSpecialUse({&tmp}, {&updates}); + + return std::move(tmp); + } + + int32_t thresholdEstimate(const NDArray &updates, const float threshold) { + return thresholdEstimate_(updates, threshold).e(0); + } + + void thresholdEncode(NDArray &updates, NDArray &encoded, float threshold) { + // we need these blocks in order to know, how many "updates" will be processed by each GPU block + auto blocks = thresholdEstimate_(updates, threshold); + + const int numThreads = 512; + const int numBlocks = updates.lengthOf() / numThreads + (updates.lengthOf() % numThreads ? 
1 : 0); + + const int prefixThreads = 512; + int numElts = numBlocks; + int level = 0; + + // here we just calculate number of sumBlock arrays + do { + int numPrefixBlocks = sd::math::nd4j_max(1, sd::math::nd4j_ceil((float) numElts / (2.0f * prefixThreads))); + if (numBlocks > 1) { + level++; + } + numElts = numPrefixBlocks; + } while (numElts > 1); + + + + std::vector tempArrays(level); + std::vector pointers(level); + + level = 0; + numElts = numBlocks; + + do { + int numPrefixBlocks = sd::math::nd4j_max(1, sd::math::nd4j_ceil((float) numElts / (2.0f * prefixThreads))); + if (numPrefixBlocks > 1) { + tempArrays[level] = std::move(NDArrayFactory::create('c', {numPrefixBlocks})); + pointers[level] = tempArrays[level++].specialBuffer(); + } + numElts = numPrefixBlocks; + } while (numElts > 1); + + PointersManager pm(LaunchContext::defaultContext(), "thresholdEncode"); + auto dptr = pm.replicatePointer(pointers.data(), pointers.size() * 8); + auto offsets = NDArrayFactory::create('c', {numBlocks}); + + // we want to check, if we're hitting external limit on number of encoded elements + auto numMatches = blocks.e(0); + if (numMatches > encoded.lengthOf() - 4) { + blocks.p(0, encoded.lengthOf() - 4); + blocks.syncToDevice(); + } + + NDArray::prepareSpecialUse({}, {&encoded, &updates}); + + // filling offsets + encodeThresholdP2Int_(reinterpret_cast(dptr), + reinterpret_cast(blocks.specialBuffer()), + numBlocks, + reinterpret_cast(offsets.specialBuffer())); + + NDArray::registerSpecialUse({&blocks, &offsets}, {}); + pm.synchronize(); + + + encodeThresholdP3_(updates.specialBuffer(), + updates.shapeInfo(), + reinterpret_cast(offsets.specialBuffer()), + updates.lengthOf(), + reinterpret_cast(encoded.specialBuffer())); + + pm.synchronize(); + + NDArray::registerSpecialUse({&encoded, &updates}, {}); + } + + void thresholdDecode(const NDArray &encoded, NDArray &updates) { + dim3 launchDims(128, 512, 512); + auto xType = updates.dataType(); + +
NDArray::prepareSpecialUse({&updates}, {&encoded}); + BUILD_SINGLE_SELECTOR(xType, decoderKernelGeneric, (launchDims, LaunchContext::defaultContext()->getCudaStream(), encoded.specialBuffer(), updates.lengthOf(), updates.specialBuffer()), FLOAT_TYPES); + NDArray::registerSpecialUse({&updates}, {&encoded}); + } + } + } +} diff --git a/libnd4j/include/ops/declarable/helpers/cuda/concat.cu b/libnd4j/include/ops/declarable/helpers/cuda/concat.cu index 10e1d132c..cbcd35ffe 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/concat.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/concat.cu @@ -36,7 +36,7 @@ namespace helpers { /////////////////////////////////////////////////////////////////// template -__global__ static void concatCuda(void* pVx, void* pxShapeInfo, void* vz, Nd4jLong* zShapeInfo, const int axis) { +__global__ static void concatCuda(void* pVx, void* pxShapeInfo, void* vz, const Nd4jLong* zShapeInfo, const int axis) { T* z = reinterpret_cast(vz); __shared__ Nd4jLong zLen, totalThreads; @@ -76,11 +76,10 @@ __global__ static void concatCuda(void* pVx, void* pxShapeInfo, void* vz, Nd4jL /////////////////////////////////////////////////////////////////// template __host__ static void concatCudaLauncher(const int blocksPerGrid, const int threadsPerBlock, const int sharedMem, const cudaStream_t *stream, - void* pVx, void* pxShapeInfo, void* vz, Nd4jLong* zShapeInfo, const int axis) { + void* pVx, void* pxShapeInfo, void* vz, const Nd4jLong* zShapeInfo, const int axis) { concatCuda<<>>(pVx, pxShapeInfo, vz, zShapeInfo, axis); } -BUILD_SINGLE_TEMPLATE(template void concatCudaLauncher, (const int blocksPerGrid, const int threadsPerBlock, const int sharedMem, const cudaStream_t *stream, void* pVx, void* pxShapeInfo, void* vz, Nd4jLong* zShapeInfo, const int axis), LIBND4J_TYPES); ////////////////////////////////////////////////////////////////////////// void concat(sd::LaunchContext * context, const std::vector& inArrs, NDArray& output, const int axis) { 
@@ -102,11 +101,11 @@ void concat(sd::LaunchContext * context, const std::vector& inAr if(luckCase1) { // for example {1,10} + {2,10} + {3,10} = {6, 10} order c; or {10,1} + {10,2} + {10,3} = {10, 6} order f - void* z = static_cast(output.getSpecialBuffer()); + void* z = static_cast(output.specialBuffer()); for (uint i = 0; i < numOfInArrs; ++i) { const auto memAmountToCopy = inArrs[i]->lengthOf() * sizeofT; - cudaMemcpyAsync(z, static_cast(inArrs[i]->getSpecialBuffer()), memAmountToCopy, cudaMemcpyDeviceToDevice, *context->getCudaStream()); + cudaMemcpyAsync(z, reinterpret_cast(inArrs[i]->specialBuffer()), memAmountToCopy, cudaMemcpyDeviceToDevice, *context->getCudaStream()); z = static_cast(z) + memAmountToCopy; } @@ -134,7 +133,7 @@ void concat(sd::LaunchContext * context, const std::vector& inAr // if(!areInputsContin || !allSameOrder) // break; - // strideOfContigStride[i] = shape::strideOverContigAxis(axis, inArrs[i]->getShapeInfo()); + // strideOfContigStride[i] = shape::strideOverContigAxis(axis, inArrs[i]->shapeInfo()); // } // } @@ -142,16 +141,16 @@ void concat(sd::LaunchContext * context, const std::vector& inAr // if(luckCase2) { // for example {2,1,3} + {2,5,3} + {2,10,3} = {2,16,3}, here axis 1 shoud have stride = 1 for all inputs arrays and output array - // const auto zStep = shape::strideOverContigAxis(axis, output.getShapeInfo()); + // const auto zStep = shape::strideOverContigAxis(axis, output.shapeInfo()); // for (uint i = 0; i < output.lengthOf() / output.sizeAt(axis); ++i) { // const auto iShift = i * sizeofT; - // void* z = static_cast(output.getSpecialBuffer()) + zStep * iShift; + // void* z = static_cast(output.specialBuffer()) + zStep * iShift; // for (uint j = 0; j < numOfInArrs; ++j) { // const auto xDim = inArrs[j]->sizeAt(axis); - // void* x = static_cast(inArrs[j]->getSpecialBuffer()) + strideOfContigStride[j] * iShift; + // void* x = static_cast(inArrs[j]->specialBuffer()) + strideOfContigStride[j] * iShift; // const auto 
memSizeToCopy = xDim * sizeofT; // cudaMemcpyAsync(z, x, memSizeToCopy, cudaMemcpyDeviceToDevice, *context->getCudaStream()); // z = static_cast(z) + memSizeToCopy; @@ -168,12 +167,12 @@ void concat(sd::LaunchContext * context, const std::vector& inAr const int sharedMem = 512; // prepare arrays of pointers on buffers and shapes - std::vector hInBuffers(numOfInArrs); - std::vector hInShapeInfo(numOfInArrs); + std::vector hInBuffers(numOfInArrs); + std::vector hInShapeInfo(numOfInArrs); for(int i = 0; i < numOfInArrs; ++i) { - hInBuffers[i] = inArrs[i]->getSpecialBuffer(); - hInShapeInfo[i] = inArrs[i]->getSpecialShapeInfo(); + hInBuffers[i] = inArrs[i]->specialBuffer(); + hInShapeInfo[i] = inArrs[i]->specialShapeInfo(); } PointersManager manager(context, "helpers::concat"); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/confusion.cu b/libnd4j/include/ops/declarable/helpers/cuda/confusion.cu index edb7538d4..dfa86124a 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/confusion.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/confusion.cu @@ -38,7 +38,7 @@ namespace helpers { } template - __global__ static void confusionFunctorKernel(Nd4jLong* labelsBuffer, Nd4jLong* predictionBuffer, Nd4jLong bufferLength, void const* weightsBuffer, void* outputBuffer, Nd4jLong* tadShape, Nd4jLong* tadOffsets) { + __global__ static void confusionFunctorKernel(Nd4jLong* labelsBuffer, Nd4jLong* predictionBuffer, Nd4jLong bufferLength, void const* weightsBuffer, void* outputBuffer, const Nd4jLong* tadShape, const Nd4jLong* tadOffsets) { __shared__ int arrIdx, blocksPerArr; __shared__ T *z; __shared__ T const* w; @@ -80,7 +80,7 @@ namespace helpers { if (err != 0) throw sd::cuda_exception::build("Cannot allocate memory for labels long buffer", err); // copy with type conversion - copyBuffers<<<256, 512, 1024, *stream>>>(labelsLongBuffer, labels->getSpecialBuffer(), labels->lengthOf()); + copyBuffers<<<256, 512, 1024, *stream>>>(labelsLongBuffer, 
labels->specialBuffer(), labels->lengthOf()); } if (predictionLongBuffer == nullptr) { @@ -88,22 +88,22 @@ namespace helpers { if (err != 0) throw sd::cuda_exception::build("Cannot allocate memory for predictions long buffer", err); // copy with type conversion - copyBuffers<<<256, 512, 1024, *stream>>>(predictionLongBuffer, predictions->getSpecialBuffer(), predictions->lengthOf()); + copyBuffers<<<256, 512, 1024, *stream>>>(predictionLongBuffer, predictions->specialBuffer(), predictions->lengthOf()); } auto bufferLength = labels->lengthOf(); dim3 launchDims(32, 32, 1024); - confusionFunctorKernel<<>>(labelsLongBuffer, predictionLongBuffer, bufferLength, weights != nullptr? weights->getSpecialBuffer():nullptr, output->specialBuffer(), pack.specialShapeInfo(), pack.specialOffsets()); + confusionFunctorKernel<<>>(labelsLongBuffer, predictionLongBuffer, bufferLength, weights != nullptr? weights->specialBuffer():nullptr, output->specialBuffer(), pack.specialShapeInfo(), pack.specialOffsets()); manager.synchronize(); - if (predictionLongBuffer != predictions->getSpecialBuffer()) { + if (predictionLongBuffer != predictions->specialBuffer()) { cudaError_t err = cudaFree(predictionLongBuffer); if (err != 0) throw sd::cuda_exception::build("Cannot deallocate memory for predictions long buffer", err); } - if (labelsLongBuffer != labels->getSpecialBuffer()) { + if (labelsLongBuffer != labels->specialBuffer()) { cudaError_t err = cudaFree(labelsLongBuffer); if (err != 0) throw sd::cuda_exception::build("Cannot deallocate memory for labels long buffer", err); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_col2vol.cu b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_col2vol.cu index d751c2b1e..80df76c91 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_col2vol.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_col2vol.cu @@ -121,7 +121,7 @@ void ConvolutionUtils::col2vol(sd::graph::Context& block, const NDArray& 
col, ND const int sharedMem = col.rankOf() * sizeof(uint) * threadsPerBlock + 256; NDArray::prepareSpecialUse({&vol}, {&col}); - BUILD_SINGLE_SELECTOR(vol.dataType(), col2volCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), col.getSpecialBuffer(), col.getSpecialShapeInfo(), vol.specialBuffer(), vol.specialShapeInfo(), sD, sH, sW, pD, pH, pW, dD, dH, dW), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(vol.dataType(), col2volCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), col.specialBuffer(), col.specialShapeInfo(), vol.specialBuffer(), vol.specialShapeInfo(), sD, sH, sW, pD, pH, pW, dD, dH, dW), FLOAT_TYPES); NDArray::registerSpecialUse({&vol}, {&col}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_pooling2d.cu b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_pooling2d.cu index eb336cb76..c146be7bf 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_pooling2d.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_pooling2d.cu @@ -118,7 +118,7 @@ static __global__ void avgPooling2dCuda(const void *vx, const Nd4jLong *xShapeIn ////////////////////////////////////////////////////////////////////////// template -static void avgPooling2dCudaLauncher(sd::LaunchContext & block, void *vx, Nd4jLong *vxShapeInfo, void *vz, Nd4jLong *vzShapeInfo, const int kH, const int kW, const int sH, const int sW, const int pH, const int pW, const int dH, const int dW, const int extraParam0) { +static void avgPooling2dCudaLauncher(sd::LaunchContext & block, const void *vx, const Nd4jLong *vxShapeInfo, void *vz, const Nd4jLong *vzShapeInfo, const int kH, const int kW, const int sH, const int sW, const int pH, const int pW, const int dH, const int dW, const int extraParam0) { avgPooling2dCuda<<<512, 512, 4192, *block.getCudaStream()>>>(vx, vxShapeInfo, vz, vzShapeInfo, kH, kW, sH, sW, pH, pW, dH, dW, extraParam0); } @@ 
-209,7 +209,7 @@ static __global__ void pnormPooling2dCuda(const void *vx, const Nd4jLong *xShape ////////////////////////////////////////////////////////////////////////// template -static void pnormPooling2dCudaLauncher(sd::LaunchContext & block, void *vx, Nd4jLong *vxShapeInfo, void *vz, Nd4jLong *vzShapeInfo, const int kH, const int kW, const int sH, const int sW, const int pH, const int pW, const int dH, const int dW, const int extraParam0) { +static void pnormPooling2dCudaLauncher(sd::LaunchContext & block, const void *vx, const Nd4jLong *vxShapeInfo, void *vz, const Nd4jLong *vzShapeInfo, const int kH, const int kW, const int sH, const int sW, const int pH, const int pW, const int dH, const int dW, const int extraParam0) { pnormPooling2dCuda<<<512, 512, 4192, *block.getCudaStream()>>>(vx, vxShapeInfo, vz, vzShapeInfo, kH, kW, sH, sW, pH, pW, dH, dW, extraParam0); } @@ -303,7 +303,7 @@ static __global__ void maxPooling2dCuda(const void *vx, const Nd4jLong *xShapeIn ////////////////////////////////////////////////////////////////////////// template -static void maxPooling2dCudaLauncher(sd::LaunchContext & block, void *vx, Nd4jLong *vxShapeInfo, void *vz, Nd4jLong *vzShapeInfo, const int kH, const int kW, const int sH, const int sW, const int pH, const int pW, const int dH, const int dW, const int extraParam0) { +static void maxPooling2dCudaLauncher(sd::LaunchContext & block, const void *vx, const Nd4jLong *vxShapeInfo, void *vz, const Nd4jLong *vzShapeInfo, const int kH, const int kW, const int sH, const int sW, const int pH, const int pW, const int dH, const int dW, const int extraParam0) { maxPooling2dCuda<<<512, 512, 4192, *block.getCudaStream()>>>(vx, vxShapeInfo, vz, vzShapeInfo, kH, kW, sH, sW, pH, pW, dH, dW, extraParam0); } @@ -315,15 +315,15 @@ void ConvolutionUtils::pooling2d(sd::graph::Context& block, const NDArray& input switch (poolingMode) { case MAX_POOL: { - BUILD_SINGLE_SELECTOR_TWICE(input.dataType(), maxPooling2dCudaLauncher, 
(*block.launchContext(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.getSpecialBuffer(), output.getSpecialShapeInfo(), kH, kW, sH, sW, pH, pW, dH, dW, extraParam0), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR_TWICE(input.dataType(), maxPooling2dCudaLauncher, (*block.launchContext(), input.specialBuffer(), input.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), kH, kW, sH, sW, pH, pW, dH, dW, extraParam0), FLOAT_TYPES); } break; case AVG_POOL: { - BUILD_SINGLE_SELECTOR_TWICE(input.dataType(), avgPooling2dCudaLauncher, (*block.launchContext(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.getSpecialBuffer(), output.getSpecialShapeInfo(), kH, kW, sH, sW, pH, pW, dH, dW, extraParam0), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR_TWICE(input.dataType(), avgPooling2dCudaLauncher, (*block.launchContext(), input.specialBuffer(), input.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), kH, kW, sH, sW, pH, pW, dH, dW, extraParam0), FLOAT_TYPES); } break; case PNORM_POOL: { - BUILD_SINGLE_SELECTOR_TWICE(input.dataType(), pnormPooling2dCudaLauncher, (*block.launchContext(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.getSpecialBuffer(), output.getSpecialShapeInfo(), kH, kW, sH, sW, pH, pW, dH, dW, extraParam0), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR_TWICE(input.dataType(), pnormPooling2dCudaLauncher, (*block.launchContext(), input.specialBuffer(), input.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), kH, kW, sH, sW, pH, pW, dH, dW, extraParam0), FLOAT_TYPES); } break; default: diff --git a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_pooling2dBP.cu b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_pooling2dBP.cu index 26808ad4c..62f4787dd 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_pooling2dBP.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_pooling2dBP.cu @@ -178,7 +178,7 @@ void 
ConvolutionUtils::pooling2dBP(sd::graph::Context& block, const NDArray& inp const int sharedMem = gradO.rankOf() * sizeof(Nd4jLong) * threadsPerBlock + 128; NDArray::prepareSpecialUse({&gradI}, {&input, &gradO}); - BUILD_SINGLE_SELECTOR(input.dataType(), pooling2dBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), gradO.getSpecialBuffer(), gradO.getSpecialShapeInfo(), gradI.specialBuffer(), gradI.specialShapeInfo(), kH, kW, sH, sW, pH, pW, dH, dW, poolingMode, extraParam0), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), pooling2dBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), gradO.specialBuffer(), gradO.specialShapeInfo(), gradI.specialBuffer(), gradI.specialShapeInfo(), kH, kW, sH, sW, pH, pW, dH, dW, poolingMode, extraParam0), FLOAT_TYPES); NDArray::registerSpecialUse({&gradI}, {&input, &gradO}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_pooling3d.cu b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_pooling3d.cu index 93e372a7e..0a3bfc9b6 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_pooling3d.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_pooling3d.cu @@ -170,7 +170,7 @@ void ConvolutionUtils::pooling3d(sd::graph::Context& block, const NDArray& input const int sharedMem = output.rankOf() * sizeof(Nd4jLong) * threadsPerBlock + 128; NDArray::prepareSpecialUse({&output}, {&input}); - BUILD_SINGLE_SELECTOR(input.dataType(), pooling3dCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), kD, kH, kW, sD, sH, sW, pD, pH, pW, dD, dH, dW, poolingMode, extraParam0), FLOAT_TYPES); + 
BUILD_SINGLE_SELECTOR(input.dataType(), pooling3dCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), kD, kH, kW, sD, sH, sW, pD, pH, pW, dD, dH, dW, poolingMode, extraParam0), FLOAT_TYPES); NDArray::registerSpecialUse({&output}, {&input}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_pooling3dBP.cu b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_pooling3dBP.cu index 51b48bc23..fd78bb80b 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_pooling3dBP.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_pooling3dBP.cu @@ -192,7 +192,7 @@ void ConvolutionUtils::pooling3dBP(sd::graph::Context& block, const NDArray& inp const int sharedMem = gradO.rankOf() * sizeof(Nd4jLong) * threadsPerBlock + 128; NDArray::prepareSpecialUse({&gradI}, {&input, &gradO}); - BUILD_SINGLE_SELECTOR(input.dataType(), pooling3dBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), gradO.getSpecialBuffer(), gradO.getSpecialShapeInfo(), gradI.specialBuffer(), gradI.specialShapeInfo(), kD, kH, kW, sD, sH, sW, pD, pH, pW, dD, dH, dW, poolingMode, extraParam0), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), pooling3dBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), gradO.specialBuffer(), gradO.specialShapeInfo(), gradI.specialBuffer(), gradI.specialShapeInfo(), kD, kH, kW, sD, sH, sW, pD, pH, pW, dD, dH, dW, poolingMode, extraParam0), FLOAT_TYPES); NDArray::registerSpecialUse({&gradI}, {&input, &gradO}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_upsampling2d.cu 
b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_upsampling2d.cu index be9fab0be..ee1fa8924 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_upsampling2d.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_upsampling2d.cu @@ -87,7 +87,7 @@ void ConvolutionUtils::upsampling2d(sd::graph::Context& block, const NDArray& in const int sharedMem = output.rankOf() * sizeof(Nd4jLong) * threadsPerBlock + 128; NDArray::prepareSpecialUse({&output}, {&input}); - BUILD_SINGLE_SELECTOR(input.dataType(), upsampling2dCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), factorH, factorW, isNCHW), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), upsampling2dCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), factorH, factorW, isNCHW), FLOAT_TYPES); NDArray::registerSpecialUse({&output}, {&input}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_upsampling2dBP.cu b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_upsampling2dBP.cu index ce393d279..c6864c48a 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_upsampling2dBP.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_upsampling2dBP.cu @@ -93,7 +93,7 @@ void ConvolutionUtils::upsampling2dBP(sd::graph::Context& block, const NDArray& const int sharedMem = gradI.rankOf() * sizeof(Nd4jLong) * threadsPerBlock + 128; NDArray::prepareSpecialUse({&gradI}, {&gradO}); - BUILD_SINGLE_SELECTOR(gradI.dataType(), upsampling2dBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), gradO.getSpecialBuffer(), gradO.getSpecialShapeInfo(), gradI.specialBuffer(), 
gradI.specialShapeInfo(), isNCHW), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(gradI.dataType(), upsampling2dBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), gradO.specialBuffer(), gradO.specialShapeInfo(), gradI.specialBuffer(), gradI.specialShapeInfo(), isNCHW), FLOAT_TYPES); NDArray::registerSpecialUse({&gradI}, {&gradO}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_upsampling3d.cu b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_upsampling3d.cu index 6f15a27d6..1acb4307f 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_upsampling3d.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_upsampling3d.cu @@ -88,7 +88,7 @@ void ConvolutionUtils::upsampling3d(sd::graph::Context& block, const NDArray& in const int sharedMem = output.rankOf() * sizeof(Nd4jLong) * threadsPerBlock + 128; NDArray::prepareSpecialUse({&output}, {&input}); - BUILD_SINGLE_SELECTOR(input.dataType(), upsampling3dCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), factorD, factorH, factorW, isNCDHW), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), upsampling3dCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), factorD, factorH, factorW, isNCDHW), FLOAT_TYPES); NDArray::registerSpecialUse({&output}, {&input}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_upsampling3dBP.cu b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_upsampling3dBP.cu index f9eb56bec..5a1e08c07 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_upsampling3dBP.cu +++ 
b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_upsampling3dBP.cu @@ -96,7 +96,7 @@ void ConvolutionUtils::upsampling3dBP(sd::graph::Context& block, const NDArray& const int sharedMem = gradI.rankOf() * sizeof(Nd4jLong) * threadsPerBlock + 128; NDArray::prepareSpecialUse({&gradI}, {&gradO}); - BUILD_SINGLE_SELECTOR(gradI.dataType(), upsampling3dBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), gradO.getSpecialBuffer(), gradO.getSpecialShapeInfo(), gradI.specialBuffer(), gradI.specialShapeInfo(), isNCDHW), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(gradI.dataType(), upsampling3dBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), gradO.specialBuffer(), gradO.specialShapeInfo(), gradI.specialBuffer(), gradI.specialShapeInfo(), isNCDHW), FLOAT_TYPES); NDArray::registerSpecialUse({&gradI}, {&gradO}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_vol2col.cu b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_vol2col.cu index ebe0ec26e..c2c5fb3ef 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/convolutions_vol2col.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/convolutions_vol2col.cu @@ -101,7 +101,7 @@ void ConvolutionUtils::vol2col(sd::graph::Context& block, const NDArray& vol, ND const int sharedMem = col.rankOf() * sizeof(Nd4jLong) * threadsPerBlock + 128; NDArray::prepareSpecialUse({&col}, {&vol}); - BUILD_SINGLE_SELECTOR(vol.dataType(), vol2colCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), vol.getSpecialBuffer(), vol.getSpecialShapeInfo(), col.specialBuffer(), col.specialShapeInfo(), sD, sH, sW, pD, pH, pW, dD, dH, dW), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(vol.dataType(), vol2colCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, block.launchContext()->getCudaStream(), vol.specialBuffer(), vol.specialShapeInfo(), col.specialBuffer(), 
col.specialShapeInfo(), sD, sH, sW, pD, pH, pW, dD, dH, dW), FLOAT_TYPES); NDArray::registerSpecialUse({&col}, {&vol}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/cross.cu b/libnd4j/include/ops/declarable/helpers/cuda/cross.cu index d7694641c..8de4f65fd 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/cross.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/cross.cu @@ -111,7 +111,7 @@ void crossBatched(sd::LaunchContext* context, NDArray *x, NDArray *y, NDArray *z PointersManager manager(context, "cross"); NDArray::prepareSpecialUse({z}, {x, y}); - BUILD_SINGLE_SELECTOR(x->dataType(), crossCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), x->getSpecialBuffer(), x->getSpecialShapeInfo(), y->getSpecialBuffer(), y->getSpecialShapeInfo(), z->specialBuffer(), z->specialShapeInfo()), NUMERIC_TYPES); + BUILD_SINGLE_SELECTOR(x->dataType(), crossCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), x->specialBuffer(), x->specialShapeInfo(), y->specialBuffer(), y->specialShapeInfo(), z->specialBuffer(), z->specialShapeInfo()), NUMERIC_TYPES); NDArray::registerSpecialUse({z}, {x, y}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/d_t_s.cu b/libnd4j/include/ops/declarable/helpers/cuda/d_t_s.cu index fc3b04ee8..35d8bf033 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/d_t_s.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/d_t_s.cu @@ -25,9 +25,9 @@ namespace ops { namespace helpers { template - static _CUDA_G void depthToSpaceKernel(void *vx, Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, const int block_size, const bool isNHWC) { - T *input_ptr = reinterpret_cast(vx); - T *output_ptr = reinterpret_cast(vz); + static _CUDA_G void depthToSpaceKernel(const void *vx, const Nd4jLong *xShapeInfo, void *vz, const Nd4jLong *zShapeInfo, const int block_size, const bool isNHWC) { + auto input_ptr = reinterpret_cast(vx); + 
auto output_ptr = reinterpret_cast(vz); const int batch_size = shape::sizeAt(xShapeInfo, 0); const int input_depth = isNHWC ? shape::sizeAt(xShapeInfo, 3) : shape::sizeAt(xShapeInfo, 1); @@ -89,7 +89,7 @@ namespace helpers { template static void __depthToSpace(sd::LaunchContext * context, const NDArray &input, NDArray *output, int block_size, bool isNHWC) { - depthToSpaceKernel<<<512, 512, 1024, *context->getCudaStream()>>>(input.getSpecialBuffer(), input.getSpecialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), block_size, isNHWC); + depthToSpaceKernel<<<512, 512, 1024, *context->getCudaStream()>>>(input.specialBuffer(), input.specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), block_size, isNHWC); } void _depthToSpace(sd::LaunchContext * context, const NDArray &input, NDArray *output, int block_size, bool isNHWC) { @@ -100,9 +100,6 @@ namespace helpers { BUILD_SINGLE_SELECTOR(xType, __depthToSpace, (context, input, output, block_size, isNHWC), LIBND4J_TYPES); NDArray::registerSpecialUse({output}, {&input}); } - - BUILD_SINGLE_TEMPLATE(template void __depthToSpace, (sd::LaunchContext * context, const NDArray &input, NDArray *output, int block_size, bool isNHWC);, LIBND4J_TYPES); - } } } \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cuda/diGamma.cu b/libnd4j/include/ops/declarable/helpers/cuda/diGamma.cu index a6d06be17..ff217bdb6 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/diGamma.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/diGamma.cu @@ -66,7 +66,7 @@ void diGamma(sd::LaunchContext* context, const NDArray& x, NDArray& z) { int blocksPerGrid = (z.lengthOf() + threadsPerBlock - 1) / threadsPerBlock; NDArray::prepareSpecialUse({&z}, {&x}); - BUILD_SINGLE_SELECTOR(x.dataType(), diGammaCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), x.getSpecialBuffer(), x.getSpecialShapeInfo(), z.getSpecialBuffer(), z.getSpecialShapeInfo()), FLOAT_TYPES); + 
BUILD_SINGLE_SELECTOR(x.dataType(), diGammaCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), x.specialBuffer(), x.specialShapeInfo(), z.specialBuffer(), z.specialShapeInfo()), FLOAT_TYPES); NDArray::registerSpecialUse({&z}, {&x}); } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/diag.cu b/libnd4j/include/ops/declarable/helpers/cuda/diag.cu index 87fd2aa98..f011f4095 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/diag.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/diag.cu @@ -33,7 +33,7 @@ namespace helpers { // inputLength - length for input tensor // template -static __global__ void diagFunctorKernel(void* outputBuffer, Nd4jLong* outputShape, void const* inputBuffer, Nd4jLong* inputShape, Nd4jLong inputLength) { +static __global__ void diagFunctorKernel(void* outputBuffer, const Nd4jLong* outputShape, void const* inputBuffer, const Nd4jLong* inputShape, Nd4jLong inputLength) { __shared__ T *z; __shared__ T const* x; __shared__ Nd4jLong outputLength; @@ -65,7 +65,7 @@ static __global__ void diagFunctorKernel(void* outputBuffer, Nd4jLong* outputSha // inputLength - given length for input tensor // template - static __global__ void diagPartFunctorKernel(void* outputBuffer, Nd4jLong* outputShape, void const* inputBuffer, Nd4jLong* inputShape, Nd4jLong outputLength, Nd4jLong inputLength) { + static __global__ void diagPartFunctorKernel(void* outputBuffer, const Nd4jLong* outputShape, void const* inputBuffer, const Nd4jLong* inputShape, Nd4jLong outputLength, Nd4jLong inputLength) { __shared__ T *z; __shared__ T const* x; @@ -96,7 +96,7 @@ static __global__ void diagFunctorKernel(void* outputBuffer, Nd4jLong* outputSha dim3 launchDims(256, 512, 8192); if (!input->isActualOnDeviceSide()) input->syncToDevice(); - diagFunctorKernel<<>>(output->specialBuffer(), output->specialShapeInfo(), input->getSpecialBuffer(), input->getSpecialShapeInfo(), inputLength); + diagFunctorKernel<<>>(output->specialBuffer(), 
output->specialShapeInfo(), input->specialBuffer(), input->specialShapeInfo(), inputLength); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -121,7 +121,7 @@ static __global__ void diagFunctorKernel(void* outputBuffer, Nd4jLong* outputSha if (!input->isActualOnDeviceSide()) input->syncToDevice(); - diagPartFunctorKernel<<>>(output->specialBuffer(), output->specialShapeInfo(), input->getSpecialBuffer(), input->getSpecialShapeInfo(), outLen, inLen); + diagPartFunctorKernel<<>>(output->specialBuffer(), output->specialShapeInfo(), input->specialBuffer(), input->specialShapeInfo(), outLen, inLen); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/libnd4j/include/ops/declarable/helpers/cuda/dilation2d.cu b/libnd4j/include/ops/declarable/helpers/cuda/dilation2d.cu index c05b5fb6d..0d25552c9 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/dilation2d.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/dilation2d.cu @@ -122,7 +122,7 @@ void dilation2d(sd::LaunchContext* context, NDArray *input, NDArray *weights, ND const int sharedMem = (weights->rankOf() + output->rankOf()) * sizeof(int) * threadsPerBlock + 128; NDArray::prepareSpecialUse({output}, {input, weights}); - BUILD_SINGLE_SELECTOR_TWICE(input->dataType(), dilation2dCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input->getSpecialBuffer(), input->getSpecialShapeInfo(), weights->getSpecialBuffer(), weights->getSpecialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), sH, sW, pH, pW, dH, dW), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR_TWICE(input->dataType(), dilation2dCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input->specialBuffer(), input->specialShapeInfo(), weights->specialBuffer(), weights->specialShapeInfo(), output->specialBuffer(), 
output->specialShapeInfo(), sH, sW, pH, pW, dH, dW), FLOAT_TYPES); NDArray::registerSpecialUse({output}, {input, weights}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/dropout.cu b/libnd4j/include/ops/declarable/helpers/cuda/dropout.cu index aee79caa7..4e0fdb377 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/dropout.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/dropout.cu @@ -29,7 +29,7 @@ namespace ops { namespace helpers { template - static __global__ void dropoutSimpleKernel(void const* inputBuf, Nd4jLong const* inputShape, void* outputBuf, Nd4jLong* outputShape, double probVal, int inLen, sd::graph::RandomGenerator* nodeRng) { + static __global__ void dropoutSimpleKernel(void const* inputBuf, Nd4jLong const* inputShape, void* outputBuf, Nd4jLong const* outputShape, double probVal, int inLen, sd::graph::RandomGenerator* nodeRng) { auto tid = blockIdx.x * blockDim.x + threadIdx.x; auto step = blockDim.x * gridDim.x; T const* input = reinterpret_cast(inputBuf); @@ -62,7 +62,7 @@ namespace helpers { throw cuda_exception::build("helpers::dropoutSimple: Cannot set up device memory for random generator.", err); } - dropoutSimpleKernel<<<128, 256, 1024, *stream>>>(input->getSpecialBuffer(), input->getSpecialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), probValue, inLen, dRandom); + dropoutSimpleKernel<<<128, 256, 1024, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), probValue, inLen, dRandom); err = cudaFree(dRandom); if (err) { throw cuda_exception::build("helpers::dropoutSimple: Cannot deallocate device memory for random generator.", err); @@ -124,7 +124,7 @@ namespace helpers { /////////////////////////////////// backrpopagations /////////////////////////////////////////////// template - static __global__ void dropoutBPKernel(void* outputBuf, Nd4jLong* outputShape, void* gradOutBuf, Nd4jLong* gradOutShape, double probValue) { + 
static __global__ void dropoutBPKernel(void* outputBuf, Nd4jLong const* outputShape, void* gradOutBuf, Nd4jLong const* gradOutShape, double probValue) { __shared__ T* output; __shared__ T* input; __shared__ int len; @@ -165,7 +165,7 @@ namespace helpers { } template - static __global__ void alphaDropoutSimpleKernel(void const* inputBuf, Nd4jLong const* inputShape, void* outputBuf, Nd4jLong* outputShape, double probValue, double alpha, double alpha1, double beta, int inLen, sd::graph::RandomGenerator* nodeRng) { + static __global__ void alphaDropoutSimpleKernel(void const* inputBuf, Nd4jLong const* inputShape, void* outputBuf, Nd4jLong const* outputShape, double probValue, double alpha, double alpha1, double beta, int inLen, sd::graph::RandomGenerator* nodeRng) { auto tid = blockIdx.x * blockDim.x + threadIdx.x; auto step = blockDim.x * gridDim.x; T const* input = reinterpret_cast(inputBuf); @@ -191,7 +191,7 @@ namespace helpers { throw cuda_exception::build("helpers::alphaDropoutSimple: Cannot set up device memory for random generator.", err); } - alphaDropoutSimpleKernel<<<128, 256, 1024, *stream>>>(input->getSpecialBuffer(), input->getSpecialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), probValue, alpha, alpha1, beta, output->lengthOf(), dRandom); + alphaDropoutSimpleKernel<<<128, 256, 1024, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), probValue, alpha, alpha1, beta, output->lengthOf(), dRandom); err = cudaFree(dRandom); if (err) { diff --git a/libnd4j/include/ops/declarable/helpers/cuda/dynamic.cu b/libnd4j/include/ops/declarable/helpers/cuda/dynamic.cu index a80d838be..6f29995d3 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/dynamic.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/dynamic.cu @@ -27,9 +27,9 @@ namespace sd { template - static _CUDA_G void dynamicPartitionScalarKernel(void *vx, Nd4jLong *xShapeInfo, void *vi, Nd4jLong *iShapeInfo, void **vz, 
Nd4jLong **zShapeInfos, const Nd4jLong numOutputs) { - auto x = reinterpret_cast(vx); - auto i = reinterpret_cast(vi); + static _CUDA_G void dynamicPartitionScalarKernel(const void *vx, const Nd4jLong *xShapeInfo, const void *vi, const Nd4jLong *iShapeInfo, void **vz, Nd4jLong **zShapeInfos, const Nd4jLong numOutputs) { + auto x = reinterpret_cast(vx); + auto i = reinterpret_cast(vi); auto xLength = shape::length(xShapeInfo); auto iLength = shape::length(iShapeInfo); @@ -85,9 +85,9 @@ namespace sd { } template - static _CUDA_G void dynamicPartitionTadKernel(void *vx, Nd4jLong *xTadShapeInfo, Nd4jLong *xTadOffsets, Nd4jLong xLength, void *vindices, Nd4jLong *iShapeInfo, Nd4jLong iLength, void **vz, Nd4jLong **zTadShapeInfos, Nd4jLong **zTadOffsets, Nd4jLong numOutputs) { - auto x = reinterpret_cast(vx); - auto indices = reinterpret_cast(vindices); + static _CUDA_G void dynamicPartitionTadKernel(const void *vx, const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffsets, Nd4jLong xLength, const void *vindices, const Nd4jLong *iShapeInfo, Nd4jLong iLength, void **vz, Nd4jLong **zTadShapeInfos, Nd4jLong **zTadOffsets, Nd4jLong numOutputs) { + auto x = reinterpret_cast(vx); + auto indices = reinterpret_cast(vindices); // we run things in blocks, 1 partition per block of threads for (int i = blockIdx.x; i < numOutputs; i += gridDim.x) { @@ -124,11 +124,11 @@ namespace sd { for (int i = sourceDimsLen; i > 0; i--) sourceDims[sourceDimsLen - i] = input->rankOf() - i; //compute tad array for given dimensions - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), sourceDims); + auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), sourceDims); std::vector outBuffers(outSize); - std::vector tadShapes(outSize); - std::vector tadOffsets(outSize); + std::vector tadShapes(outSize); + std::vector tadOffsets(outSize); std::vector numTads(outSize); // fill up dimensions array for before kernel for (unsigned int i = 0; i < 
outSize; i++) { @@ -140,9 +140,9 @@ namespace sd { for (int k = 1; k < r; k++) outDims[k - 1] = k; - auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(outputList.at(i)->getShapeInfo(), outDims); + auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(outputList.at(i)->shapeInfo(), outDims); - outBuffers[i] = outputList.at(i)->getSpecialBuffer(); + outBuffers[i] = outputList.at(i)->specialBuffer(); tadShapes[i] = packZ.platformShapeInfo(); tadOffsets[i] = packZ.platformOffsets(); } @@ -152,24 +152,24 @@ namespace sd { auto dOutTadShapes = reinterpret_cast(pm.replicatePointer(tadShapes.data(), tadShapes.size() * sizeof(Nd4jLong *))); auto dOutTadOffsets = reinterpret_cast(pm.replicatePointer(tadOffsets.data(), tadOffsets.size() * sizeof(Nd4jLong *))); // run kernel on device - dynamicPartitionTadKernel<<<256, 256, 1024, *context->getCudaStream()>>>(input->getSpecialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), shape::length(packX.primaryShapeInfo()), indices->getSpecialBuffer(), indices->getSpecialShapeInfo(), indices->lengthOf(), dOutBuffers, dOutTadShapes, dOutTadOffsets, outSize); + dynamicPartitionTadKernel<<<256, 256, 1024, *context->getCudaStream()>>>(input->specialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), shape::length(packX.primaryShapeInfo()), indices->specialBuffer(), indices->specialShapeInfo(), indices->lengthOf(), dOutBuffers, dOutTadShapes, dOutTadOffsets, outSize); } else { // linear case auto numThreads = 256; auto shmemSize = numThreads * sizeof(Y) * 2 + 1024; std::vector outBuffers; - std::vector outShapes; + std::vector outShapes; for (auto v:outputList) { - outBuffers.emplace_back(v->getSpecialBuffer()); - outShapes.emplace_back(v->getSpecialShapeInfo()); + outBuffers.emplace_back(v->specialBuffer()); + outShapes.emplace_back(v->specialShapeInfo()); } auto dOutBuffers = reinterpret_cast(pm.replicatePointer(outBuffers.data(), outBuffers.size() * sizeof(void *))); auto dOutShapes = 
reinterpret_cast(pm.replicatePointer(outShapes.data(), outShapes.size() * sizeof(Nd4jLong *))); - dynamicPartitionScalarKernel<<<256, numThreads, shmemSize, *context->getCudaStream()>>>(input->getSpecialBuffer(), input->getSpecialShapeInfo(), indices->getSpecialBuffer(), indices-> getSpecialShapeInfo(), dOutBuffers, dOutShapes, outSize); + dynamicPartitionScalarKernel<<<256, numThreads, shmemSize, *context->getCudaStream()>>>(input->specialBuffer(), input->specialShapeInfo(), indices->specialBuffer(), indices->specialShapeInfo(), dOutBuffers, dOutShapes, outSize); } pm.synchronize(); @@ -177,7 +177,7 @@ namespace sd { template - static _CUDA_G void dynamicStitchScalarKernel(void **vx, Nd4jLong **xShapeInfos, void **vindices, Nd4jLong **iShapeInfos, int inputSize, void *vz, Nd4jLong *zShapeInfo, Nd4jLong zLength) { + static _CUDA_G void dynamicStitchScalarKernel(void **vx, Nd4jLong **xShapeInfos, void **vindices, Nd4jLong **iShapeInfos, int inputSize, void *vz, const Nd4jLong *zShapeInfo, Nd4jLong zLength) { auto z = reinterpret_cast(vz); for (int e = blockIdx.x; e < inputSize; e += gridDim.x) { @@ -198,7 +198,7 @@ namespace sd { } template - static _CUDA_G void dynamicStitchTadKernel(void **vx, Nd4jLong **xTadShapeInfos, Nd4jLong **xTadOffsets, void **vindices, Nd4jLong **iShapeInfos, int inputSize, void *vz, Nd4jLong *zTadShapeInfo, Nd4jLong *zTadOffsets) { + static _CUDA_G void dynamicStitchTadKernel(void **vx, Nd4jLong **xTadShapeInfos, Nd4jLong **xTadOffsets, void **vindices, Nd4jLong **iShapeInfos, int inputSize, void *vz, const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffsets) { auto bz = reinterpret_cast(vz); for (int e = blockIdx.x; e < inputSize; e += gridDim.x) { @@ -237,17 +237,17 @@ namespace sd { PointersManager pm(context, "dynamicStitch"); if (output->isVector()) { - std::vector inputBuffers(inputSize); - std::vector inputShapes(inputSize); - std::vector indicesBuffers(inputSize); - std::vector indicesShapes(inputSize); + std::vector 
inputBuffers(inputSize); + std::vector inputShapes(inputSize); + std::vector indicesBuffers(inputSize); + std::vector indicesShapes(inputSize); for (int e = 0; e < inputSize; e++) { - inputBuffers[e] = inputs.at(e)->getSpecialBuffer(); - indicesBuffers[e] = indices.at(e)->getSpecialBuffer(); + inputBuffers[e] = inputs.at(e)->specialBuffer(); + indicesBuffers[e] = indices.at(e)->specialBuffer(); - inputShapes[e] = inputs.at(e)->getSpecialShapeInfo(); - indicesShapes[e] = indices.at(e)->getSpecialShapeInfo(); + inputShapes[e] = inputs.at(e)->specialShapeInfo(); + indicesShapes[e] = indices.at(e)->specialShapeInfo(); } // copying pointers to buffers to device @@ -262,26 +262,26 @@ namespace sd { for (int i = restDims.size(); i > 0; i--) restDims[restDims.size() - i] = output->rankOf() - i; - auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), restDims); + auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), restDims); - std::vector inputBuffers(inputSize); - std::vector inputTadShapes(inputSize); - std::vector inputTadOffsets(inputSize); + std::vector inputBuffers(inputSize); + std::vector inputTadShapes(inputSize); + std::vector inputTadOffsets(inputSize); - std::vector indicesBuffers(inputSize); - std::vector indicesShapes(inputSize); + std::vector indicesBuffers(inputSize); + std::vector indicesShapes(inputSize); for (int e = 0; e < inputSize; e++) { std::vector sourceDims(inputs[e]->rankOf() - indices[e]->rankOf()); for (int i = sourceDims.size(); i > 0; i--) sourceDims[sourceDims.size() - i] = inputs[e]->rankOf() - i; - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(inputs[e]->getShapeInfo(), sourceDims); + auto packX = ConstantTadHelper::getInstance()->tadForDimensions(inputs[e]->shapeInfo(), sourceDims); - indicesBuffers[e] = indices[e]->getSpecialBuffer(); - indicesShapes[e] = indices[e]->getSpecialShapeInfo(); + indicesBuffers[e] = indices[e]->specialBuffer(); + 
indicesShapes[e] = indices[e]->specialShapeInfo(); - inputBuffers[e] = inputs[e]->getSpecialBuffer(); + inputBuffers[e] = inputs[e]->specialBuffer(); inputTadShapes[e] = packX.platformShapeInfo(); inputTadOffsets[e] = packX.platformOffsets(); } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/extract_patches.cu b/libnd4j/include/ops/declarable/helpers/cuda/extract_patches.cu index 3a0ea9240..c5e8848cb 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/extract_patches.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/extract_patches.cu @@ -47,7 +47,7 @@ namespace helpers { // - outputOffsets - output TAD offsets // template - static __global__ void globalExtractPatchesKernel(bool theSame, int batchCount, int sizeRow, int sizeCol, int rowDim, int colDim, int outRowDim, int outColDim, int strideRow, int strideCol, int rateRow, int rateCol, int rowCast, int colCast, int lastDim, T* input, Nd4jLong* patchShape, Nd4jLong* inputOffsets, T* output, Nd4jLong* outTadShape, Nd4jLong* outputOffsets) { + static __global__ void globalExtractPatchesKernel(bool theSame, int batchCount, int sizeRow, int sizeCol, int rowDim, int colDim, int outRowDim, int outColDim, int strideRow, int strideCol, int rateRow, int rateCol, int rowCast, int colCast, int lastDim, const T* input, const Nd4jLong* patchShape, const Nd4jLong* inputOffsets, T* output, const Nd4jLong* outTadShape, const Nd4jLong* outputOffsets) { auto start = threadIdx.x + blockIdx.x * blockDim.x; @@ -114,8 +114,8 @@ namespace helpers { if (sizeCol * rateCol < 3) colCast = 0; - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(images->getShapeInfo(), restDims.data(), restDims.size()); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), restDims.data(), restDims.size()); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(images->shapeInfo(), restDims.data(), restDims.size()); + auto packZ = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), restDims.data(), restDims.size()); int batchCount = packX.numberOfTads(); PointersManager manager(context, "helpers::extractPatches"); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu b/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu index 262b1fe3e..7fcd71dba 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu @@ -79,9 +79,11 @@ namespace helpers { } template - static __global__ void fakeQuantWithMinMaxKernel(T* input, Nd4jLong* inputShape, T* min, T* max, - int lowIntBound, int upperIntBound, Nd4jLong channels, - T* output, Nd4jLong* outputShape, Nd4jLong length) { + static __global__ void fakeQuantWithMinMaxKernel(const T* input, const Nd4jLong* inputShape, + T* min, T* max, + int lowIntBound, int upperIntBound, Nd4jLong channels, + T* output, const Nd4jLong* outputShape, + Nd4jLong length) { __shared__ int block; if (threadIdx.x == 0) { block = length / channels; // to loop with last dimension as block @@ -129,10 +131,6 @@ namespace helpers { void fakeQuantWithMinMaxVarsPerChannel(LaunchContext* context, NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) { BUILD_SINGLE_SELECTOR(input->dataType(), fakeQuantWithMinMaxVarsPerChannel_, (context, input, min, max, numBits, narrowed, output), FLOAT_TYPES); } - - BUILD_SINGLE_TEMPLATE(template void fakeQuantWithMinMaxVars_, (NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output), FLOAT_TYPES); - BUILD_SINGLE_TEMPLATE(template void fakeQuantWithMinMaxVarsPerChannel_, (LaunchContext* context, NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output), FLOAT_TYPES); - } } } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/flatten.cu b/libnd4j/include/ops/declarable/helpers/cuda/flatten.cu index 
3600104e1..aa2ff8297 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/flatten.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/flatten.cu @@ -25,7 +25,7 @@ namespace sd { namespace ops { namespace helpers { template - void _CUDA_G flattenKernel(void **xBuffers, Nd4jLong **xShapeInfos, Nd4jLong *offsets, Nd4jLong numInputs, void *zBuffer, Nd4jLong *zShapeInfo, char order) { + void _CUDA_G flattenKernel(void **xBuffers, Nd4jLong **xShapeInfos, Nd4jLong *offsets, Nd4jLong numInputs, void *zBuffer, const Nd4jLong *zShapeInfo, char order) { int xCoord[MAX_RANK]; @@ -47,9 +47,9 @@ namespace sd { void flatten_(sd::LaunchContext *context, std::vector &inputs, NDArray *output, char order) { PointersManager pm(context, "flatten"); - std::vector hdBuffers(inputs.size()); + std::vector hdBuffers(inputs.size()); std::vector hOffsets(inputs.size()); - std::vector hdShapes(inputs.size()); + std::vector hdShapes(inputs.size()); Nd4jLong cOffset = 0; // calculating offsets in output @@ -67,7 +67,7 @@ namespace sd { auto dOffsets = (Nd4jLong *) pm.replicatePointer(hOffsets.data(), inputs.size() * sizeof(Nd4jLong)); - flattenKernel<<<256, 512, 8192, *context->getCudaStream()>>>(dBuffers, dShapes, dOffsets, inputs.size(), output->getSpecialBuffer(), output->getSpecialShapeInfo(), order); + flattenKernel<<<256, 512, 8192, *context->getCudaStream()>>>(dBuffers, dShapes, dOffsets, inputs.size(), output->specialBuffer(), output->specialShapeInfo(), order); pm.synchronize(); } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/gather.cu b/libnd4j/include/ops/declarable/helpers/cuda/gather.cu index 03d2f35d8..26778aa63 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/gather.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/gather.cu @@ -161,13 +161,13 @@ void gather(sd::LaunchContext * context, const NDArray* input, const NDArray* in sizeof(Nd4jLong))); NDArray::prepareSpecialUse({output}, {input, pIndices}); - BUILD_DOUBLE_SELECTOR(input->dataType(), 
pIndices->dataType(), gatherCudaLauncher, (context->getCudaStream(), numOfSubArrs, input->getSpecialBuffer(), xShapeInfo, xOffsets, pIndices->getSpecialBuffer(), pIndices->getSpecialShapeInfo(), output->getSpecialBuffer(), zShapeInfo, zOffsets), LIBND4J_TYPES, INDEXING_TYPES); + BUILD_DOUBLE_SELECTOR(input->dataType(), pIndices->dataType(), gatherCudaLauncher, (context->getCudaStream(), numOfSubArrs, input->specialBuffer(), xShapeInfo, xOffsets, pIndices->specialBuffer(), pIndices->specialShapeInfo(), output->specialBuffer(), zShapeInfo, zOffsets), LIBND4J_TYPES, INDEXING_TYPES); NDArray::registerSpecialUse({output}, {input, pIndices}); manager.synchronize(); } else { NDArray::prepareSpecialUse({output}, {input, pIndices}); - BUILD_DOUBLE_SELECTOR(input->dataType(), pIndices->dataType(), gatherCudaLinear, (context->getCudaStream(), input->getSpecialBuffer(), input->getSpecialShapeInfo(), pIndices->getSpecialBuffer(), pIndices->getSpecialShapeInfo(), output->specialBuffer(), output->specialShapeInfo()), LIBND4J_TYPES, INDEXING_TYPES); + BUILD_DOUBLE_SELECTOR(input->dataType(), pIndices->dataType(), gatherCudaLinear, (context->getCudaStream(), input->specialBuffer(), input->specialShapeInfo(), pIndices->specialBuffer(), pIndices->specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo()), LIBND4J_TYPES, INDEXING_TYPES); NDArray::registerSpecialUse({output}, {input, pIndices}); } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/gather_nd.cu b/libnd4j/include/ops/declarable/helpers/cuda/gather_nd.cu index 21ab1ff98..d72f3e1bc 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/gather_nd.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/gather_nd.cu @@ -135,7 +135,7 @@ namespace sd { PointersManager manager(context, "gatherND"); NDArray::prepareSpecialUse({&output}, {&input, &indices}); - BUILD_DOUBLE_SELECTOR(xType, yType, gatherNDCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.getSpecialBuffer(), 
input.getSpecialShapeInfo(), indices.getSpecialBuffer(), indices.getSpecialShapeInfo(), output.getSpecialBuffer(), output.getSpecialShapeInfo()), LIBND4J_TYPES, INDEXING_TYPES); + BUILD_DOUBLE_SELECTOR(xType, yType, gatherNDCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), indices.specialBuffer(), indices.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo()), LIBND4J_TYPES, INDEXING_TYPES); NDArray::registerSpecialUse({&output}, {&input, &indices}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/hamming.cu b/libnd4j/include/ops/declarable/helpers/cuda/hamming.cu index f88ec6003..e3fdd9411 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/hamming.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/hamming.cu @@ -25,9 +25,9 @@ namespace sd { namespace ops { namespace helpers { template - static _CUDA_G void _hammingKernel(void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, void *vz, void *reductionBuffer, Nd4jLong length) { - auto x = reinterpret_cast(vx); - auto y = reinterpret_cast(vy); + static _CUDA_G void _hammingKernel(const void *vx, const Nd4jLong *xShapeInfo, const void *vy, const Nd4jLong *yShapeInfo, void *vz, void *reductionBuffer, Nd4jLong length) { + auto x = reinterpret_cast(vx); + auto y = reinterpret_cast(vy); auto z = reinterpret_cast(vz); __shared__ Nd4jLong *shared; diff --git a/libnd4j/include/ops/declarable/helpers/cuda/histogram.cu b/libnd4j/include/ops/declarable/helpers/cuda/histogram.cu index c6123d6da..6d7310fc0 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/histogram.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/histogram.cu @@ -25,7 +25,7 @@ namespace sd { namespace ops { namespace helpers { template - void _CUDA_G histogramKernel(void *xBuffer, Nd4jLong *xShapeInfo, void *zBuffer, Nd4jLong *zShapeInfo, void *allocationPointer, void *reductionPointer, Nd4jLong numBins, X* 
min_val, X* max_val) { + void _CUDA_G histogramKernel(void *xBuffer, const Nd4jLong *xShapeInfo, void *zBuffer, const Nd4jLong *zShapeInfo, void *allocationPointer, void *reductionPointer, Nd4jLong numBins, X* min_val, X* max_val) { int tid = blockIdx.x * blockDim.x + threadIdx.x; auto dx = reinterpret_cast(xBuffer); auto result = reinterpret_cast(zBuffer); @@ -108,13 +108,13 @@ namespace sd { } template - static void histogram_(sd::LaunchContext *context, void *xBuffer, Nd4jLong *xShapeInfo, Nd4jLong *dxShapeInfo, void *zBuffer, Nd4jLong *zShapeInfo, Nd4jLong numBins, void* min_val, void* max_val) { + static void histogram_(sd::LaunchContext *context, void *xBuffer, const Nd4jLong *xShapeInfo, const Nd4jLong *dxShapeInfo, void *zBuffer, const Nd4jLong *zShapeInfo, Nd4jLong numBins, void* min_val, void* max_val) { int numThreads = 256; int numBlocks = sd::math::nd4j_max(256, sd::math::nd4j_min(1, shape::length(xShapeInfo) / numThreads)); int workspaceSize = numBlocks * numBins; auto tmp = NDArrayFactory::create('c', {workspaceSize}, context); - histogramKernel<<getCudaStream()>>>(xBuffer, dxShapeInfo, zBuffer, zShapeInfo, tmp.getSpecialBuffer(), context->getReductionPointer(), numBins, reinterpret_cast(min_val), reinterpret_cast(max_val)); + histogramKernel<<getCudaStream()>>>(xBuffer, dxShapeInfo, zBuffer, zShapeInfo, tmp.specialBuffer(), context->getReductionPointer(), numBins, reinterpret_cast(min_val), reinterpret_cast(max_val)); cudaStreamSynchronize(*context->getCudaStream()); } @@ -127,7 +127,7 @@ namespace sd { auto max_val = input.reduceNumber(reduce::SameOps::Max); // min_val.printIndexedBuffer("MIN"); // max_val.printIndexedBuffer("MAX"); - BUILD_DOUBLE_SELECTOR(input.dataType(), output.dataType(), histogram_, (context, input.specialBuffer(), input.shapeInfo(), input.specialShapeInfo(), output.getSpecialBuffer(), output.getSpecialShapeInfo(), numBins, min_val.specialBuffer(), max_val.specialBuffer()), LIBND4J_TYPES, INTEGER_TYPES); + 
BUILD_DOUBLE_SELECTOR(input.dataType(), output.dataType(), histogram_, (context, input.specialBuffer(), input.shapeInfo(), input.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), numBins, min_val.specialBuffer(), max_val.specialBuffer()), LIBND4J_TYPES, INTEGER_TYPES); NDArray::registerSpecialUse({&output}, {&input}); } } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/histogramFixedWidth.cu b/libnd4j/include/ops/declarable/helpers/cuda/histogramFixedWidth.cu index e39f9b438..adb5a3ec4 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/histogramFixedWidth.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/histogramFixedWidth.cu @@ -77,7 +77,7 @@ __host__ static void histogramFixedWidthCudaLauncher(const cudaStream_t *stream, const X leftEdge = range.e(0); const X rightEdge = range.e(1); - histogramFixedWidthCuda<<<256, 256, 1024, *stream>>>(input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), leftEdge, rightEdge); + histogramFixedWidthCuda<<<256, 256, 1024, *stream>>>(input.specialBuffer(), input.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), leftEdge, rightEdge); } //////////////////////////////////////////////////////////////////////// @@ -164,8 +164,8 @@ void histogramFixedWidth(sd::LaunchContext* context, const NDArray& input, const // cudaError_t err = cudaMalloc(&outputBuffer, output.lengthOf() * sizeof(Nd4jLong)); // if (err != 0) // throw cuda_exception::build("helpers::histogramFixedWidth: Cannot allocate memory for output", err); -// copyBuffers<<<256, 512, 8192, *stream>>>(outputBuffer, output.getSpecialBuffer(), output.getSpecialShapeInfo(), output.lengthOf()); -// histogramFixedWidthKernel<<<256, 512, 8192, *stream>>>(outputBuffer, output.lengthOf(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), input.lengthOf(), leftEdge, binWidth, secondEdge, lastButOneEdge); +// copyBuffers<<<256, 512, 8192, *stream>>>(outputBuffer, 
output.specialBuffer(), output.specialShapeInfo(), output.lengthOf()); +// histogramFixedWidthKernel<<<256, 512, 8192, *stream>>>(outputBuffer, output.lengthOf(), input.specialBuffer(), input.specialShapeInfo(), input.lengthOf(), leftEdge, binWidth, secondEdge, lastButOneEdge); // returnBuffers<<<256, 512, 8192, *stream>>>(output.specialBuffer(), outputBuffer, output.specialShapeInfo(), output.lengthOf()); // //cudaSyncStream(*stream); // err = cudaFree(outputBuffer); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/im2col.cu b/libnd4j/include/ops/declarable/helpers/cuda/im2col.cu index 0dbca8c47..08f5959e8 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/im2col.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/im2col.cu @@ -93,7 +93,7 @@ void im2col(sd::LaunchContext& context, const NDArray& image, NDArray& columns, const int blocksPerGrid = (columns.lengthOf() + threadsPerBlock - 1) / threadsPerBlock; NDArray::prepareSpecialUse({&columns}, {&image}); - BUILD_SINGLE_SELECTOR(columns.dataType(), im2colCudaLauncher, (blocksPerGrid, threadsPerBlock, context, image.getSpecialBuffer(), columns.getSpecialBuffer(), image.getSpecialShapeInfo(), columns.getSpecialShapeInfo(), sH, sW, pH, pW, dH, dW, arrZeroPadVal.e(0)), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(columns.dataType(), im2colCudaLauncher, (blocksPerGrid, threadsPerBlock, context, image.specialBuffer(), columns.specialBuffer(), image.specialShapeInfo(), columns.specialShapeInfo(), sH, sW, pH, pW, dH, dW, arrZeroPadVal.e(0)), FLOAT_TYPES); NDArray::registerSpecialUse({&columns}, {&image}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu b/libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu index 6d6ec95ed..47319f100 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/image_draw_bounding_boxes.cu @@ -49,9 +49,12 @@ namespace helpers { } 
template - static __global__ void drawBoundingBoxesKernel(T const* images, Nd4jLong* imagesShape, float const* boxes, - Nd4jLong* boxesShape, float const* colorTable, Nd4jLong* colorTableShape, T* output, Nd4jLong* outputShape, - Nd4jLong batchSize, Nd4jLong width, Nd4jLong height, Nd4jLong channels, Nd4jLong boxSize, Nd4jLong colorTableLen) { + static __global__ void drawBoundingBoxesKernel(T const* images, const Nd4jLong* imagesShape, + float const* boxes, const Nd4jLong* boxesShape, + float const* colorTable, const Nd4jLong* colorTableShape, + T* output, const Nd4jLong* outputShape, + Nd4jLong batchSize, Nd4jLong width, Nd4jLong height, + Nd4jLong channels, Nd4jLong boxSize, Nd4jLong colorTableLen) { for (auto batch = blockIdx.x; batch < (int)batchSize; batch += gridDim.x) { // loop by batch for (auto boxIndex = 0; boxIndex < boxSize; ++boxIndex) { @@ -153,8 +156,8 @@ namespace helpers { auto boxesBuf = boxes->getDataBuffer()->specialAsT(); // boxes should be float32 auto colorsTableBuf = colorsTable.getDataBuffer()->specialAsT(); // color table is float32 auto outputBuf = output->dataBuffer()->specialAsT(); - drawBoundingBoxesKernel<<<128, 128, 1024, *stream>>>(imagesBuf, images->getSpecialShapeInfo(), - boxesBuf, boxes->getSpecialShapeInfo(), colorsTableBuf, colorsTable.getSpecialShapeInfo(), + drawBoundingBoxesKernel<<<128, 128, 1024, *stream>>>(imagesBuf, images->specialShapeInfo(), + boxesBuf, boxes->specialShapeInfo(), colorsTableBuf, colorsTable.specialShapeInfo(), outputBuf, output->specialShapeInfo(), batchSize, width, height, channels, boxSize, colorsTable.lengthOf()); } @@ -171,7 +174,7 @@ namespace helpers { BUILD_SINGLE_SELECTOR(output->dataType(), drawBoundingBoxesH, (context, images, boxes, colors, output), FLOAT_TYPES); NDArray::registerSpecialUse({output}, {images, boxes, colors}); } - BUILD_SINGLE_TEMPLATE(template void drawBoundingBoxesH, (sd::LaunchContext* context, NDArray const* images, NDArray const* boxes, NDArray const* colors, NDArray* 
output), FLOAT_TYPES); + } } } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/image_resize.cu b/libnd4j/include/ops/declarable/helpers/cuda/image_resize.cu index 6a045bc8d..d483f87b3 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/image_resize.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/image_resize.cu @@ -128,7 +128,7 @@ namespace helpers { // template static __global__ void resizeImageKernel(T const* input, Nd4jLong const* inputShape, Z* outputYptr, - Nd4jLong* outputShape, Nd4jLong batchSize, Nd4jLong outWidth, Nd4jLong outHeight, Nd4jLong channels, + Nd4jLong const* outputShape, Nd4jLong batchSize, Nd4jLong outWidth, Nd4jLong outHeight, Nd4jLong channels, Nd4jLong inRowSize, Nd4jLong outRowSize, Nd4jLong inBatchNumValues, BilinearInterpolationData* xs_, BilinearInterpolationData* ys_) { @@ -171,11 +171,11 @@ namespace helpers { Nd4jLong inBatchNumValues = inHeight * inRowSize; Nd4jLong outRowSize = outWidth * channels; auto stream = context->getCudaStream(); - T const* pInput = images->getDataBuffer()->specialAsT(); //reinterpret_cast(images->getSpecialBuffer()); // this works only with 'c' direction + T const* pInput = images->getDataBuffer()->specialAsT(); //reinterpret_cast(images->specialBuffer()); // this works only with 'c' direction F* pOutput = output->dataBuffer()->specialAsT();//reinterpret_cast(output->specialBuffer()); dim3 batchSizeBlock(batchSize, 1, 1); dim3 pictureBlock(outHeight, outWidth, channels); - resizeImageKernel<<<256, 256, 256, *stream>>>(pInput, images->getSpecialShapeInfo(), pOutput, + resizeImageKernel<<<256, 256, 256, *stream>>>(pInput, images->specialShapeInfo(), pOutput, output->specialShapeInfo(), batchSize, outWidth, outHeight, channels, inRowSize, outRowSize, inBatchNumValues, xs_, ys_); @@ -255,7 +255,7 @@ namespace helpers { // resize by interpolation nearest neighbor algorithm kernel // template - static __global__ void resizeNeighborKernel(T const* input, Nd4jLong* inputShape, T* output, 
Nd4jLong* outputShape, + static __global__ void resizeNeighborKernel(T const* input, Nd4jLong const* inputShape, T* output, Nd4jLong const* outputShape, Nd4jLong batchSize, Nd4jLong inWidth, Nd4jLong inHeight, Nd4jLong outWidth, Nd4jLong outHeight, Nd4jLong channels, double widthScale, double heightScale, bool alignCorners, bool halfPixelCenters) { //for (int b = blockIdx.x; b < batchSize; b += gridDim.x) @@ -325,12 +325,12 @@ namespace helpers { float heightScale = calculateResizeScale(inHeight, outHeight, alignCorners); float widthScale = calculateResizeScale(inWidth, outWidth, alignCorners); - auto imagesBuffer = images->getDataBuffer()->specialAsT();//reinterpret_cast(images->getSpecialBuffer()); + auto imagesBuffer = images->getDataBuffer()->specialAsT();//reinterpret_cast(images->specialBuffer()); auto outputBuffer = output->dataBuffer()->specialAsT();//reinterpret_cast(output->specialBuffer()); auto stream = context->getCudaStream(); NDArray::prepareSpecialUse({output}, {images}); - resizeNeighborKernel<<>>(imagesBuffer, images->getSpecialShapeInfo(), outputBuffer, output->specialShapeInfo(), + resizeNeighborKernel<<>>(imagesBuffer, images->specialShapeInfo(), outputBuffer, output->specialShapeInfo(), batchSize, inWidth, inHeight, outWidth, outHeight, channels, widthScale, heightScale, alignCorners, halfPixelCenters); NDArray::registerSpecialUse({output}, {images}); @@ -1055,7 +1055,7 @@ namespace helpers { template static __global__ void resizeAreaKernel(ImageResizerState const* pSt, CachedInterpolation const* caches, float scale, - T const* inputPtr, Nd4jLong* inputShape, float* outputPtr, Nd4jLong* outputShape, ScaleCache* cachePool) { //batch * outWidth * outHeight + T const* inputPtr, Nd4jLong const* inputShape, float* outputPtr, Nd4jLong const* outputShape, ScaleCache* cachePool) { //batch * outWidth * outHeight for (auto batch = blockIdx.x; batch < pSt->batchSize; batch += gridDim.x) { for (auto y = threadIdx.x; y < pSt->outHeight; y += blockDim.x) { 
@@ -1106,7 +1106,7 @@ namespace helpers { static void resizeArea(cudaStream_t* stream, ImageResizerState const& st, CachedInterpolation* cache, NDArray const* input, NDArray* output) { - T const* inputPtr = reinterpret_cast(input->getSpecialBuffer()); + T const* inputPtr = reinterpret_cast(input->specialBuffer()); // float* yScales; // T const** yPtrs; float scale = 1.f / (st.heightScale * st.widthScale); @@ -1116,7 +1116,7 @@ namespace helpers { err = cudaMemcpyAsync(pSt, &st, sizeof(ImageResizerState), cudaMemcpyHostToDevice, *stream); ScaleCache* cachePool; err = cudaMalloc(&cachePool, sizeof(ScaleCache) * st.batchSize * st.outWidth * st.outHeight); - resizeAreaKernel<<<128, 2, 2048, *stream>>>(pSt, cache, scale, inputPtr, input->getSpecialShapeInfo(), outputPtr, + resizeAreaKernel<<<128, 2, 2048, *stream>>>(pSt, cache, scale, inputPtr, input->specialShapeInfo(), outputPtr, output->specialShapeInfo(), cachePool); err = cudaStreamSynchronize(*stream); err = cudaFree(cachePool); @@ -1197,9 +1197,9 @@ namespace helpers { // cropAndResize kernel type of input(images) and output should be the same // template - static __global__ void cropAndResizeKernel(T const *images, Nd4jLong* imagesShape, Z const* boxes, Nd4jLong* boxesShape, - I const* indices, Nd4jLong* indexShape, I const* cropSize, Nd4jLong* cropShape, int method, - double extrapolationVal, T* output, Nd4jLong* outputShape, int numBoxes, int cropHeight, int cropWidth, + static __global__ void cropAndResizeKernel(T const *images, Nd4jLong const* imagesShape, Z const* boxes, Nd4jLong const* boxesShape, + I const* indices, Nd4jLong const* indexShape, I const* cropSize, Nd4jLong const* cropShape, int method, + double extrapolationVal, T* output, Nd4jLong const* outputShape, int numBoxes, int cropHeight, int cropWidth, int batchSize, int imageHeight, int imageWidth, int depth) { for (int b = blockIdx.x; b < numBoxes; b += gridDim.x) @@ -1337,10 +1337,10 @@ namespace helpers { const int cropWidth = 
crops->sizeAt(2); const int depth = crops->sizeAt(3); auto stream = context->getCudaStream(); - T const* imagesBuf = reinterpret_cast(images->getSpecialBuffer()); - Z const* boxesBuf = reinterpret_cast(boxes->getSpecialBuffer()); - I const* indexBuf = reinterpret_cast(indices->getSpecialBuffer()); - I const* cropSizes = reinterpret_cast(cropSize->getSpecialBuffer()); + T const* imagesBuf = reinterpret_cast(images->specialBuffer()); + Z const* boxesBuf = reinterpret_cast(boxes->specialBuffer()); + I const* indexBuf = reinterpret_cast(indices->specialBuffer()); + I const* cropSizes = reinterpret_cast(cropSize->specialBuffer()); T* outBuf = reinterpret_cast(crops->specialBuffer()); int threadsPerBlock = math::nd4j_max(imageHeight * imageWidth, cropHeight * cropWidth); @@ -1348,8 +1348,8 @@ namespace helpers { threadsPerBlock = MAX_NUM_THREADS/4; NDArray::prepareSpecialUse({crops}, {images, boxes, indices, cropSize}); - cropAndResizeKernel<<>>(imagesBuf, images->getSpecialShapeInfo(), boxesBuf, boxes->getSpecialShapeInfo(), indexBuf, indices->getSpecialShapeInfo(), - cropSizes, cropSize->getSpecialShapeInfo(), method, extrapolationVal, outBuf, crops->specialShapeInfo(), numBoxes, cropHeight, cropWidth, batchSize, imageHeight, imageWidth, depth); + cropAndResizeKernel<<>>(imagesBuf, images->specialShapeInfo(), boxesBuf, boxes->specialShapeInfo(), indexBuf, indices->specialShapeInfo(), + cropSizes, cropSize->specialShapeInfo(), method, extrapolationVal, outBuf, crops->specialShapeInfo(), numBoxes, cropHeight, cropWidth, batchSize, imageHeight, imageWidth, depth); NDArray::registerSpecialUse({crops}, {images, boxes, indices, cropSize}); } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/image_suppression.cu b/libnd4j/include/ops/declarable/helpers/cuda/image_suppression.cu index e6d9a27b1..8b7e8ee57 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/image_suppression.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/image_suppression.cu @@ -38,7 +38,7 @@ 
namespace helpers { // return value: true, if threshold is overcome, false otherwise // template - static __device__ bool needToSuppressWithThreshold(T* boxes, Nd4jLong* boxesShape, int previousIndex, int nextIndex, T threshold) { + static __device__ bool needToSuppressWithThreshold(T* boxes, Nd4jLong const* boxesShape, int previousIndex, int nextIndex, T threshold) { Nd4jLong previous0[] = {previousIndex, 0}; Nd4jLong previous1[] = {previousIndex, 1}; Nd4jLong previous2[] = {previousIndex, 2}; @@ -80,7 +80,7 @@ namespace helpers { } template - static __device__ T similirityV3(T* boxes, Nd4jLong* boxesShape, int previousIndex, int nextIndex) { + static __device__ T similirityV3(T* boxes, Nd4jLong const* boxesShape, int previousIndex, int nextIndex) { Nd4jLong previous0[] = {previousIndex, 0}; Nd4jLong previous1[] = {previousIndex, 1}; Nd4jLong previous2[] = {previousIndex, 2}; @@ -127,7 +127,7 @@ namespace helpers { // we compute boolean flag as shared uint32 and return it on final only for the first thread // template - static __global__ void shouldSelectKernel(T* boxesBuf, Nd4jLong* boxesShape, I* indexBuf, I* selectedIndicesData, double threshold, int numSelected, int i, bool* shouldSelect) { + static __global__ void shouldSelectKernel(T* boxesBuf, Nd4jLong const* boxesShape, I* indexBuf, I* selectedIndicesData, double threshold, int numSelected, int i, bool* shouldSelect) { auto tid = blockIdx.x * blockDim.x + threadIdx.x; auto step = gridDim.x * blockDim.x; __shared__ unsigned int shouldSelectShared; @@ -242,7 +242,7 @@ namespace helpers { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// template - static __device__ bool checkOverlapBoxes(T* boxes, Nd4jLong* shape, T* scores, I* indices, I* selectedIndices, I* startIndices, I selectedSize, I nextCandidateIndex, T overlapThreshold, T scoreThreshold, bool simple) { + static __device__ bool checkOverlapBoxes(T* boxes, Nd4jLong const* shape, 
T* scores, I* indices, I* selectedIndices, I* startIndices, I selectedSize, I nextCandidateIndex, T overlapThreshold, T scoreThreshold, bool simple) { bool shouldHardSuppress = false; T& nextCandidateScore = scores[nextCandidateIndex]; I selectedIndex = indices[nextCandidateIndex]; @@ -276,8 +276,8 @@ namespace helpers { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// template static __global__ void - suppressNonMaxOverlapKernel(T* boxes, Nd4jLong* boxesShape, T* scoresData, I* indices, I* startIndices, Nd4jLong length, I maxOutputLen, - T overlapThreshold, T scoreThreshold, I* output, Nd4jLong* outputShape, I* outputLength, bool simple) { + suppressNonMaxOverlapKernel(T* boxes, Nd4jLong const* boxesShape, T* scoresData, I* indices, I* startIndices, Nd4jLong length, I maxOutputLen, + T overlapThreshold, T scoreThreshold, I* output, Nd4jLong const* outputShape, I* outputLength, bool simple) { __shared__ I selectedSize; __shared__ I* tempOutput; diff --git a/libnd4j/include/ops/declarable/helpers/cuda/imagesHelpers.cu b/libnd4j/include/ops/declarable/helpers/cuda/imagesHelpers.cu index 54f306ef7..c26b79ee6 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/imagesHelpers.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/imagesHelpers.cu @@ -69,8 +69,8 @@ linkage void rgbToYuvCudaLauncher(const int blocksPerGrid, const int threadsPerB /////////////////////////////////////////////////////////////////// void transformRgbYuv(sd::LaunchContext* context, const NDArray& input, NDArray& output, const int dimC) { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), { dimC }); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.getShapeInfo(), { dimC }); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), { dimC }); + auto packZ = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), { dimC }); const Nd4jLong numOfTads = packX.numberOfTads(); @@ -80,7 +80,7 @@ void transformRgbYuv(sd::LaunchContext* context, const NDArray& input, NDArray& PointersManager manager(context, "yuv_to_rgb"); NDArray::prepareSpecialUse({ &output }, { &input }); - BUILD_SINGLE_SELECTOR(input.dataType(), rgbToYuvCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), packX.platformOffsets(), output.specialBuffer(), output.specialShapeInfo(), packZ.platformOffsets(), numOfTads, dimC), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), rgbToYuvCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), packX.platformOffsets(), output.specialBuffer(), output.specialShapeInfo(), packZ.platformOffsets(), numOfTads, dimC), FLOAT_TYPES); NDArray::registerSpecialUse({ &output }, { &input }); manager.synchronize(); @@ -124,8 +124,8 @@ linkage void yuvToRgbCudaLauncher(const int blocksPerGrid, const int threadsPerB /////////////////////////////////////////////////////////////////// void transformYuvRgb(sd::LaunchContext* context, const NDArray& input, NDArray& output, const int dimC) { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), { dimC }); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.getShapeInfo(), { dimC }); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), { dimC }); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), { dimC }); const Nd4jLong numOfTads = packX.numberOfTads(); @@ -135,7 +135,7 @@ void transformYuvRgb(sd::LaunchContext* context, const NDArray& input, NDArray& PointersManager manager(context, "yuv_to_rgb"); NDArray::prepareSpecialUse({ &output }, { &input }); - 
BUILD_SINGLE_SELECTOR(input.dataType(), yuvToRgbCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), packX.platformOffsets(), output.specialBuffer(), output.specialShapeInfo(), packZ.platformOffsets(), numOfTads, dimC), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), yuvToRgbCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), packX.platformOffsets(), output.specialBuffer(), output.specialShapeInfo(), packZ.platformOffsets(), numOfTads, dimC), FLOAT_TYPES); NDArray::registerSpecialUse({ &output }, { &input }); manager.synchronize(); @@ -200,7 +200,7 @@ void transformRgbGrs(sd::LaunchContext* context, const NDArray& input, NDArray& const int sharedMem = input.rankOf() * sizeof(int) * threadsPerBlock + 128; NDArray::prepareSpecialUse({&output}, {&input}); - BUILD_SINGLE_SELECTOR(input.dataType(), rgbToGrsCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.getSpecialBuffer(), output.getSpecialShapeInfo(), dimC), NUMERIC_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), rgbToGrsCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), dimC), NUMERIC_TYPES); NDArray::registerSpecialUse({&output}, {&input}); manager.synchronize(); @@ -287,8 +287,8 @@ static _CUDA_H void rgbToHsvCudaLauncher(const int blocksPerGrid, const int thre /////////////////////////////////////////////////////////////////// void transformHsvRgb(sd::LaunchContext* context, const NDArray* input, NDArray* output, const int dimC) { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), {dimC}); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), {dimC}); + 
auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {dimC}); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {dimC}); const Nd4jLong numOfTads = packX.numberOfTads(); @@ -298,7 +298,7 @@ void transformHsvRgb(sd::LaunchContext* context, const NDArray* input, NDArray* PointersManager manager(context, "hsv_to_rgb"); NDArray::prepareSpecialUse({output}, {input}); - BUILD_SINGLE_SELECTOR(input->dataType(), hsvToRgbCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input->getSpecialBuffer(), input->getSpecialShapeInfo(), packX.platformOffsets(), output->specialBuffer(), output->specialShapeInfo(), packZ.platformOffsets(), numOfTads, dimC), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input->dataType(), hsvToRgbCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input->specialBuffer(), input->specialShapeInfo(), packX.platformOffsets(), output->specialBuffer(), output->specialShapeInfo(), packZ.platformOffsets(), numOfTads, dimC), FLOAT_TYPES); NDArray::registerSpecialUse({output}, {input}); manager.synchronize(); @@ -306,8 +306,8 @@ void transformHsvRgb(sd::LaunchContext* context, const NDArray* input, NDArray* /////////////////////////////////////////////////////////////////// void transformRgbHsv(sd::LaunchContext* context, const NDArray* input, NDArray* output, const int dimC) { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), {dimC}); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), {dimC}); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {dimC}); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {dimC}); const Nd4jLong numOfTads = packX.numberOfTads(); @@ -317,7 +317,7 @@ void transformRgbHsv(sd::LaunchContext* context, const NDArray* input, NDArray* PointersManager manager(context, 
"rgb_to_hsv"); NDArray::prepareSpecialUse({output}, {input}); - BUILD_SINGLE_SELECTOR(input->dataType(), rgbToHsvCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input->getSpecialBuffer(), input->getSpecialShapeInfo(), packX.platformOffsets(), output->specialBuffer(), output->specialShapeInfo(), packZ.platformOffsets(), numOfTads, dimC), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input->dataType(), rgbToHsvCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input->specialBuffer(), input->specialShapeInfo(), packX.platformOffsets(), output->specialBuffer(), output->specialShapeInfo(), packZ.platformOffsets(), numOfTads, dimC), FLOAT_TYPES); NDArray::registerSpecialUse({output}, {input}); manager.synchronize(); @@ -389,21 +389,21 @@ __global__ void tripleTransformerCuda(const void *vx, const Nd4jLong *xShapeInfo template static void rgbYiq(sd::LaunchContext* context, const NDArray* input, NDArray* output, const int dimC) { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimC); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimC); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimC); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimC); NDArray::prepareSpecialUse({output}, {input}); - return tripleTransformerCuda<<<256, 256, 8192, *context->getCudaStream()>>>(input->getSpecialBuffer(), input->getSpecialShapeInfo(), packX.platformShapeInfo(), packX.platformOffsets(), output->specialBuffer(), output->specialShapeInfo(), packZ.platformShapeInfo(), packZ.platformOffsets(), dimC, 1, packZ.numberOfTads()); + return tripleTransformerCuda<<<256, 256, 8192, *context->getCudaStream()>>>(input->specialBuffer(), input->specialShapeInfo(), packX.platformShapeInfo(), packX.platformOffsets(), output->specialBuffer(), output->specialShapeInfo(), 
packZ.platformShapeInfo(), packZ.platformOffsets(), dimC, 1, packZ.numberOfTads()); NDArray::registerSpecialUse({output}, {input}); } template FORCEINLINE static void yiqRgb(sd::LaunchContext* context, const NDArray* input, NDArray* output, const int dimC) { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimC); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimC); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimC); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimC); NDArray::prepareSpecialUse({output}, {input}); - return tripleTransformerCuda<<<256, 256, 8192, *context->getCudaStream()>>>(input->getSpecialBuffer(), input->getSpecialShapeInfo(), packX.platformShapeInfo(), packX.platformOffsets(), output->specialBuffer(), output->specialShapeInfo(), packZ.platformShapeInfo(), packZ.platformOffsets(), dimC, 2, packZ.numberOfTads()); + return tripleTransformerCuda<<<256, 256, 8192, *context->getCudaStream()>>>(input->specialBuffer(), input->specialShapeInfo(), packX.platformShapeInfo(), packX.platformOffsets(), output->specialBuffer(), output->specialShapeInfo(), packZ.platformShapeInfo(), packZ.platformOffsets(), dimC, 2, packZ.numberOfTads()); NDArray::registerSpecialUse({output}, {input}); } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/ismax.cu b/libnd4j/include/ops/declarable/helpers/cuda/ismax.cu index 27f4f35f2..723b0f215 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/ismax.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/ismax.cu @@ -61,7 +61,7 @@ static void ismax_(sd::LaunchContext * context, const NDArray* input, NDArray* o int dimensionLength = dimensions.size(); std::vector copy(dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), copy.data(), copy.size()); + auto packZ = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), copy.data(), copy.size()); // we launch legacy IndexMax op, to get indices of max values along dimension auto indexMaxArr = input->applyIndexReduce(indexreduce::IndexMax, dimensions); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/lrn.cu b/libnd4j/include/ops/declarable/helpers/cuda/lrn.cu index 57bb205a9..ebc0732e2 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/lrn.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/lrn.cu @@ -27,7 +27,7 @@ namespace ops { namespace helpers { template - static _CUDA_G void lrnKernel(void *vx, Nd4jLong *xTadShapeInfo, Nd4jLong *xTadOffsets, void *vz, Nd4jLong *zTadShapeInfo, Nd4jLong *zTadOffsets, Nd4jLong numTads, Nd4jLong tadLength, int depth, double bias, double alpha, double beta) { + static _CUDA_G void lrnKernel(void *vx, Nd4jLong const*xTadShapeInfo, Nd4jLong const*xTadOffsets, void *vz, Nd4jLong const*zTadShapeInfo, Nd4jLong const*zTadOffsets, Nd4jLong numTads, Nd4jLong tadLength, int depth, double bias, double alpha, double beta) { extern __shared__ char sharedChar[]; T* shared = reinterpret_cast(sharedChar); @@ -63,7 +63,7 @@ namespace helpers { } template - static _CUDA_G void lrnBPKernel(void *vx, Nd4jLong *xTadShapeInfo, Nd4jLong *xTadOffsets, void *vz, Nd4jLong *zTadShapeInfo, Nd4jLong *zTadOffsets, Nd4jLong numTads, Nd4jLong tadLength, int depth, double bias, double alpha, double beta) { + static _CUDA_G void lrnBPKernel(void const* vx, Nd4jLong const* xTadShapeInfo, Nd4jLong const* xTadOffsets, void *vz, Nd4jLong const* zTadShapeInfo, Nd4jLong const* zTadOffsets, Nd4jLong numTads, Nd4jLong tadLength, int depth, double bias, double alpha, double beta) { extern __shared__ char sharedChar[]; X* sharedX = reinterpret_cast(sharedChar); Z* sharedY = reinterpret_cast(sharedX + blockDim.x); @@ -82,7 +82,7 @@ namespace helpers { for (uint i = blockIdx.x; i < numTads; i += gridDim.x) { - auto x = reinterpret_cast(vx) + 
xTadOffsets[i]; + auto x = reinterpret_cast(vx) + xTadOffsets[i]; auto z = reinterpret_cast(vz) + zTadOffsets[i]; const uint begin = sd::math::nd4j_max(0, threadIdx.x - depth); @@ -116,8 +116,8 @@ namespace helpers { template static void lrnBP_(sd::graph::Context& block, const NDArray& input, const NDArray& gradO, NDArray& gradI, const int depth, const float bias, const float alpha, const float beta) { auto rank = input.rankOf(); - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), {rank - 1}); - auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(gradI.getShapeInfo(), {rank - 1}); + auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), {rank - 1}); + auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(gradI.shapeInfo(), {rank - 1}); const auto tadLength = shape::length(packX.primaryShapeInfo()); const int numBlocks = sd::math::nd4j_min(1024, packX.numberOfTads()); @@ -126,7 +126,7 @@ namespace helpers { if (tadLength > 1024 || tadLength < 1) throw std::runtime_error("LRN: tadLength > 1024 isn't implemented yet"); - lrnBPKernel<<getCudaStream()>>>(input.getSpecialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), gradI.specialBuffer(), packZ.platformShapeInfo(), packZ.platformOffsets(), packX.numberOfTads(), tadLength, depth, bias, alpha, beta); + lrnBPKernel<<getCudaStream()>>>(input.specialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), gradI.specialBuffer(), packZ.platformShapeInfo(), packZ.platformOffsets(), packX.numberOfTads(), tadLength, depth, bias, alpha, beta); gradI.tickWriteDevice(); gradI *= gradO; diff --git a/libnd4j/include/ops/declarable/helpers/cuda/lstsq.cu b/libnd4j/include/ops/declarable/helpers/cuda/lstsq.cu index a3b029c0b..8d8548be5 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/lstsq.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/lstsq.cu @@ -33,7 +33,7 @@ namespace ops { namespace helpers { template - static 
__global__ void fillRegularizerKernel(T* ioMatrixData, Nd4jLong* ioMatrixShape, Nd4jLong* ioMatrixTads, Nd4jLong* ioMatrixOffsets, Nd4jLong batchSize, Nd4jLong rows, T const value) { + static __global__ void fillRegularizerKernel(T* ioMatrixData, const Nd4jLong* ioMatrixShape, const Nd4jLong* ioMatrixTads, const Nd4jLong* ioMatrixOffsets, Nd4jLong batchSize, Nd4jLong rows, T const value) { for (auto x = blockIdx.x; x < batchSize; x += gridDim.x) { auto z = ioMatrixData + ioMatrixOffsets[x]; @@ -61,7 +61,7 @@ namespace helpers { if (fast) { // Cholesky decomposition approach // Equation for solve A^T * Ax = A^T * b, so // 1. Computing A2: - auto tAtShape = ShapeUtils::evalShapeForMatmul(leftInput->getShapeInfo(), leftInput->getShapeInfo(), true, false); + auto tAtShape = ShapeUtils::evalShapeForMatmul(leftInput->shapeInfo(), leftInput->shapeInfo(), true, false); //tAtShape[tAtShape.size() - 2] = output->sizeAt(-2); NDArray leftOutput(leftInput->ordering(), tAtShape, output->dataType(), context); MmulHelper::matmul(leftInput, leftInput, &leftOutput, true, false); // Computing A2 = A^T * A diff --git a/libnd4j/include/ops/declarable/helpers/cuda/lup.cu b/libnd4j/include/ops/declarable/helpers/cuda/lup.cu index c986260e8..682b2eee9 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/lup.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/lup.cu @@ -37,9 +37,9 @@ namespace helpers { // invert the second diagonal for lower diagonal matrix template static __global__ void - invertKernelLow(void *invertedBuf, Nd4jLong *invertedShape, void *inputBuf, Nd4jLong *inputShape, Nd4jLong n) { - T* inverted = reinterpret_cast(invertedBuf); - T* input = reinterpret_cast(inputBuf); + invertKernelLow(void *invertedBuf, const Nd4jLong *invertedShape, const void *inputBuf, const Nd4jLong *inputShape, Nd4jLong n) { + auto inverted = reinterpret_cast(invertedBuf); + auto input = reinterpret_cast(inputBuf); auto start = threadIdx.x + blockIdx.x * blockDim.x; auto step = blockDim.x * 
gridDim.x; @@ -61,9 +61,9 @@ namespace helpers { // invert diagonal vals to upper diagonal matrix template static __global__ void - upvertKernel(void *invertedBuf, Nd4jLong *invertedShape, void *inputBuf, Nd4jLong *inputShape, Nd4jLong n) { - T *inverted = reinterpret_cast(invertedBuf); - T *input = reinterpret_cast(inputBuf); + upvertKernel(void *invertedBuf, const Nd4jLong *invertedShape, const void *inputBuf, const Nd4jLong *inputShape, Nd4jLong n) { + auto inverted = reinterpret_cast(invertedBuf); + auto input = reinterpret_cast(inputBuf); auto start = threadIdx.x + blockIdx.x * blockDim.x; auto step = blockDim.x * gridDim.x; @@ -72,7 +72,7 @@ namespace helpers { Nd4jLong pos[] = {i, i}; auto xIndex = shape::getOffset(inputShape, pos); auto zIndex = shape::getOffset(invertedShape, pos); -// math::atomics::nd4j_atomicDiv(&inverted[zIndex], input[xIndex]); + // invert diagonal elements inverted[zIndex] /= input[xIndex]; } @@ -82,13 +82,13 @@ namespace helpers { // invert upper second diagonal template static __global__ void - upvertKernelUp(void *invertedBuf, Nd4jLong *invertedShape, void *inputBuf, Nd4jLong *inputShape, Nd4jLong n) { + upvertKernelUp(void *invertedBuf, const Nd4jLong *invertedShape, const void *inputBuf, const Nd4jLong *inputShape, Nd4jLong n) { __shared__ T* inverted; - __shared__ T* input; + __shared__ const T* input; if (threadIdx.x == 0) { inverted = reinterpret_cast(invertedBuf); - input = reinterpret_cast(inputBuf); + input = reinterpret_cast(inputBuf); } __syncthreads(); @@ -110,15 +110,11 @@ namespace helpers { // ------------------------------------------------------------------------------------------------------------------ // template static __global__ void - invertLowKernel(void *invertedBuf, Nd4jLong *invertedShape, void *inputBuf, Nd4jLong *inputShape, Nd4jLong n) { + invertLowKernel(void *invertedBuf, const Nd4jLong *invertedShape, const void *inputBuf, const Nd4jLong *inputShape, Nd4jLong n) { + + auto input = 
reinterpret_cast(inputBuf); + auto inverted = reinterpret_cast(invertedBuf); - T *inverted = reinterpret_cast(invertedBuf); - T *input = reinterpret_cast(inputBuf); - if (threadIdx.x == 0) { - inverted = reinterpret_cast(invertedBuf); - input = reinterpret_cast(inputBuf); - } - __syncthreads(); auto tid = blockIdx.x * blockDim.x + threadIdx.x; auto step = gridDim.x * blockDim.x; @@ -145,15 +141,14 @@ namespace helpers { // Invertion of upper triangular matrix non-diagonal elements when main and second diagonals already processed template static __global__ void - invertUpKernel(void *invertedBuf, Nd4jLong *invertedShape, void *inputBuf, Nd4jLong *inputShape, Nd4jLong n) { - __shared__ T* inverted; - __shared__ T* input; + invertUpKernel( + void *invertedBuf, const Nd4jLong *invertedShape, + const void *inputBuf, const Nd4jLong *inputShape, + Nd4jLong n) { + + auto inverted = reinterpret_cast(invertedBuf);; + auto input = reinterpret_cast(inputBuf); - if (threadIdx.x == 0) { - inverted = reinterpret_cast(invertedBuf);; - input = reinterpret_cast(inputBuf); - } - __syncthreads(); auto tid = blockIdx.x * blockDim.x + threadIdx.x; auto step = blockDim.x * gridDim.x; @@ -264,15 +259,15 @@ namespace helpers { // output - a N-D tensor buffer with rank not less than 2, input - 2D square n x n matrix with n = rowLen template static __global__ void - fillMatrix(void *output, Nd4jLong *outShape, void *input, Nd4jLong *inputShape, Nd4jLong pos, Nd4jLong rowLen) { + fillMatrix(void *output, const Nd4jLong *outShape, const void *input, const Nd4jLong *inputShape, Nd4jLong pos, Nd4jLong rowLen) { __shared__ F *matrix; - __shared__ T *inputBuf; + __shared__ const T *inputBuf; __shared__ Nd4jLong inputLen; __shared__ Nd4jLong n2; if (threadIdx.x == 0) { matrix = reinterpret_cast(output); - inputBuf = reinterpret_cast(input); + inputBuf = reinterpret_cast(input); inputLen = shape::length(inputShape); n2 = rowLen * rowLen; } @@ -291,15 +286,14 @@ namespace helpers { // same as above, 
but without type conversion template static __global__ void - returnMatrix(void *output, Nd4jLong *outputShape, void *input, Nd4jLong *inputShape, Nd4jLong pos, Nd4jLong rowLen) { - __shared__ T* matrix; - __shared__ T* outputBuf; + returnMatrix(void *output, const Nd4jLong *outputShape, const void *input, const Nd4jLong *inputShape, Nd4jLong pos, Nd4jLong rowLen) { __shared__ Nd4jLong outputLen; __shared__ Nd4jLong n2; + auto matrix = reinterpret_cast(input); + auto outputBuf = reinterpret_cast(output); if (threadIdx.x == 0) { - matrix = reinterpret_cast(input); - outputBuf = reinterpret_cast(output); + outputLen = shape::length(inputShape); n2 = rowLen * rowLen; } @@ -316,7 +310,7 @@ namespace helpers { // ------------------------------------------------------------------------------------------------------------------ // // fill up permutaion matrix kernel. Permutation matrix filled with zeros and ones template - static __global__ void fillUpPermutation(void *output, Nd4jLong *shape, int *source, int rowNum) { + static __global__ void fillUpPermutation(void *output, const Nd4jLong *shape, int *source, int rowNum) { F *permutation = reinterpret_cast(output); auto start = blockIdx.x * blockDim.x + threadIdx.x; @@ -515,7 +509,7 @@ namespace helpers { BUILD_DOUBLE_TEMPLATE(template void lup_,(LaunchContext * context, NDArray * input, NDArray * output, NDArray * permutation), FLOAT_NATIVE, INDEXING_TYPES); template - static __device__ void swapRows(T* matrix, Nd4jLong* shape, Nd4jLong theFirst, Nd4jLong theSecond, Nd4jLong n) { + static __device__ void swapRows(T* matrix, const Nd4jLong* shape, Nd4jLong theFirst, Nd4jLong theSecond, Nd4jLong n) { if (theFirst != theSecond) { for (auto i = 0; i < n; i++) { Nd4jLong theFirstPos[] = {theFirst, i}; @@ -528,7 +522,7 @@ namespace helpers { } template - static __device__ void processColumns(Nd4jLong currentRow, Nd4jLong rowNum, T* compoundBuf, Nd4jLong* compoundShape) { + static __device__ void processColumns(Nd4jLong 
currentRow, Nd4jLong rowNum, T* compoundBuf, const Nd4jLong* compoundShape) { Nd4jLong xDiag[] = {currentRow, currentRow}; auto diagIndex = shape::getOffset(compoundShape, xDiag, 0); for (auto j = currentRow + 1; j < rowNum; j++) { @@ -546,7 +540,7 @@ namespace helpers { } template - __device__ Nd4jLong argmaxCol(Nd4jLong column, T* compoundBuffer, Nd4jLong* compoundShape) { + __device__ Nd4jLong argmaxCol(Nd4jLong column, T* compoundBuffer, const Nd4jLong* compoundShape) { auto rowNum = shape::sizeAt(compoundShape, 0); Nd4jLong xInitial[] = {column, column}; auto xInitialIndex = shape::getOffset(compoundShape, xInitial, 0); @@ -565,7 +559,7 @@ namespace helpers { } template - static __device__ int luNN(T* matrix, Nd4jLong* shape, I* permutation, Nd4jLong* permuShape, Nd4jLong n) { + static __device__ int luNN(T* matrix, const Nd4jLong* shape, I* permutation, const Nd4jLong* permuShape, Nd4jLong n) { for (auto i = 0; i < n - 1; i++) { auto pivotIndex = argmaxCol(i, matrix, shape); @@ -581,9 +575,12 @@ namespace helpers { } template - static __global__ void luBatchedKernel(T* outputBuf, Nd4jLong* outputShape, I* permutations, Nd4jLong* permuShape, - Nd4jLong* outputTadShape, Nd4jLong* outputTadOffsets, Nd4jLong* permuTadShape, Nd4jLong* permuTadOffsets, - Nd4jLong batchNum) { + static __global__ void luBatchedKernel( + T* outputBuf, const Nd4jLong* outputShape, + I* permutations, const Nd4jLong* permuShape, + const Nd4jLong* outputTadShape, const Nd4jLong* outputTadOffsets, + const Nd4jLong* permuTadShape, const Nd4jLong* permuTadOffsets, + Nd4jLong batchNum) { auto start = blockIdx.x * blockDim.x + threadIdx.x; auto step = blockDim.x * gridDim.x; @@ -627,7 +624,7 @@ namespace helpers { Nd4jLong n = input->sizeAt(-1); Nd4jLong n2 = n * n; std::vector dims(); - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), {input->rankOf() - 2, input->rankOf() - 1}); + auto packX = 
ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {input->rankOf() - 2, input->rankOf() - 1}); //auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {output->rankOf() - 1}); // DataType dtype = input->dataType(); // if (dtype != DataType::DOUBLE) @@ -651,8 +648,7 @@ namespace helpers { auto inputBuf = reinterpret_cast(matrix.specialBuffer()); auto outputBuf = reinterpret_cast(output->specialBuffer()) + offset; // if (matrix.dataType() == input->dataType()) - determinantKernel << < launchDims.x, launchDims.y, launchDims.z, *stream >> > - (inputBuf, outputBuf, n); + determinantKernel<<< launchDims.x, launchDims.y, launchDims.z, *stream>>>(inputBuf, outputBuf, n); // else // determinantKernel<<>> (inputBuf, outputBuf, n); } @@ -672,7 +668,7 @@ namespace helpers { Nd4jLong n = input->sizeAt(-1); Nd4jLong n2 = n * n; std::vector dims(); - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), {input->rankOf() - 2, input->rankOf() - 1}); + auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {input->rankOf() - 2, input->rankOf() - 1}); //auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {output->rankOf() - 1}); DataType dtype = input->dataType(); if (dtype != DataType::DOUBLE) @@ -718,8 +714,11 @@ namespace helpers { template static __global__ void - fillLowerUpperKernel(void *lowerBuf, Nd4jLong *lowerShape, void *upperBuf, Nd4jLong *upperShape, - void *matrixBuf, Nd4jLong *matrixShape, Nd4jLong n) { + fillLowerUpperKernel( + void *lowerBuf, const Nd4jLong *lowerShape, + void *upperBuf, const Nd4jLong *upperShape, + void *matrixBuf, const Nd4jLong *matrixShape, + Nd4jLong n) { __shared__ T *lowerMatrix; __shared__ T *upperMatrix; @@ -760,10 +759,10 @@ namespace helpers { NDArray lower = NDArrayFactory::create('c', {n, n}, dtype, context); NDArray compound = NDArrayFactory::create('c', {n, n}, dtype, context); NDArray 
permutation = NDArrayFactory::create('c', {n, n}, dtype, context); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {input->rankOf() - 2, input->rankOf() - 1}); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {output->rankOf() - 2, output->rankOf() - 1}); auto stream = context->getCudaStream(); @@ -792,7 +791,7 @@ namespace helpers { sd::MmulHelper::mmul(&matrix, &compound, &upper, 1.0, 0.0); upper.tickWriteDevice(); // upper.printIndexedBuffer("Full inverted"); - returnMatrix <<<1, n2, 1024, *stream>>>(output->specialBuffer(), output->specialShapeInfo(), upper.specialBuffer(), upper.specialShapeInfo(), i * n2, n); + returnMatrix<<<1, n2, 1024, *stream>>>(output->specialBuffer(), output->specialShapeInfo(), upper.specialBuffer(), upper.specialShapeInfo(), i * n2, n); } return Status::OK(); } @@ -808,7 +807,7 @@ namespace helpers { } template - __global__ void fillBatchKernel(F **dArrayBatch, F *buf, Nd4jLong *offsets, Nd4jLong batchSize) { + __global__ void fillBatchKernel(F **dArrayBatch, F *buf, const Nd4jLong *offsets, Nd4jLong batchSize) { auto start = blockIdx.x * blockDim.x + threadIdx.x; auto step = blockDim.x * gridDim.x; @@ -819,7 +818,7 @@ namespace helpers { template __global__ void - adjustResultsKernel(F *dArray, Nd4jLong *shape, Nd4jLong *offsets, Nd4jLong batchSize, Nd4jLong n) { + adjustResultsKernel(F *dArray, const Nd4jLong *shape, const Nd4jLong *offsets, Nd4jLong batchSize, Nd4jLong n) { //auto i = blockIdx.x * blockDim.x + threadIdx.x; Nd4jLong *shapeOf = shape::shapeOf(shape); Nd4jLong *strideOf = shape::stride(shape); @@ -850,7 +849,7 @@ namespace helpers { throw cuda_exception::build("helpers::cholesky_: Cannot create solver handle", status); } F **dArrayBatch = nullptr; - 
auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempOutput.getShapeInfo(), + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempOutput.shapeInfo(), {tempOutput.rankOf() - 2, tempOutput.rankOf() - 1}); const Nd4jLong batchSize = packX.numberOfTads(); @@ -865,8 +864,7 @@ namespace helpers { throw cuda_exception::build("helpers::cholesky_: Cannot allocate memory for solver errors buffer", err); } auto stream = context->getCudaStream(); - fillBatchKernel << < 1, batchSize, 128, *stream >> > - (dArrayBatch, reinterpret_cast(tempOutput.specialBuffer()), packX.specialOffsets(), batchSize); + fillBatchKernel<<<1, batchSize, 128, *stream>>>(dArrayBatch, reinterpret_cast(tempOutput.specialBuffer()), packX.specialOffsets(), batchSize); status = cusolverDnSetStream(handle, *stream); if (CUSOLVER_STATUS_SUCCESS != status) { @@ -895,8 +893,7 @@ namespace helpers { if (CUSOLVER_STATUS_SUCCESS != status) { throw cuda_exception::build("helpers::cholesky_: Cholesky factorization failed for batch", status); } - adjustResultsKernel << < batchSize, n2, 128, *stream >> > - (reinterpret_cast(tempOutput.specialBuffer()), packX.specialShapeInfo(), packX.specialOffsets(), batchSize, n); + adjustResultsKernel<<>>(reinterpret_cast(tempOutput.specialBuffer()), packX.specialShapeInfo(), packX.specialOffsets(), batchSize, n); err = cudaFree(dArrayBatch); if (err) { @@ -944,9 +941,11 @@ namespace helpers { FLOAT_NATIVE); template - __global__ void - logDetKernel(T *inputBuf, Nd4jLong *inputShape, Nd4jLong batchNum, Nd4jLong *tadShape, Nd4jLong *tadOffsets, - T *outputBuf, Nd4jLong *outputShape) { + __global__ void logDetKernel( + const T *inputBuf, const Nd4jLong *inputShape, + Nd4jLong batchNum, + const Nd4jLong *tadShape, const Nd4jLong *tadOffsets, + T *outputBuf, const Nd4jLong *outputShape) { __shared__ int n; if (threadIdx.x == 0) { @@ -954,11 +953,11 @@ namespace helpers { } __syncthreads(); - T *output = outputBuf; - T *input = inputBuf; + auto 
output = outputBuf; + auto input = inputBuf; for (auto i = blockIdx.x; i < batchNum; i += gridDim.x) { - T *current = input + tadOffsets[i]; + auto current = input + tadOffsets[i]; auto zIndex = shape::getIndexOffset(i, outputShape); for (auto e = threadIdx.x; e < n; e += blockDim.x) { @@ -981,10 +980,10 @@ namespace helpers { auto outputBuf = output->dataBuffer()->specialAsT(); //reinterpret_cast(output->specialBuffer()); // + e * n2; // + e * n2; auto inputBuf = tempOutput.dataBuffer()->specialAsT(); //reinterpret_cast(tempOutput.specialBuffer()); output->nullify(); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempOutput.getShapeInfo(), + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempOutput.shapeInfo(), {tempOutput.rankOf() - 2, tempOutput.rankOf() - 1}); - logDetKernel <<<128, 512, 256, *stream>>>(inputBuf, tempOutput.specialShapeInfo(), + logDetKernel<<<128, 512, 256, *stream>>>(inputBuf, tempOutput.specialShapeInfo(), packX.numberOfTads(), packX.specialShapeInfo(), packX.specialOffsets(), outputBuf, output->specialShapeInfo()); output->tickWriteDevice(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/matrixSetDiag.cu b/libnd4j/include/ops/declarable/helpers/cuda/matrixSetDiag.cu index e5773abf5..97124c3db 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/matrixSetDiag.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/matrixSetDiag.cu @@ -91,7 +91,7 @@ void matrixSetDiag(sd::LaunchContext* context, const NDArray& input, const NDArr PointersManager manager(context, "matrixSetDiag"); NDArray::prepareSpecialUse({&output}, {&input, &diagonal}); - BUILD_SINGLE_SELECTOR(input.dataType(), matrixSetDiagCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), diagonal.getSpecialBuffer(), diagonal.getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), zeroPad), LIBND4J_TYPES); + 
BUILD_SINGLE_SELECTOR(input.dataType(), matrixSetDiagCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), diagonal.specialBuffer(), diagonal.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), zeroPad), LIBND4J_TYPES); NDArray::registerSpecialUse({&output}, {&input, &diagonal}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/matrix_band.cu b/libnd4j/include/ops/declarable/helpers/cuda/matrix_band.cu index 3c1305391..78249bc38 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/matrix_band.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/matrix_band.cu @@ -43,9 +43,13 @@ namespace helpers { // inputLength - input subarray length // template - static __global__ void matrixBandKernel(void* inputBuffer, Nd4jLong* inputShape, - void* outputBuffer, Nd4jLong* outputShape, Nd4jLong lowerBand, Nd4jLong upperBand, Nd4jLong* tadOnlyInputShapeInfo, Nd4jLong* tadInputOffsets, - Nd4jLong* tadOnlyOutputShapeInfo, Nd4jLong* tadOutputOffsets, Nd4jLong numTads, Nd4jLong inputLength) { + static __global__ void matrixBandKernel(const void* inputBuffer, const Nd4jLong* inputShape, + void* outputBuffer, const Nd4jLong* outputShape, + Nd4jLong lowerBand, Nd4jLong upperBand, + const Nd4jLong* tadOnlyInputShapeInfo, const Nd4jLong* tadInputOffsets, + const Nd4jLong* tadOnlyOutputShapeInfo, const Nd4jLong* tadOutputOffsets, + Nd4jLong numTads, + Nd4jLong inputLength) { int totalThreads = blockDim.x; Nd4jLong rows = shape::sizeAt(inputShape, -2); Nd4jLong cols = shape::sizeAt(inputShape, -1); @@ -90,14 +94,14 @@ namespace helpers { std::vector lastDims({input->rankOf() - 2, input->rankOf() - 1}); std::vector dimsToExclude = ShapeUtils::evalDimsToExclude(input->rankOf(), lastDims); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), lastDims); - auto packZ = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), lastDims); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), lastDims); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), lastDims); const Nd4jLong numTads = packX.numberOfTads(); NDArray::prepareSpecialUse({output}, {input}); - matrixBandKernel<<>>(input->getSpecialBuffer(), - input->getSpecialShapeInfo(), output->getSpecialBuffer(), output->getSpecialShapeInfo(), + matrixBandKernel<<>>(input->specialBuffer(), + input->specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), lowerBand, upperBand, packX.specialShapeInfo(), packX.specialOffsets(), packZ.specialShapeInfo(), packZ.specialOffsets(), numTads, input->lengthOf()); NDArray::registerSpecialUse({output}, {input}); } @@ -106,7 +110,6 @@ namespace helpers { void matrixBandPart(sd::LaunchContext * context, NDArray* input, NDArray* output, Nd4jLong lowerBand, Nd4jLong upperBand) { BUILD_SINGLE_SELECTOR(input->dataType(), matrixBandPart_, (context, input, output, lowerBand, upperBand), FLOAT_TYPES); } - BUILD_SINGLE_TEMPLATE(template void matrixBandPart_, (sd::LaunchContext * context, NDArray* input, NDArray* output, Nd4jLong lowerBand, Nd4jLong upperBand), FLOAT_TYPES); } } } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/matrix_diag_part.cu b/libnd4j/include/ops/declarable/helpers/cuda/matrix_diag_part.cu index 7d78d0323..30d5f0ef9 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/matrix_diag_part.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/matrix_diag_part.cu @@ -35,8 +35,8 @@ namespace helpers { // put diagonals from input batched matricies to output batched vectors template static __global__ void matrixDiagPartKernel(void const* inputBuffer, void* outputBuffer, Nd4jLong numTads, Nd4jLong inputLength, - Nd4jLong* tadOnlyInputShapeInfo, Nd4jLong *tadInputOffsets, - Nd4jLong* tadOnlyOutputShapeInfo, Nd4jLong 
*tadOutputOffsets) { + const Nd4jLong* tadOnlyInputShapeInfo, const Nd4jLong *tadInputOffsets, + const Nd4jLong* tadOnlyOutputShapeInfo, const Nd4jLong *tadOutputOffsets) { int totalThreads = blockDim.x; for (Nd4jLong i = blockIdx.x; i < numTads; i += gridDim.x) { auto yOffset = tadInputOffsets[i]; @@ -66,13 +66,13 @@ namespace helpers { Nd4jLong lastDimension = sd::math::nd4j_min(input->sizeAt(-2), input->sizeAt(-1)); std::vector dimsToExclude = ShapeUtils::evalDimsToExclude(output->rankOf(), {output->rankOf() - 1}); - const Nd4jLong numTads = ShapeUtils::getNumOfSubArrs(input->getShapeInfo(), dimsToExclude); //this->tensorsAlongDimension({dimension}); + const Nd4jLong numTads = ShapeUtils::getNumOfSubArrs(input->shapeInfo(), dimsToExclude); //this->tensorsAlongDimension({dimension}); //printf("Repeat delta %lld, numTads %lld\n", repeatDelta, numTads); //tadOnlyInputShapeInfo, tadInputOffsets, tadOnlyOutputShapeInfo, tadOutputOffsets; std::vector outputDims({output->rankOf() - 1}); std::vector inputDims({input->rankOf() - 2, input->rankOf() - 1}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), inputDims); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), outputDims); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), inputDims); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), outputDims); if (!output->isActualOnDeviceSide()) @@ -83,7 +83,7 @@ namespace helpers { dim3 launchDims(256, 512, 8192); - matrixDiagPartKernel<<>>(input->getSpecialBuffer(), output->getSpecialBuffer(), numTads, lastDimension, packX.specialShapeInfo(), packX.specialOffsets(), packZ.specialShapeInfo(), packZ.specialOffsets()); + matrixDiagPartKernel<<>>(input->specialBuffer(), output->specialBuffer(), numTads, lastDimension, packX.specialShapeInfo(), packX.specialOffsets(), packZ.specialShapeInfo(), packZ.specialOffsets()); 
return Status::OK(); } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/max_pooling.cu b/libnd4j/include/ops/declarable/helpers/cuda/max_pooling.cu index b809647c1..6e70d4510 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/max_pooling.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/max_pooling.cu @@ -27,7 +27,7 @@ namespace ops { namespace helpers { template - static _CUDA_G void indicesFiller(void *vz, Nd4jLong *zShapeInfo, Nd4jLong part, Nd4jLong bSize) { + static _CUDA_G void indicesFiller(void *vz, Nd4jLong const* zShapeInfo, Nd4jLong part, Nd4jLong bSize) { auto z = reinterpret_cast(vz); for (int b = blockIdx.x; b < bSize; b += gridDim.x) { diff --git a/libnd4j/include/ops/declarable/helpers/cuda/merge.cu b/libnd4j/include/ops/declarable/helpers/cuda/merge.cu index a7dd9b199..3c580ee33 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/merge.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/merge.cu @@ -34,7 +34,7 @@ namespace sd { namespace helpers { ////////////////////////////////////////////////////////////////////////// template - static __global__ void mergeMaxIndexCudaLauncher(void** inArrs, void** inShapes, const int numArrays, void* voutput, Nd4jLong* outputShape, Nd4jLong length) { + static __global__ void mergeMaxIndexCudaLauncher(void** inArrs, void** inShapes, const int numArrays, void* voutput, const Nd4jLong* outputShape, Nd4jLong length) { auto output = reinterpret_cast(voutput); const auto tid = blockIdx.x * blockDim.x + threadIdx.x; @@ -62,11 +62,11 @@ namespace sd { static void mergeMaxIndex_(sd::LaunchContext* context, const std::vector& inArrs, NDArray& output) { int nArrSize = static_cast(inArrs.size()); - std::vector inBuffers(nArrSize), inShapes(nArrSize); + std::vector inBuffers(nArrSize), inShapes(nArrSize); for (int e = 0; e < nArrSize; e++) { - inBuffers[e] = inArrs[e]->getSpecialBuffer(); - inShapes[e] = inArrs[e]->getSpecialShapeInfo(); + inBuffers[e] = inArrs[e]->specialBuffer(); + inShapes[e] = 
inArrs[e]->specialShapeInfo(); } PointersManager manager(context, "mergeMaxIndex"); @@ -78,7 +78,7 @@ namespace sd { const int threadsPerBlock = MAX_NUM_THREADS / 2; const int blocksPerGrid = (length + threadsPerBlock - 1) / threadsPerBlock; - mergeMaxIndexCudaLauncher << getCudaStream() >> > (pInBuffers, pInShapes, nArrSize, output.getSpecialBuffer(), output.getSpecialShapeInfo(), length); + mergeMaxIndexCudaLauncher<<getCudaStream()>>>(pInBuffers, pInShapes, nArrSize, output.specialBuffer(), output.specialShapeInfo(), length); manager.synchronize(); } @@ -95,7 +95,7 @@ namespace sd { ////////////////////////////////////////////////////////////////////////// template - static __global__ void mergeMaxCudaLauncher(void** inArrs, void** inShapes, const int numArrays, void* voutput, Nd4jLong* outputShape, Nd4jLong length) { + static __global__ void mergeMaxCudaLauncher(void** inArrs, void** inShapes, const int numArrays, void* voutput, const Nd4jLong* outputShape, Nd4jLong length) { auto output = reinterpret_cast(voutput); const auto tid = blockIdx.x * blockDim.x + threadIdx.x; @@ -121,11 +121,11 @@ namespace sd { int nArrsSize = static_cast(inArrs.size()); - std::vector inBuffers(nArrsSize), inShapes(nArrsSize); + std::vector inBuffers(nArrsSize), inShapes(nArrsSize); for (int e = 0; e < nArrsSize; e++) { - inBuffers[e] = inArrs[e]->getSpecialBuffer(); - inShapes[e] = inArrs[e]->getSpecialShapeInfo(); + inBuffers[e] = inArrs[e]->specialBuffer(); + inShapes[e] = inArrs[e]->specialShapeInfo(); } PointersManager manager(context, "mergeMax"); @@ -137,7 +137,7 @@ namespace sd { const int threadsPerBlock = MAX_NUM_THREADS / 2; const int blocksPerGrid = (length + threadsPerBlock - 1) / threadsPerBlock; - mergeMaxCudaLauncher << getCudaStream() >> > (pInBuffers, pInShapes, nArrsSize, output.getSpecialBuffer(), output.getSpecialShapeInfo(), length); + mergeMaxCudaLauncher<<getCudaStream()>>>(pInBuffers, pInShapes, nArrsSize, output.specialBuffer(), output.specialShapeInfo(), 
length); manager.synchronize(); } @@ -153,10 +153,15 @@ namespace sd { ////////////////////////////////////////////////////////////////////////// template - static __global__ void mergeMaxBpCudaLauncher(void** inArrs, void** inShapes, void* vgradient, Nd4jLong* gradientShape, const int numArrays, - void** outArrs, void** outShapes, Nd4jLong length, bool bSameOrderAndEws1) { + static __global__ void mergeMaxBpCudaLauncher( + void** inArrs, void** inShapes, + const void* vgradient, const Nd4jLong* gradientShape, + const int numArrays, + void** outArrs, void** outShapes, + Nd4jLong length, + bool bSameOrderAndEws1) { - auto grad = reinterpret_cast(vgradient); + auto grad = reinterpret_cast(vgradient); const auto tid = blockIdx.x * blockDim.x + threadIdx.x; const auto step = gridDim.x * blockDim.x; @@ -204,13 +209,13 @@ namespace sd { template static void mergeMaxBp_(sd::LaunchContext* context, const std::vector& inArrs, std::vector& outArrs, int nArrSize, bool bSameOrderAndEws1) { - std::vector inBuffers(nArrSize), inShapes(nArrSize), outBuffers(nArrSize), outShapes(nArrSize); + std::vector inBuffers(nArrSize), inShapes(nArrSize), outBuffers(nArrSize), outShapes(nArrSize); for (int e = 0; e < nArrSize; e++) { - inBuffers[e] = inArrs[e]->getSpecialBuffer(); - inShapes[e] = inArrs[e]->getSpecialShapeInfo(); - outBuffers[e] = outArrs[e]->getSpecialBuffer(); - outShapes[e] = outArrs[e]->getSpecialShapeInfo(); + inBuffers[e] = inArrs[e]->specialBuffer(); + inShapes[e] = inArrs[e]->specialShapeInfo(); + outBuffers[e] = outArrs[e]->specialBuffer(); + outShapes[e] = outArrs[e]->specialShapeInfo(); } PointersManager manager(context, "mergeMaxBp"); @@ -226,8 +231,8 @@ namespace sd { const int threadsPerBlock = MAX_NUM_THREADS / 2; const int blocksPerGrid = (length + threadsPerBlock - 1) / threadsPerBlock; - mergeMaxBpCudaLauncher << getCudaStream() >> > (pInBuffers, pInShapes, inArrs[nArrSize]->getSpecialBuffer(), - inArrs[nArrSize]->getSpecialShapeInfo(), nArrSize, 
pOutBuffers, pOutShapes, + mergeMaxBpCudaLauncher<<getCudaStream()>>>(pInBuffers, pInShapes, inArrs[nArrSize]->specialBuffer(), + inArrs[nArrSize]->specialShapeInfo(), nArrSize, pOutBuffers, pOutShapes, length, bSameOrderAndEws1); manager.synchronize(); @@ -261,7 +266,7 @@ namespace sd { ////////////////////////////////////////////////////////////////////////// template - static __global__ void mergeAvgCudaLauncher(void** inArrs, void** inShapes, const int numArrays, void* voutput, Nd4jLong* outputShape, Nd4jLong length) { + static __global__ void mergeAvgCudaLauncher(void** inArrs, void** inShapes, const int numArrays, void* voutput, const Nd4jLong* outputShape, Nd4jLong length) { auto output = reinterpret_cast(voutput); const auto tid = blockIdx.x * blockDim.x + threadIdx.x; @@ -284,11 +289,11 @@ namespace sd { template static void mergeAvg_(sd::LaunchContext* context, const std::vector& inArrs, NDArray& output) { - std::vector inBuffers(inArrs.size()), inShapes(inArrs.size()); + std::vector inBuffers(inArrs.size()), inShapes(inArrs.size()); for (int e = 0; e < inArrs.size(); e++) { - inBuffers[e] = inArrs[e]->getSpecialBuffer(); - inShapes[e] = inArrs[e]->getSpecialShapeInfo(); + inBuffers[e] = inArrs[e]->specialBuffer(); + inShapes[e] = inArrs[e]->specialShapeInfo(); } PointersManager manager(context, "mergeAvg"); @@ -300,7 +305,7 @@ namespace sd { const int threadsPerBlock = MAX_NUM_THREADS / 2; const int blocksPerGrid = (length + threadsPerBlock - 1) / threadsPerBlock; - mergeAvgCudaLauncher << getCudaStream() >> > (pInBuffers, pInShapes, (int)inArrs.size(), output.getSpecialBuffer(), output.getSpecialShapeInfo(), length); + mergeAvgCudaLauncher<<getCudaStream()>>>(pInBuffers, pInShapes, (int)inArrs.size(), output.specialBuffer(), output.specialShapeInfo(), length); manager.synchronize(); } @@ -315,10 +320,14 @@ namespace sd { } ////////////////////////////////////////////////////////////////////////// template - static __global__ void 
mergeAvgBpCudaLauncher(void* vgradient, Nd4jLong* gradientShape, void** outArrs, void** outShapes, - const int numArrays, Nd4jLong length, bool bSameOrderAndEws1) { + static __global__ void mergeAvgBpCudaLauncher( + const void* vgradient, const Nd4jLong* gradientShape, + void** outArrs, void** outShapes, + const int numArrays, + Nd4jLong length, + bool bSameOrderAndEws1) { - auto grad = reinterpret_cast(vgradient); + auto grad = reinterpret_cast(vgradient); const auto tid = blockIdx.x * blockDim.x + threadIdx.x; const auto step = gridDim.x * blockDim.x; @@ -352,11 +361,11 @@ namespace sd { int nArrSize = static_cast(outArrs.size()); - std::vector outBuffers(nArrSize), outShapes(nArrSize); + std::vector outBuffers(nArrSize), outShapes(nArrSize); for (int e = 0; e < nArrSize; e++) { - outBuffers[e] = outArrs[e]->getSpecialBuffer(); - outShapes[e] = outArrs[e]->getSpecialShapeInfo(); + outBuffers[e] = outArrs[e]->specialBuffer(); + outShapes[e] = outArrs[e]->specialShapeInfo(); } PointersManager manager(context, "mergeAvgBp"); @@ -369,7 +378,7 @@ namespace sd { const int threadsPerBlock = MAX_NUM_THREADS / 2; const int blocksPerGrid = (length + threadsPerBlock - 1) / threadsPerBlock; - mergeAvgBpCudaLauncher << getCudaStream() >> > (gradient.getSpecialBuffer(), gradient.getSpecialShapeInfo(), + mergeAvgBpCudaLauncher<<getCudaStream()>>>(gradient.specialBuffer(), gradient.specialShapeInfo(), pOutBuffers, pOutShapes, nArrSize, length, bSameOrderAndEws1); manager.synchronize(); @@ -396,7 +405,7 @@ namespace sd { ////////////////////////////////////////////////////////////////////////// template - static __global__ void mergeAddCudaLauncher(void** inArrs, void** inShapes, const int numArrays, void* voutput, Nd4jLong* outputShape, Nd4jLong length) { + static __global__ void mergeAddCudaLauncher(void** inArrs, void** inShapes, const int numArrays, void* voutput, const Nd4jLong* outputShape, Nd4jLong length) { auto output = reinterpret_cast(voutput); @@ -421,11 +430,11 @@ 
namespace sd { static void mergeAdd_(sd::LaunchContext* context, const std::vector& inArrs, NDArray& output) { int nArrSize = static_cast(inArrs.size()); - std::vector inBuffers(nArrSize), inShapes(nArrSize); + std::vector inBuffers(nArrSize), inShapes(nArrSize); for (int e = 0; e < nArrSize; e++) { - inBuffers[e] = inArrs[e]->getSpecialBuffer(); - inShapes[e] = inArrs[e]->getSpecialShapeInfo(); + inBuffers[e] = inArrs[e]->specialBuffer(); + inShapes[e] = inArrs[e]->specialShapeInfo(); } PointersManager manager(context, "mergeAdd"); @@ -437,7 +446,7 @@ namespace sd { const int threadsPerBlock = MAX_NUM_THREADS / 2; const int blocksPerGrid = (length + threadsPerBlock - 1) / threadsPerBlock; - mergeAddCudaLauncher << getCudaStream() >> > (pInBuffers, pInShapes, nArrSize, output.getSpecialBuffer(), output.getSpecialShapeInfo(), length); + mergeAddCudaLauncher<<getCudaStream()>>>(pInBuffers, pInShapes, nArrSize, output.specialBuffer(), output.specialShapeInfo(), length); manager.synchronize(); } @@ -454,10 +463,10 @@ namespace sd { ////////////////////////////////////////////////////////////////////////// template - static __global__ void mergeAddBpCudaLauncher(void* vgradient, Nd4jLong* gradientShape, void** outArrs, void** outShapes, + static __global__ void mergeAddBpCudaLauncher(const void* vgradient, const Nd4jLong* gradientShape, void** outArrs, void** outShapes, const int numArrays, Nd4jLong length, bool bSameOrderAndEws1) { - auto grad = reinterpret_cast(vgradient); + auto grad = reinterpret_cast(vgradient); const auto tid = blockIdx.x * blockDim.x + threadIdx.x; const auto step = gridDim.x * blockDim.x; @@ -491,11 +500,11 @@ namespace sd { int nArrSize = static_cast(outArrs.size()); - std::vector outBuffers(nArrSize), outShapes(nArrSize); + std::vector outBuffers(nArrSize), outShapes(nArrSize); for (int e = 0; e < nArrSize; e++) { - outBuffers[e] = outArrs[e]->getSpecialBuffer(); - outShapes[e] = outArrs[e]->getSpecialShapeInfo(); + outBuffers[e] = 
outArrs[e]->specialBuffer(); + outShapes[e] = outArrs[e]->specialShapeInfo(); } PointersManager manager(context, "mergeAddBp"); @@ -508,7 +517,7 @@ namespace sd { const int threadsPerBlock = MAX_NUM_THREADS / 2; const int blocksPerGrid = (length + threadsPerBlock - 1) / threadsPerBlock; - mergeAddBpCudaLauncher << getCudaStream() >> > (gradient.getSpecialBuffer(), gradient.getSpecialShapeInfo(), + mergeAddBpCudaLauncher<<getCudaStream()>>>(gradient.specialBuffer(), gradient.specialShapeInfo(), pOutBuffers, pOutShapes, nArrSize, length, bSameOrderAndEws1); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/meshgrid.cu b/libnd4j/include/ops/declarable/helpers/cuda/meshgrid.cu index 53570a0ba..3f2ed13b5 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/meshgrid.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/meshgrid.cu @@ -88,12 +88,12 @@ namespace helpers { } PointersManager pm(context, "meshgrid"); - std::vector hInBuffers(rank); + std::vector hInBuffers(rank); std::vector hOutBuffers(rank); - std::vector hInShapes(rank); + std::vector hInShapes(rank); - std::vector hOutTadShapes(rank); - std::vector hOutTadOffsets(rank); + std::vector hOutTadShapes(rank); + std::vector hOutTadOffsets(rank); std::vector hNumTads(rank); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/nth_element.cu b/libnd4j/include/ops/declarable/helpers/cuda/nth_element.cu index 4f26ef397..c3b4abc51 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/nth_element.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/nth_element.cu @@ -30,7 +30,7 @@ namespace ops { namespace helpers { template - static __global__ void fillUpElementKernel(void* outputBuffer, Nd4jLong* outputShapeInfo, void* inputBuffer, Nd4jLong* inputShapeInfo, Nd4jLong* pTadShape, Nd4jLong* pTadOffsets, Nd4jLong n) { + static __global__ void fillUpElementKernel(void* outputBuffer, Nd4jLong const* outputShapeInfo, void* inputBuffer, Nd4jLong const* inputShapeInfo, Nd4jLong const* 
pTadShape, Nd4jLong const* pTadOffsets, Nd4jLong n) { __shared__ Nd4jLong bufferLength; auto z = reinterpret_cast(outputBuffer); @@ -66,7 +66,7 @@ namespace helpers { else { // rank greater than 1 std::vector lastDims({input->rankOf() - 1});// = ShapeUtils::evalDimsToExclude(input->rankOf(), {input->rankOf() - 1}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(sortedVals.getShapeInfo(), lastDims); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(sortedVals.shapeInfo(), lastDims); auto pTadShape = packX.specialShapeInfo(); auto pTadShapeH = packX.primaryShapeInfo(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/one_hot.cu b/libnd4j/include/ops/declarable/helpers/cuda/one_hot.cu index f1b87c1aa..f15200459 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/one_hot.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/one_hot.cu @@ -93,12 +93,12 @@ void onehot(const sd::LaunchContext* context, const NDArray *indices, NDArray *o const int threadsPerBlock = MAX_NUM_THREADS / 4; const int blocksPerGrid = (output->lengthOf() + threadsPerBlock - 1) / threadsPerBlock; - const int sharedMem = threadsPerBlock * sizeof(decltype(*output->getShapeInfo())) * output->rankOf() + 128; + const int sharedMem = threadsPerBlock * sizeof(decltype(*output->shapeInfo())) * output->rankOf() + 128; PointersManager manager(context, "onehot"); NDArray::prepareSpecialUse({output}, {indices}); - BUILD_DOUBLE_SELECTOR(xType, zType, onehotCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), indices->getSpecialBuffer(), indices->getSpecialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), axis, depth, on, off), LIBND4J_TYPES, LIBND4J_TYPES); + BUILD_DOUBLE_SELECTOR(xType, zType, onehotCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), indices->specialBuffer(), indices->specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), axis, depth, on, 
off), LIBND4J_TYPES, LIBND4J_TYPES); NDArray::registerSpecialUse({output}, {indices}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/pad.cu b/libnd4j/include/ops/declarable/helpers/cuda/pad.cu index fc4d96ce0..842a41ced 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/pad.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/pad.cu @@ -139,7 +139,7 @@ namespace sd { const auto xType = input.dataType(); const auto yType = paddings.dataType(); - BUILD_DOUBLE_SELECTOR(xType, yType, padCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), mode, input.getSpecialBuffer(), input.getSpecialShapeInfo(), paddings.getSpecialBuffer(), paddings.getSpecialShapeInfo(), output.getSpecialBuffer(), output.getSpecialShapeInfo(), padValue.getSpecialBuffer()), LIBND4J_TYPES, INDEXING_TYPES); + BUILD_DOUBLE_SELECTOR(xType, yType, padCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), mode, input.specialBuffer(), input.specialShapeInfo(), paddings.specialBuffer(), paddings.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), padValue.specialBuffer()), LIBND4J_TYPES, INDEXING_TYPES); NDArray::registerSpecialUse({&output}, {&input, &paddings, &padValue}); manager.synchronize(); @@ -148,7 +148,7 @@ namespace sd { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// template - static __global__ void mirrorPadLinearKernel(void const* vx, Nd4jLong* xShape, void* vz, Nd4jLong* zShape, Nd4jLong leftSide, Nd4jLong leftSideCorrected, Nd4jLong xLen, Nd4jLong len, Nd4jLong zLen) { + static __global__ void mirrorPadLinearKernel(void const* vx, const Nd4jLong* xShape, void* vz, const Nd4jLong* zShape, Nd4jLong leftSide, Nd4jLong leftSideCorrected, Nd4jLong xLen, Nd4jLong len, Nd4jLong zLen) { __shared__ T const* x; __shared__ T* z; @@ -178,7 +178,7 @@ namespace sd { } template - static __global__ void 
mirrorPadKernel(void const* vx, Nd4jLong* xShape, void* vz, Nd4jLong* zShape, Nd4jLong outLen, void const* paddings, Nd4jLong* paddingShape, int reflBorder) { + static __global__ void mirrorPadKernel(void const* vx, const Nd4jLong* xShape, void* vz, const Nd4jLong* zShape, Nd4jLong outLen, void const* paddings, const Nd4jLong* paddingShape, int reflBorder) { __shared__ F const* x; __shared__ I const* pads; @@ -247,11 +247,11 @@ namespace sd { const auto leftSideCorrected = leftSide - reflBorder; const Nd4jLong len = 2*(inLen-1) + leftSide + reflBorder; - mirrorPadLinearKernel<<<256, 512, 256, *stream>>>(input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), leftSide, leftSideCorrected, inLen, len, outLen); + mirrorPadLinearKernel<<<256, 512, 256, *stream>>>(input.specialBuffer(), input.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), leftSide, leftSideCorrected, inLen, len, outLen); sd::DebugHelper::checkErrorCode(stream, "helpers::mirrorPadLinearKernel(...) failed"); } else { - mirrorPadKernel<<<256, 256, 8192, *stream>>>(input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), outLen, paddings.getSpecialBuffer(), paddings.getSpecialShapeInfo(), reflBorder); + mirrorPadKernel<<<256, 256, 8192, *stream>>>(input.specialBuffer(), input.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), outLen, paddings.specialBuffer(), paddings.specialShapeInfo(), reflBorder); sd::DebugHelper::checkErrorCode(stream, "helpers::mirrorPadKernel(...) 
failed"); } NDArray::registerSpecialUse({&output}, {&input, &paddings}); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/percentile.cu b/libnd4j/include/ops/declarable/helpers/cuda/percentile.cu index ebb067251..7f2bcdcfd 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/percentile.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/percentile.cu @@ -30,7 +30,10 @@ namespace ops { namespace helpers { template - static _CUDA_G void percentileKernel(void *vx, Nd4jLong *xTadShapeInfo, Nd4jLong *xTadOffsets, const Nd4jLong numTads, const Nd4jLong tadLength, void *vz, Nd4jLong *zShapeInfo, const Nd4jLong zLength, const Nd4jLong position) { + static _CUDA_G void percentileKernel(void *vx, const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffsets, + const Nd4jLong numTads, const Nd4jLong tadLength, + void *vz, const Nd4jLong *zShapeInfo, const Nd4jLong zLength, + const Nd4jLong position) { for (int t = blockIdx.x; t < numTads; t += gridDim.x) { auto x = reinterpret_cast(vx) + xTadOffsets[t]; auto z = reinterpret_cast(vz); @@ -93,8 +96,8 @@ namespace helpers { else shape::checkDimensions(inputRank, axis); - auto tempArray = input.dup(input.ordering()); - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(tempArray.getShapeInfo(), axis); + auto tempArray = input.dup(); + auto packX = ConstantTadHelper::getInstance()->tadForDimensions(tempArray.shapeInfo(), axis); auto tadLength = shape::length(packX.primaryShapeInfo()); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/polyGamma.cu b/libnd4j/include/ops/declarable/helpers/cuda/polyGamma.cu index 2f96d96e7..3e82632e2 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/polyGamma.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/polyGamma.cu @@ -90,7 +90,7 @@ void polyGamma(sd::LaunchContext * context, const NDArray& n, const NDArray& x, int threadsPerBlock = MAX_NUM_THREADS / 2; int blocksPerGrid = (z.lengthOf() + threadsPerBlock - 1) / threadsPerBlock; - 
BUILD_SINGLE_SELECTOR(n.dataType(), polyGammaCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), n.getSpecialBuffer(), n.getSpecialShapeInfo(), x.getSpecialBuffer(), x.getSpecialShapeInfo(), z.getSpecialBuffer(), z.getSpecialShapeInfo()), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(n.dataType(), polyGammaCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), n.specialBuffer(), n.specialShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), z.specialBuffer(), z.specialShapeInfo()), FLOAT_TYPES); NDArray::registerSpecialUse({&z}, {&n, &x}); } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/prefix.cu b/libnd4j/include/ops/declarable/helpers/cuda/prefix.cu index 3d1fd104a..d2832ec80 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/prefix.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/prefix.cu @@ -148,8 +148,8 @@ static void prefixPerBlockCudaLauncher(const int blocksPerGrid, const int thread /////////////////////////////////////////////////////////////////// void prefix(sd::LaunchContext * context, scalar::Ops op, const NDArray* x, NDArray* z, const std::vector& dims, bool exclusive, bool reverse) { - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->getShapeInfo(), dims); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(z->getShapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(z->shapeInfo(), dims); const Nd4jLong numTads = packX.numberOfTads(); const Nd4jLong tadLen = x->lengthOf() / numTads; @@ -161,7 +161,7 @@ void prefix(sd::LaunchContext * context, scalar::Ops op, const NDArray* x, NDArr PointersManager manager(context, "prefix"); NDArray::prepareSpecialUse({z}, {x}); - BUILD_SINGLE_SELECTOR(x->dataType(), prefixPerBlockCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), op, x->getSpecialBuffer(), 
packX.platformShapeInfo(), packX.platformOffsets(), z->specialBuffer(), packZ.platformShapeInfo(), packZ.platformOffsets(), numTads, tadLen, exclusive, reverse), NUMERIC_TYPES); + BUILD_SINGLE_SELECTOR(x->dataType(), prefixPerBlockCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), op, x->specialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), z->specialBuffer(), packZ.platformShapeInfo(), packZ.platformOffsets(), numTads, tadLen, exclusive, reverse), NUMERIC_TYPES); NDArray::registerSpecialUse({z}, {x}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/print_variable.cu b/libnd4j/include/ops/declarable/helpers/cuda/print_variable.cu index a518ddd72..6733ce642 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/print_variable.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/print_variable.cu @@ -51,7 +51,7 @@ namespace sd { NDArray::prepareSpecialUse({}, {&array}); PointersManager pm(&ctx, "print_device"); - BUILD_SINGLE_SELECTOR(array.dataType(), exec_print_device, (ctx, array.getSpecialBuffer(), array.getSpecialShapeInfo()), LIBND4J_TYPES) + BUILD_SINGLE_SELECTOR(array.dataType(), exec_print_device, (ctx, array.specialBuffer(), array.specialShapeInfo()), LIBND4J_TYPES) pm.synchronize(); NDArray::registerSpecialUse({}, {&array}); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/qr.cu b/libnd4j/include/ops/declarable/helpers/cuda/qr.cu index 394840376..828867b4e 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/qr.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/qr.cu @@ -70,7 +70,7 @@ namespace helpers { /* m = I - v v^T */ template - static __global__ void vmulKernel(T* resBuf, Nd4jLong* resShape, T const* vBuff, Nd4jLong const* vShape, Nd4jLong n) { + static __global__ void vmulKernel(T* resBuf, const Nd4jLong* resShape, T const* vBuff, Nd4jLong const* vShape, Nd4jLong n) { for (auto i = blockIdx.x; i < n; i += gridDim.x) for (auto j = threadIdx.x; j < n; j += 
blockDim.x) { Nd4jLong posR[] = {i, j}; @@ -89,7 +89,7 @@ namespace helpers { auto stream = context->getCudaStream(); vmulKernel<<<128, 128, 128, *stream>>>(res.dataBuffer()->specialAsT(), res.specialShapeInfo(), - reinterpret_cast(v.getSpecialBuffer()), v.getSpecialShapeInfo(), n); + reinterpret_cast(v.specialBuffer()), v.specialShapeInfo(), n); return res; } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/random.cu b/libnd4j/include/ops/declarable/helpers/cuda/random.cu index 59f22d878..fe692a0df 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/random.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/random.cu @@ -44,8 +44,8 @@ namespace helpers { * output - distributed output. * */ template - static __global__ void fillGammaKernel(T* uList, Nd4jLong uLength, T* alpha, Nd4jLong* alphaShape, - T* beta, Nd4jLong* betaShape, T* output, Nd4jLong* outputShape) { + static __global__ void fillGammaKernel(T* uList, Nd4jLong uLength, T* alpha, const Nd4jLong* alphaShape, + T* beta, const Nd4jLong* betaShape, T* output, const Nd4jLong* outputShape) { // fill up __shared__ Nd4jLong aLength; if (threadIdx.x == 0) { @@ -70,7 +70,7 @@ namespace helpers { template static void fillRandomGamma_(LaunchContext* context, graph::RandomGenerator& rng, NDArray* alpha, NDArray* beta, NDArray* output) { // To fill up output need to broadcast alpha and beta to the same shape and in - Nd4jLong* broadcasted = nullptr; + const Nd4jLong* broadcasted = nullptr; if (beta != nullptr) ShapeUtils::evalBroadcastShapeInfo(*alpha, *beta, true, broadcasted, context->getWorkspace()); else @@ -136,8 +136,8 @@ namespace helpers { return x. 
* */ template - static __global__ void fillPoissonKernel(T* uList, Nd4jLong uLength, T* lambda, Nd4jLong* lambdaShape, T* output, - Nd4jLong* outputShape) { + static __global__ void fillPoissonKernel(T* uList, Nd4jLong uLength, T* lambda, const Nd4jLong* lambdaShape, + T* output, const Nd4jLong* outputShape) { __shared__ Nd4jLong step; @@ -186,7 +186,7 @@ namespace helpers { BUILD_SINGLE_TEMPLATE(template void fillRandomPoisson_, (LaunchContext* context, graph::RandomGenerator& rng, NDArray* lambda, NDArray* output), FLOAT_NATIVE); template - static __global__ void fillUniformKernel(graph::RandomGenerator* devRng, T from, T to, T* output, Nd4jLong* outputShape) { + static __global__ void fillUniformKernel(graph::RandomGenerator* devRng, T from, T to, T* output, const Nd4jLong* outputShape) { auto start = blockIdx.x * blockDim.x + threadIdx.x; auto step = blockDim.x * gridDim.x; @@ -247,9 +247,6 @@ namespace helpers { BUILD_SINGLE_SELECTOR(output->dataType(), fillRandomUniform_, (context, rng, min, max, output), NUMERIC_TYPES); } - BUILD_SINGLE_TEMPLATE(template void fillRandomUniform_, (LaunchContext* context, - graph::RandomGenerator& rng, NDArray* min, NDArray* max, NDArray* output), NUMERIC_TYPES); - /////////////////////////////////////////////////////////////////// // used https://en.wikipedia.org/wiki/Categorical_distribution // methods: gumbel trick + softmax + argmax @@ -346,8 +343,8 @@ void fillRandomMultiNomial(LaunchContext* context, graph::RandomGenerator& rng, NDArray::prepareSpecialUse({ &output }, { &input }); BUILD_DOUBLE_SELECTOR(input.dataType(), output.dataType(), fillMultiNomialCudaLauncher, - (blocksPerGrid, threadsPerBlock, context->getCudaStream(), devRng, input.getSpecialBuffer(), - input.getSpecialShapeInfo(), output.specialBuffer(), + (blocksPerGrid, threadsPerBlock, context->getCudaStream(), devRng, input.specialBuffer(), + input.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), batchValue, numOfSamples, numOfClassX, 
dimA), FLOAT_TYPES, INDEXING_TYPES); NDArray::registerSpecialUse({ &output }, { &input }); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/range.cu b/libnd4j/include/ops/declarable/helpers/cuda/range.cu index 668518d82..e33f95c52 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/range.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/range.cu @@ -39,7 +39,7 @@ namespace helpers { // be careful: outVector must have c-order and ews = 1 !!! template static void _range(sd::LaunchContext * context, const NDArray& start, const NDArray& delta, NDArray& outVector) { - global_range<<<512, 512, 2048, *context->getCudaStream()>>>(outVector.getSpecialBuffer(), outVector.lengthOf(), start.e(0), delta.e(0)); + global_range<<<512, 512, 2048, *context->getCudaStream()>>>(outVector.specialBuffer(), outVector.lengthOf(), start.e(0), delta.e(0)); } void range(sd::LaunchContext * context, const NDArray& start, const NDArray& delta, NDArray& outVector) { diff --git a/libnd4j/include/ops/declarable/helpers/cuda/reverse.cu b/libnd4j/include/ops/declarable/helpers/cuda/reverse.cu index 793d90f91..b6bbeea4c 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/reverse.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/reverse.cu @@ -31,8 +31,8 @@ namespace ops { namespace helpers { template - static __global__ void reverseTadKernel(void* vinput, Nd4jLong *inputShape, void* voutput, Nd4jLong *outputShape, Nd4jLong *inputTadShape, Nd4jLong *inputTadOffsets, Nd4jLong *outputTadShape, Nd4jLong *outputTadOffsets, uint64_t limit, uint64_t numOfElemsToReverse, uint64_t numTads) { - auto input = reinterpret_cast(vinput); + static __global__ void reverseTadKernel(const void* vinput, const Nd4jLong *inputShape, void* voutput, const Nd4jLong *outputShape, const Nd4jLong *inputTadShape, const Nd4jLong *inputTadOffsets, const Nd4jLong *outputTadShape, const Nd4jLong *outputTadOffsets, uint64_t limit, uint64_t numOfElemsToReverse, uint64_t numTads) { + auto input = 
reinterpret_cast(vinput); auto output = reinterpret_cast(voutput); const auto tid = blockIdx.x * blockDim.x + threadIdx.x; const auto step = gridDim.x * blockDim.x; @@ -92,11 +92,11 @@ namespace helpers { template - static __global__ void reverseArrayKernel(void* input, Nd4jLong *inputShape, void* output, Nd4jLong *outputShape, Nd4jLong numOfElemsToReverse) { + static __global__ void reverseArrayKernel(const void* input, const Nd4jLong *inputShape, void* output, const Nd4jLong *outputShape, Nd4jLong numOfElemsToReverse) { const auto tid = blockIdx.x * blockDim.x + threadIdx.x; const auto step = gridDim.x * blockDim.x; __shared__ int linearStatus; - __shared__ T* inputArr; + __shared__ const T* inputArr; __shared__ T* outputArr; __shared__ char inputOrder, outputOrder; @@ -105,7 +105,7 @@ namespace helpers { char inputOrder = shape::order(inputShape); char outputOrder = shape::order(outputShape); - inputArr = reinterpret_cast(input); + inputArr = reinterpret_cast(input); outputArr = reinterpret_cast(output); } __syncthreads(); @@ -141,9 +141,9 @@ namespace helpers { } template - static void reverseTad(sd::LaunchContext * context, const NDArray* input, NDArray* output, Nd4jLong *inputTadShape, Nd4jLong *inputTadOffsets, Nd4jLong *outputTadShape, Nd4jLong *outputTadOffsets, uint64_t tadLength) { + static void reverseTad(sd::LaunchContext * context, const NDArray* input, NDArray* output, const Nd4jLong *inputTadShape, const Nd4jLong *inputTadOffsets, const Nd4jLong *outputTadShape, const Nd4jLong *outputTadOffsets, uint64_t tadLength) { auto stream = context->getCudaStream(); - reverseTadKernel<<<256, 512, 8192, *stream>>>(input->getSpecialBuffer(), input->getSpecialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), inputTadShape, inputTadOffsets, outputTadShape, outputTadOffsets, input->lengthOf(), tadLength, input->lengthOf() / tadLength); + reverseTadKernel<<<256, 512, 8192, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), 
output->specialBuffer(), output->specialShapeInfo(), inputTadShape, inputTadOffsets, outputTadShape, outputTadOffsets, input->lengthOf(), tadLength, input->lengthOf() / tadLength); } template @@ -153,7 +153,7 @@ namespace helpers { if (numOfElemsToReverse == 0) numOfReverse = input->lengthOf(); - reverseArrayKernel<<<256, 512, 8192, *stream>>>(input->getSpecialBuffer(), input->getSpecialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), numOfReverse); + reverseArrayKernel<<<256, 512, 8192, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), numOfReverse); } @@ -164,12 +164,12 @@ namespace helpers { seqLengths->syncToHost(); auto stream = context->getCudaStream(); - if(input->isVector() || shape::isLikeVector(input->getShapeInfo(), posOfNonUnityDim) || seqLengths->lengthOf() == 1) { + if(input->isVector() || shape::isLikeVector(input->shapeInfo(), posOfNonUnityDim) || seqLengths->lengthOf() == 1) { int numOfElemsToReverse = seqLengths->e(0); if((seqDim == 0 && input->sizeAt(0) == 1) || (batchDim == posOfNonUnityDim)) output->assign(input); else - reverseArrayKernel<<<256, 512, 8192, *stream>>>(input->getSpecialBuffer(), input->getSpecialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), numOfElemsToReverse);//helpers::reverseArray(context, const_cast(input), output, numOfElemsToReverse); + reverseArrayKernel<<<256, 512, 8192, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), numOfElemsToReverse);//helpers::reverseArray(context, const_cast(input), output, numOfElemsToReverse); } else { @@ -202,7 +202,7 @@ namespace helpers { NDArray::prepareSpecialUse({output}, {input, seqLengths}); // if op isn't inplace - copy original data into output array - if (output->getSpecialBuffer() != input->getSpecialBuffer()) + if (output->specialBuffer() != input->specialBuffer()) output->assign(input); 
BUILD_SINGLE_SELECTOR(input->dataType(), reverseSequence_, (context, input, seqLengths, output, seqDim, batchDim), LIBND4J_TYPES); @@ -214,8 +214,8 @@ namespace helpers { // we need to reverse axis only if that's new op std::vector dimensions = isBackProp ? ShapeUtils::evalDimsToExclude(input->rankOf(), *intArgs) : *intArgs; std::vector axis = ShapeUtils::evalDimsToExclude(input->rankOf(), dimensions); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); @@ -229,9 +229,6 @@ namespace helpers { NDArray::registerSpecialUse({output}, {input}); } - -BUILD_SINGLE_TEMPLATE(template void reverseArray, (sd::LaunchContext * context, const NDArray *inArr, NDArray *outArr, Nd4jLong numOfElemsToReverse), LIBND4J_TYPES); - } } } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/roll.cu b/libnd4j/include/ops/declarable/helpers/cuda/roll.cu index d014b9115..773f7279d 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/roll.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/roll.cu @@ -27,8 +27,8 @@ namespace ops { namespace helpers { template - static void _CUDA_D rollKernelLinearStage1Dev(void *vx, Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, Nd4jLong fullLength, int actualShift) { - auto x = reinterpret_cast(vx); + static void _CUDA_D rollKernelLinearStage1Dev(const void *vx, const Nd4jLong *xShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Nd4jLong fullLength, int actualShift) { + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto xEws = shape::elementWiseStride(xShapeInfo); @@ -69,13 +69,13 @@ namespace helpers { } template - static void _CUDA_G rollKernelLinearStage1(void *vx, 
Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, Nd4jLong fullLength, int actualShift) { + static void _CUDA_G rollKernelLinearStage1(const void *vx, const Nd4jLong *xShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Nd4jLong fullLength, int actualShift) { rollKernelLinearStage1Dev(vx, xShapeInfo, vz, zShapeInfo, fullLength, actualShift); } template - static void _CUDA_G rollKernelLinearStage2(void *vx, Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, Nd4jLong fullLength, int actualShift, int shiftCount) { - auto x = reinterpret_cast(vx); + static void _CUDA_G rollKernelLinearStage2(const void *vx, const Nd4jLong *xShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Nd4jLong fullLength, int actualShift, int shiftCount) { + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto xEws = shape::elementWiseStride(xShapeInfo); @@ -126,8 +126,8 @@ namespace helpers { } template - static void _CUDA_G rollKernelLinearStage3(void *vx, Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, Nd4jLong fullLength, int actualShift, int remainShift) { - auto x = reinterpret_cast(vx); + static void _CUDA_G rollKernelLinearStage3(const void *vx, const Nd4jLong *xShapeInfo, void *vz, const Nd4jLong *zShapeInfo, Nd4jLong fullLength, int actualShift, int remainShift) { + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); auto xEws = shape::elementWiseStride(xShapeInfo); @@ -170,7 +170,7 @@ namespace helpers { } template - static void _CUDA_D swapTadsKernel(void *vx, void *vz, Nd4jLong *zShapeInfo, Nd4jLong tadLength) { + static void _CUDA_D swapTadsKernel(void *vx, void *vz, const Nd4jLong *zShapeInfo, Nd4jLong tadLength) { auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); @@ -202,8 +202,8 @@ namespace helpers { } template - static void _CUDA_G rollKernelFullAnyDimensionStage1(void *vx, Nd4jLong *xTadShapeInfo, Nd4jLong *xTadOffsets, void *vz, Nd4jLong *zTadShapeInfo, Nd4jLong *zTadOffsets, int numTads, Nd4jLong tadLength, int dim, Nd4jLong 
sizeAt, int theShift) { - auto x = reinterpret_cast(vx); + static void _CUDA_G rollKernelFullAnyDimensionStage1(const void *vx, const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffsets, void *vz, const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffsets, int numTads, Nd4jLong tadLength, int dim, Nd4jLong sizeAt, int theShift) { + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); for (int e = blockIdx.x + theShift; e < sizeAt - theShift; e += gridDim.x) { @@ -215,8 +215,8 @@ namespace helpers { } template - static void _CUDA_G rollKernelFullAnyDimensionStage2(void *vx, Nd4jLong *xTadShapeInfo, Nd4jLong *xTadOffsets, void *vz, Nd4jLong *zTadShapeInfo, Nd4jLong *zTadOffsets, int numTads, Nd4jLong tadLength, int dim, Nd4jLong sizeAt, int theShift) { - auto x = reinterpret_cast(vx); + static void _CUDA_G rollKernelFullAnyDimensionStage2(void *vx, const Nd4jLong *xTadShapeInfo, const Nd4jLong *xTadOffsets, void *vz, const Nd4jLong *zTadShapeInfo, const Nd4jLong *zTadOffsets, int numTads, Nd4jLong tadLength, int dim, Nd4jLong sizeAt, int theShift) { + auto x = reinterpret_cast(vx); auto z = reinterpret_cast(vz); for (int e = blockIdx.x; e < theShift; e += gridDim.x) { diff --git a/libnd4j/include/ops/declarable/helpers/cuda/s_t_b.cu b/libnd4j/include/ops/declarable/helpers/cuda/s_t_b.cu index 5784699d0..8b7bfb2b5 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/s_t_b.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/s_t_b.cu @@ -108,7 +108,7 @@ void batchToSpace(sd::LaunchContext* context, const NDArray& input, NDArray& out PointersManager manager(context, "batchToSpace"); NDArray::prepareSpecialUse({&output}, {&inputRearranged1}); - BUILD_SINGLE_SELECTOR(input.dataType(), batchToSpaceCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), inputRearranged1.getSpecialBuffer(), inputRearranged1.getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), cropBottom, cropLeft), LIBND4J_TYPES); + 
BUILD_SINGLE_SELECTOR(input.dataType(), batchToSpaceCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), inputRearranged1.specialBuffer(), inputRearranged1.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), cropBottom, cropLeft), LIBND4J_TYPES); NDArray::registerSpecialUse({&output}, {&inputRearranged1}); manager.synchronize(); @@ -239,7 +239,7 @@ void batchToSpaceND(sd::LaunchContext* context, const NDArray& input, const NDAr PointersManager manager(context, "batchToSpaceND"); NDArray::prepareSpecialUse({&output}, {&inputRearranged1, &crop}); - BUILD_DOUBLE_SELECTOR(input.dataType(), crop.dataType(), batchToSpaceNDCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), inputRearranged1.getSpecialBuffer(), inputRearranged1.getSpecialShapeInfo(), crop.getSpecialBuffer(), crop.getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), numOfSpatialDims), LIBND4J_TYPES, INTEGER_TYPES); + BUILD_DOUBLE_SELECTOR(input.dataType(), crop.dataType(), batchToSpaceNDCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), inputRearranged1.specialBuffer(), inputRearranged1.specialShapeInfo(), crop.specialBuffer(), crop.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), numOfSpatialDims), LIBND4J_TYPES, INTEGER_TYPES); NDArray::registerSpecialUse({&output}, {&inputRearranged1, &crop}); manager.synchronize(); @@ -331,12 +331,12 @@ void spaceToBatch(sd::LaunchContext* context, const NDArray& input, NDArray& out PointersManager manager(context, "spaceToBatch"); NDArray::prepareSpecialUse({&outputRearranged1}, {&input}); - BUILD_SINGLE_SELECTOR(input.dataType(), spaceToBatchCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), outputRearranged1.specialBuffer(), outputRearranged1.specialShapeInfo(), padBottom, padTop, padLeft, padRight), LIBND4J_TYPES); + 
BUILD_SINGLE_SELECTOR(input.dataType(), spaceToBatchCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), outputRearranged1.specialBuffer(), outputRearranged1.specialShapeInfo(), padBottom, padTop, padLeft, padRight), LIBND4J_TYPES); NDArray::registerSpecialUse({&outputRearranged1}, {&input}); manager.synchronize(); - if(output.getSpecialBuffer() != outputRearranged1.getSpecialBuffer()) + if(output.specialBuffer() != outputRearranged1.specialBuffer()) outputRearranged0.assign(outputRearranged1); } } @@ -478,12 +478,12 @@ void spaceToBatchND(sd::LaunchContext* context, const NDArray& input, const NDAr PointersManager manager(context, "spaceToBatchND"); NDArray::prepareSpecialUse({&outputRearranged1}, {&input, &padding}); - BUILD_DOUBLE_SELECTOR(input.dataType(), padding.dataType(), spaceToBatchNDCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), padding.getSpecialBuffer(), padding.getSpecialShapeInfo(), outputRearranged1.specialBuffer(), outputRearranged1.specialShapeInfo(), numOfSpatialDims), LIBND4J_TYPES, INTEGER_TYPES); + BUILD_DOUBLE_SELECTOR(input.dataType(), padding.dataType(), spaceToBatchNDCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), padding.specialBuffer(), padding.specialShapeInfo(), outputRearranged1.specialBuffer(), outputRearranged1.specialShapeInfo(), numOfSpatialDims), LIBND4J_TYPES, INTEGER_TYPES); NDArray::registerSpecialUse({&outputRearranged1}, {&input, &padding}); manager.synchronize(); - if(output.getSpecialBuffer() != outputRearranged1.getSpecialBuffer()) + if(output.specialBuffer() != outputRearranged1.specialBuffer()) outputRearranged0.assign(outputRearranged1); } } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/s_t_d.cu b/libnd4j/include/ops/declarable/helpers/cuda/s_t_d.cu index 
4290a57c6..19a1937dd 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/s_t_d.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/s_t_d.cu @@ -24,8 +24,12 @@ namespace sd { namespace ops { namespace helpers { template - static _CUDA_G void spaceToDepthKernel(void *vx, Nd4jLong *xShapeInfo, void *vz, Nd4jLong *zShapeInfo, const int block_size, const bool isNHWC) { - auto input_ptr = reinterpret_cast(vx); + static _CUDA_G void spaceToDepthKernel( + const void *vx, const Nd4jLong *xShapeInfo, + void *vz, const Nd4jLong *zShapeInfo, + const int block_size, + const bool isNHWC) { + auto input_ptr = reinterpret_cast(vx); auto output_ptr = reinterpret_cast(vz); const int batch_size = shape::sizeAt(xShapeInfo, 0); @@ -91,7 +95,7 @@ namespace helpers { template static void _spaceTodepth_(sd::LaunchContext * context, const NDArray &input, NDArray *output, int block_size, bool isNHWC) { - spaceToDepthKernel<<<512, 512, 1024, *context->getCudaStream()>>>(input.getSpecialBuffer(), input.getSpecialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), block_size, isNHWC); + spaceToDepthKernel<<<512, 512, 1024, *context->getCudaStream()>>>(input.specialBuffer(), input.specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), block_size, isNHWC); } void _spaceTodepth(sd::LaunchContext * context, const NDArray &input, NDArray *output, int block_size, bool isNHWC) { @@ -99,9 +103,6 @@ namespace helpers { BUILD_SINGLE_SELECTOR(input.dataType(), _spaceTodepth_, (context, input, output, block_size, isNHWC), LIBND4J_TYPES); NDArray::registerSpecialUse({output}, {&input}); } - - BUILD_SINGLE_TEMPLATE(template void _spaceTodepth_, (sd::LaunchContext *context, const NDArray &input, NDArray *output, int block_size, bool isNHWC), LIBND4J_TYPES); - } } } \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/helpers/cuda/scatter.cu b/libnd4j/include/ops/declarable/helpers/cuda/scatter.cu index 364ad83d2..94b0e0080 100644 --- 
a/libnd4j/include/ops/declarable/helpers/cuda/scatter.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/scatter.cu @@ -96,7 +96,7 @@ Nd4jLong checkIndices(sd::LaunchContext *context, const NDArray& indices, const NDArray numOfBadIndx(sd::DataType::INT64, context, true); NDArray::prepareSpecialUse({&numOfBadIndx}, {&indices}); - BUILD_SINGLE_SELECTOR(xType, checkIndicesCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), indices.getSpecialBuffer(), indices.getSpecialShapeInfo(), reinterpret_cast(numOfBadIndx.getSpecialBuffer()), output.getSpecialShapeInfo(), axis), INDEXING_TYPES); + BUILD_SINGLE_SELECTOR(xType, checkIndicesCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), indices.specialBuffer(), indices.specialShapeInfo(), reinterpret_cast(numOfBadIndx.specialBuffer()), output.specialShapeInfo(), axis), INDEXING_TYPES); NDArray::registerSpecialUse({&numOfBadIndx}, {&indices}); manager.synchronize(); @@ -346,7 +346,7 @@ void scatter(sd::LaunchContext *context, pairwise::Ops op, const NDArray& indic PointersManager manager(context, "scatter"); NDArray::prepareSpecialUse({&output}, {&updates, &indices}); - BUILD_DOUBLE_SELECTOR(xType, yType, scatterCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), op, indices.getSpecialBuffer(), indices.getSpecialShapeInfo(), updates.getSpecialBuffer(), updates.getSpecialShapeInfo(), output.getSpecialBuffer(), output.getSpecialShapeInfo(), lock), INDEXING_TYPES, GENERIC_NUMERIC_TYPES); + BUILD_DOUBLE_SELECTOR(xType, yType, scatterCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), op, indices.specialBuffer(), indices.specialShapeInfo(), updates.specialBuffer(), updates.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), lock), INDEXING_TYPES, GENERIC_NUMERIC_TYPES); NDArray::registerSpecialUse({&output}, {&updates, &indices}); manager.synchronize(); @@ -612,7 +612,7 @@ void 
scatterND(sd::LaunchContext *context, pairwise::Ops op, const NDArray& ind PointersManager manager(context, "scatterND"); NDArray::prepareSpecialUse({&output}, {&updates, &indices}); - BUILD_DOUBLE_SELECTOR(xType, yType, scatterNDCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), op, indices.getSpecialBuffer(), indices.getSpecialShapeInfo(), updates.getSpecialBuffer(), updates.getSpecialShapeInfo(), output.getSpecialBuffer(), output.getSpecialShapeInfo(), lock), INDEXING_TYPES, GENERIC_NUMERIC_TYPES); + BUILD_DOUBLE_SELECTOR(xType, yType, scatterNDCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), op, indices.specialBuffer(), indices.specialShapeInfo(), updates.specialBuffer(), updates.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), lock), INDEXING_TYPES, GENERIC_NUMERIC_TYPES); NDArray::registerSpecialUse({&output}, {&updates, &indices}); manager.synchronize(); @@ -682,12 +682,12 @@ void scatterForLoss(sd::LaunchContext* context, const NDArray& indices, NDArray& if(calcGrad) { NDArray::prepareSpecialUse({&updates}, {&indices}); - BUILD_DOUBLE_SELECTOR(indices.dataType(), updates.dataType(), scatterForLossCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), indices.getSpecialBuffer(), indices.getSpecialShapeInfo(), updates.specialBuffer(), updates.specialShapeInfo(), nullptr, nullptr), INDEXING_TYPES, FLOAT_TYPES); + BUILD_DOUBLE_SELECTOR(indices.dataType(), updates.dataType(), scatterForLossCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), indices.specialBuffer(), indices.specialShapeInfo(), updates.specialBuffer(), updates.specialShapeInfo(), nullptr, nullptr), INDEXING_TYPES, FLOAT_TYPES); NDArray::registerSpecialUse({&updates}, {&indices}); } else { NDArray::prepareSpecialUse({&output}, {&indices, &updates}); - BUILD_DOUBLE_SELECTOR(indices.dataType(), updates.dataType(), scatterForLossCudaLauncher, 
(blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), indices.getSpecialBuffer(), indices.getSpecialShapeInfo(), updates.getSpecialBuffer(), updates.getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo()), INDEXING_TYPES, FLOAT_TYPES); + BUILD_DOUBLE_SELECTOR(indices.dataType(), updates.dataType(), scatterForLossCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), indices.specialBuffer(), indices.specialShapeInfo(), updates.specialBuffer(), updates.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo()), INDEXING_TYPES, FLOAT_TYPES); NDArray::registerSpecialUse({&output}, {&indices, &updates}); } @@ -736,8 +736,8 @@ __global__ static void scatterLockCuda(const int opCode, std::vector yTadDims(sizeOfUpdDims); std::iota(yTadDims.begin(), yTadDims.end(), 0); - auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(updates.getShapeInfo(), ShapeUtils::evalDimsToExclude(updates.rankOf(), yTadDims)); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.getShapeInfo(), zTadDims); + auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(updates.shapeInfo(), ShapeUtils::evalDimsToExclude(updates.rankOf(), yTadDims)); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), zTadDims); const Nd4jLong zTadLen = shape::length(packZ.primaryShapeInfo()); const Nd4jLong yTadLen = shape::length(packY.primaryShapeInfo()); @@ -748,7 +748,7 @@ __global__ static void scatterLockCuda(const int opCode, const auto xType = indices.dataType(); const auto yType = updates.dataType(); - BUILD_DOUBLE_SELECTOR(xType, yType, scatterLockCudaLauncher, (blocksPerGrid, threadsPerBlock, 1024, context->getCudaStream(), op, indices.getSpecialBuffer(), indices.getSpecialShapeInfo(), updates.getSpecialBuffer(), packY.specialShapeInfo(), packY.specialOffsets(), output.getSpecialBuffer(), packZ.specialShapeInfo(), packZ.specialOffsets(), 
indices.lengthOf(), yTadLen, zTadLen), INDEXING_TYPES, GENERIC_NUMERIC_TYPES); + BUILD_DOUBLE_SELECTOR(xType, yType, scatterLockCudaLauncher, (blocksPerGrid, threadsPerBlock, 1024, context->getCudaStream(), op, indices.specialBuffer(), indices.specialShapeInfo(), updates.specialBuffer(), packY.specialShapeInfo(), packY.specialOffsets(), output.specialBuffer(), packZ.specialShapeInfo(), packZ.specialOffsets(), indices.lengthOf(), yTadLen, zTadLen), INDEXING_TYPES, GENERIC_NUMERIC_TYPES); @@ -963,8 +963,8 @@ __global__ static void scatterLockCuda(const int opCode, std::vector dims = {0}; auto inverted = ShapeUtils::evalDimsToExclude(output.rankOf(), dims); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.getShapeInfo(), inverted); - auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(updates.getShapeInfo(), inverted); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), inverted); + auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(updates.shapeInfo(), inverted); auto psX = packX.specialShapeInfo(); auto psY = packY.specialShapeInfo(); @@ -984,9 +984,9 @@ __global__ static void scatterLockCuda(const int opCode, auto blockSize = sd::math::nd4j_max(32, sd::math::nd4j_min(tadLengthX, 1024)); if (lock) - scatterCuda<<<512, blockSize, 1024, *context->getCudaStream()>>>(op, indices.lengthOf(), output.getSpecialBuffer(), psX, poX, updates.getSpecialBuffer(), psY, poY, reinterpret_cast(indices.getSpecialBuffer()), tadLengthX, tadLengthY); + scatterCuda<<<512, blockSize, 1024, *context->getCudaStream()>>>(op, indices.lengthOf(), output.specialBuffer(), psX, poX, updates.specialBuffer(), psY, poY, reinterpret_cast(indices.specialBuffer()), tadLengthX, tadLengthY); else - scatterCuda<<<512, blockSize, 1024, *context->getCudaStream()>>>(op, indices.lengthOf(), output.getSpecialBuffer(), psX, poX, updates.getSpecialBuffer(), psY, poY, reinterpret_cast(indices.getSpecialBuffer()), 
tadLengthX, tadLengthY); + scatterCuda<<<512, blockSize, 1024, *context->getCudaStream()>>>(op, indices.lengthOf(), output.specialBuffer(), psX, poX, updates.specialBuffer(), psY, poY, reinterpret_cast(indices.specialBuffer()), tadLengthX, tadLengthY); NDArray::registerSpecialUse({&output}, {&updates, &indices}); manager.synchronize(); @@ -1016,9 +1016,9 @@ const int xLastDim = indices.sizeAt(-1); zTadDims[i] = zRank - 1 - j; } - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(indices.getShapeInfo(), {xRank - 1}); - auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(updates.getShapeInfo(), yTadDims); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.getShapeInfo(), zTadDims); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(indices.shapeInfo(), {xRank - 1}); + auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(updates.shapeInfo(), yTadDims); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), zTadDims); const int threadsPerBlock = MAX_NUM_THREADS / 4; const int blocksPerGrid = packZ.numberOfTads(); @@ -1151,11 +1151,11 @@ const int xLastDim = indices.sizeAt(-1); // PointersManager::printDevContentOnDev(yShapeInfo, 8); // PointersManager::printDevContentOnDev(zShapeInfo, 8); - // manager.printDevContentOnHost(indices.getSpecialBuffer(), indices.lengthOf()); - // manager.printDevContentOnHost(indices.getSpecialShapeInfo(), shape::shapeInfoLength(indices.rankOf())); - // manager.printDevContentOnHost(updates.getSpecialBuffer(), updates.lengthOf()); - // manager.printDevContentOnHost(updates.getSpecialShapeInfo(), shape::shapeInfoLength(updates.rankOf())); - // manager.printDevContentOnHost(output.getSpecialShapeInfo(), shape::shapeInfoLength(output.rankOf())); + // manager.printDevContentOnHost(indices.specialBuffer(), indices.lengthOf()); + // manager.printDevContentOnHost(indices.specialShapeInfo(), 
shape::shapeInfoLength(indices.rankOf())); + // manager.printDevContentOnHost(updates.specialBuffer(), updates.lengthOf()); + // manager.printDevContentOnHost(updates.specialShapeInfo(), shape::shapeInfoLength(updates.rankOf())); + // manager.printDevContentOnHost(output.specialShapeInfo(), shape::shapeInfoLength(output.rankOf())); // printf("!!!!!!!\n"); // manager.printDevContentOnHost(packX.specialShapeInfo(), 2*shape::rank(packX.primaryShapeInfo()) + 4); // manager.printDevContentOnHost(packX.specialOffsets(), packX.numberOfTads()); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/scatter_simple.cu b/libnd4j/include/ops/declarable/helpers/cuda/scatter_simple.cu index 277a1f587..a17464cbd 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/scatter_simple.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/scatter_simple.cu @@ -33,9 +33,9 @@ namespace sd { namespace ops { namespace helpers { template - static _CUDA_G void scatterSimpleKernel(void *vx, Nd4jLong *xTadShape, Nd4jLong *xTadOffsets, Nd4jLong xLength, Nd4jLong numTads, void *vi, Nd4jLong *iShapeInfo, Nd4jLong iLength, void *vu, Nd4jLong *uShapeInfo, Nd4jLong uLength) { - auto u = reinterpret_cast(vu); - auto indices = reinterpret_cast(vi); + static _CUDA_G void scatterSimpleKernel(void *vx, const Nd4jLong *xTadShape, const Nd4jLong *xTadOffsets, Nd4jLong xLength, Nd4jLong numTads, const void *vi, const Nd4jLong *iShapeInfo, Nd4jLong iLength, const void *vu, const Nd4jLong *uShapeInfo, Nd4jLong uLength) { + auto u = reinterpret_cast(vu); + auto indices = reinterpret_cast(vi); auto tid = threadIdx.x + blockIdx.x * blockDim.x; for (int i = tid; i < iLength; i += blockDim.x * gridDim.x) { @@ -51,13 +51,13 @@ namespace sd { void scatterSimple_(sd::LaunchContext * context, const int opId, NDArray& input, const NDArray& updates, const NDArray& indices, const std::vector& dimensions) { auto dims = ShapeUtils::evalDimsToExclude(input.rankOf(), dimensions); - auto packX = 
ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), dims); + auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dims); auto xLength = shape::length(packX.primaryShapeInfo()); auto iLength = indices.lengthOf(); auto uLength = updates.lengthOf(); - scatterSimpleKernel<<<256, 256, 1024, *context->getCudaStream()>>>(input.getSpecialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), xLength, packX.numberOfTads(), indices.getSpecialBuffer(), indices.getSpecialShapeInfo(), iLength, updates.getSpecialBuffer(), updates.getSpecialShapeInfo(), uLength); + scatterSimpleKernel<<<256, 256, 1024, *context->getCudaStream()>>>(input.specialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), xLength, packX.numberOfTads(), indices.specialBuffer(), indices.specialShapeInfo(), iLength, updates.specialBuffer(), updates.specialShapeInfo(), uLength); } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/scatter_update.cu b/libnd4j/include/ops/declarable/helpers/cuda/scatter_update.cu index 748a2e6a3..51f917a79 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/scatter_update.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/scatter_update.cu @@ -114,15 +114,15 @@ namespace sd { for (int e = 2; e < 2 + numOfDims; e++) tadDimensions[e-2] = (*intArgs)[e]; - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), tadDimensions); - auto packY = ConstantTadHelper::getInstance()->tadForDimensions(updates.getShapeInfo(), tadDimensions); + auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), tadDimensions); + auto packY = ConstantTadHelper::getInstance()->tadForDimensions(updates.shapeInfo(), tadDimensions); NDArray indices(const_cast(intArgs->data()) + numOfDims + 3, 'c', {numOfInd}, sd::DataType::INT32, context); PointersManager manager(context, "scatterUpdate"); NDArray::prepareSpecialUse({&input}, {&input, &updates, &indices}); - 
BUILD_SINGLE_SELECTOR(input.dataType(), scatterUpdateCudaLauncher, (context->getCudaStream(), opCode, numOfInd, input.specialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), updates.specialBuffer(), packY.platformShapeInfo(), packY.platformOffsets(), reinterpret_cast(indices.getSpecialBuffer())), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), scatterUpdateCudaLauncher, (context->getCudaStream(), opCode, numOfInd, input.specialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), updates.specialBuffer(), packY.platformShapeInfo(), packY.platformOffsets(), reinterpret_cast(indices.specialBuffer())), LIBND4J_TYPES); NDArray::registerSpecialUse({&input}, {&input, &updates, &indices}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/segment.cu b/libnd4j/include/ops/declarable/helpers/cuda/segment.cu index 796dd6a1e..60d00fb60 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/segment.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/segment.cu @@ -47,7 +47,7 @@ namespace helpers { // Unsorted segment ops functors implementation // -------------------------------------------------------------------------------------------------------------- // template - static __global__ void unsortedSegmentIndexValidateKernel(I* indices, Nd4jLong* indicesShape, I expected, I* found) { + static __global__ void unsortedSegmentIndexValidateKernel(const I* indices, const Nd4jLong* indicesShape, I expected, I* found) { __shared__ bool onlyTrue; __shared__ Nd4jLong len; @@ -90,12 +90,12 @@ namespace helpers { // -------------------------------------------------------------------------------------------------------------- // // fill up segments starts and ends - splitted ordered case template - static __global__ void fillUpSegmentsKernel(void* indices, Nd4jLong* indexShape, int numClasses, int* classesRangesStart, int* classesRangesLenghts) { - __shared__ I* idxBuf; + static __global__ void fillUpSegmentsKernel(const 
void* indices, const Nd4jLong* indexShape, int numClasses, int* classesRangesStart, int* classesRangesLenghts) { + __shared__ const I* idxBuf; __shared__ Nd4jLong idxLen; __shared__ int* result; if (threadIdx.x == 0) { - idxBuf = reinterpret_cast(indices); + idxBuf = reinterpret_cast(indices); idxLen = shape::length(indexShape); } __syncthreads(); @@ -115,8 +115,8 @@ namespace helpers { template static void fillUpSegments_(NDArray* indices, Nd4jLong numClasses, NDArray& classesRangesBegs, NDArray& classesRangesLens) { dim3 dims(numClasses, indices->lengthOf(), numClasses * 32 + 32); - int* begins = reinterpret_cast(classesRangesBegs.getSpecialBuffer()); - int* lengths = reinterpret_cast(classesRangesLens.getSpecialBuffer()); + int* begins = reinterpret_cast(classesRangesBegs.specialBuffer()); + int* lengths = reinterpret_cast(classesRangesLens.specialBuffer()); auto stream = classesRangesBegs.getContext()->getCudaStream(); fillUpSegmentsKernel<<>>(indices->specialBuffer(), indices->specialShapeInfo(), numClasses, begins, lengths); } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/segment_max.cu b/libnd4j/include/ops/declarable/helpers/cuda/segment_max.cu index e7baf2370..927b1bb2f 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/segment_max.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/segment_max.cu @@ -38,8 +38,8 @@ namespace sd { template static __global__ void - segmentMaxLinearKernel(void *input, Nd4jLong *inputShape, int *starts, int *lengths, Nd4jLong numOfClasses, - void *output, Nd4jLong *outputShape) { + segmentMaxLinearKernel(void *input, Nd4jLong const* inputShape, int *starts, int *lengths, Nd4jLong numOfClasses, + void *output, Nd4jLong const* outputShape) { __shared__ T *val; __shared__ Nd4jLong xLen, zLen, zIndex; __shared__ T *x; @@ -77,9 +77,9 @@ namespace sd { template static __global__ void - unsortedSegmentMaxLinearKernel(void *input, Nd4jLong *inputShape, void *indices, Nd4jLong *indicesShape, + 
unsortedSegmentMaxLinearKernel(void *input, Nd4jLong const* inputShape, void *indices, Nd4jLong const* indicesShape, int *starts, int *lengths, Nd4jLong numOfClasses, void *output, - Nd4jLong *outputShape) { + Nd4jLong const* outputShape) { __shared__ T *val; __shared__ Nd4jLong xLen, zLen, zIndex; __shared__ T *x; @@ -114,9 +114,9 @@ namespace sd { } // -------------------------------------------------------------------------------------------------------------- // template - static __global__ void segmentMaxTadKernel(void* inputBuf, Nd4jLong* inputShape, Nd4jLong* inputTads, - Nd4jLong* inputTadOffsets, I* indices, int* starts, int* lengths, Nd4jLong numOfClasses, void* outputBuf, - Nd4jLong* outputShape, Nd4jLong* outputTads, Nd4jLong* outputTadOffsets, T filler = 0) { + static __global__ void segmentMaxTadKernel(void* inputBuf, Nd4jLong const* inputShape, Nd4jLong const* inputTads, + Nd4jLong const* inputTadOffsets, I* indices, int* starts, int* lengths, Nd4jLong numOfClasses, void* outputBuf, + Nd4jLong const* outputShape, Nd4jLong const* outputTads, Nd4jLong const* outputTadOffsets, T filler = 0) { __shared__ T* val; __shared__ Nd4jLong len, zIndex, total; @@ -185,12 +185,12 @@ namespace sd { } else { std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + auto inputTads = packX.specialShapeInfo(); + auto 
inputTadOffsets = packX.specialOffsets(); + auto outputTads = packZ.specialShapeInfo(); + auto outputTadOffsets = packZ.specialOffsets(); segmentMaxTadKernel<<>>(input->specialBuffer(), input->specialShapeInfo(), inputTads, inputTadOffsets, reinterpret_cast(indices->specialBuffer()), begins, lengths, numOfClasses, output->specialBuffer(), output->specialShapeInfo(), outputTads, outputTadOffsets); } NDArray::registerSpecialUse({output}, {input, indices, &classesRangesBegs, &classesRangesLens}); @@ -218,20 +218,20 @@ namespace sd { dim3 dims(numOfClasses, indices->lengthOf(), numOfClasses * 32 + 32); // int* classesBuf = reinterpret_cast(classes.specialBuffer()); fillUpSegments(indices, numOfClasses, classesRangesBegs, classesRangesLens); - int* begins = reinterpret_cast(classesRangesBegs.getSpecialBuffer()); - int* lengths = reinterpret_cast(classesRangesLens.getSpecialBuffer()); + int* begins = reinterpret_cast(classesRangesBegs.specialBuffer()); + int* lengths = reinterpret_cast(classesRangesLens.specialBuffer()); if (input->isVector()) { unsortedSegmentMaxLinearKernel<<>>(input->specialBuffer(), input->specialShapeInfo(), indices->specialBuffer(), indices->specialShapeInfo(), begins, lengths, numOfClasses, output->specialBuffer(), output->specialShapeInfo()); } else { std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); 
+ auto inputTads = packX.specialShapeInfo(); + auto inputTadOffsets = packX.specialOffsets(); + auto outputTads = packZ.specialShapeInfo(); + auto outputTadOffsets = packZ.specialOffsets(); dims.x = input->sizeAt(0); output->assign(-DataTypeUtils::max()); segmentMaxTadKernel<<>>(input->specialBuffer(), input->specialShapeInfo(), inputTads, inputTadOffsets, reinterpret_cast(indices->specialBuffer()), begins, lengths, numOfClasses, output->specialBuffer(), output->specialShapeInfo(), outputTads, outputTadOffsets); @@ -250,9 +250,9 @@ namespace sd { // segment max // -------------------------------------------------------------------------------------------------------------- // template - static __global__ void segmentMaxBPLinearKernel(void* inputBuf, Nd4jLong* inputShape, void* forwardOutput, - Nd4jLong* forwardShape, void* eps, Nd4jLong* epsShape, void* indicesBuf, Nd4jLong* indicesShape, - void* outputBuf, Nd4jLong* outputShape) { + static __global__ void segmentMaxBPLinearKernel(void* inputBuf, Nd4jLong const* inputShape, void* forwardOutput, + Nd4jLong const* forwardShape, void* eps, Nd4jLong const* epsShape, void* indicesBuf, Nd4jLong const* indicesShape, + void* outputBuf, Nd4jLong const* outputShape) { __shared__ T* x; __shared__ T* gradIn; __shared__ T* gradOut; @@ -291,12 +291,12 @@ namespace sd { // -------------------------------------------------------------------------------------------------------------- // template - static __global__ void segmentMaxBPTadKernel(void* inputBuf, Nd4jLong* inputShape, void* forwardOutput, - Nd4jLong* forwardShape, void* eps, Nd4jLong* epsShape, void* indicesBuf, Nd4jLong* indicesShape, - void* outputBuf, Nd4jLong* outputShape,Nd4jLong* inputTad, - Nd4jLong* inputOffsets, Nd4jLong* gradInTad, Nd4jLong* gradInOffsets, - Nd4jLong* gradOutTad, Nd4jLong* gradOutOffsets, Nd4jLong* outTad, - Nd4jLong* outOffsets) { + static __global__ void segmentMaxBPTadKernel(void* inputBuf, Nd4jLong const* inputShape, void* forwardOutput, + 
Nd4jLong const* forwardShape, void* eps, Nd4jLong const* epsShape, void* indicesBuf, Nd4jLong const* indicesShape, + void* outputBuf, Nd4jLong const* outputShape,Nd4jLong const* inputTad, + Nd4jLong const* inputOffsets, Nd4jLong const* gradInTad, Nd4jLong const* gradInOffsets, + Nd4jLong const* gradOutTad, Nd4jLong const* gradOutOffsets, Nd4jLong const* outTad, + Nd4jLong const* outOffsets) { __shared__ T* x; __shared__ T* gradIn; __shared__ T* gradOut; @@ -349,18 +349,18 @@ namespace sd { } else { std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.getShapeInfo(), dimensions); - auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); - Nd4jLong* gradInTads = packGradIn.specialShapeInfo(); - Nd4jLong* gradInTadOffsets = packGradIn.specialOffsets(); - Nd4jLong* gradOutTads = packGradOut.specialShapeInfo(); - Nd4jLong* gradOutTadOffsets = packGradOut.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions); + auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions); + Nd4jLong const* inputTads = packX.specialShapeInfo(); + Nd4jLong const* inputTadOffsets = 
packX.specialOffsets(); + Nd4jLong const* outputTads = packZ.specialShapeInfo(); + Nd4jLong const* outputTadOffsets = packZ.specialOffsets(); + Nd4jLong const* gradInTads = packGradIn.specialShapeInfo(); + Nd4jLong const* gradInTadOffsets = packGradIn.specialOffsets(); + Nd4jLong const* gradOutTads = packGradOut.specialShapeInfo(); + Nd4jLong const* gradOutTadOffsets = packGradOut.specialOffsets(); segmentMaxBPTadKernel<<lengthOf(), input->lengthOf(), 256, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), tempRes.specialBuffer(), tempRes.specialShapeInfo(), gradOut->specialBuffer(), gradOut->specialShapeInfo(), @@ -397,18 +397,18 @@ namespace sd { } else { std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.getShapeInfo(), dimensions); - auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); - Nd4jLong* gradInTads = packGradIn.specialShapeInfo(); - Nd4jLong* gradInTadOffsets = packGradIn.specialOffsets(); - Nd4jLong* gradOutTads = packGradOut.specialShapeInfo(); - Nd4jLong* gradOutTadOffsets = packGradOut.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions); + auto packGradOut = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions); + Nd4jLong const* inputTads = packX.specialShapeInfo(); + Nd4jLong const* inputTadOffsets = packX.specialOffsets(); + Nd4jLong const* outputTads = packZ.specialShapeInfo(); + Nd4jLong const* outputTadOffsets = packZ.specialOffsets(); + Nd4jLong const* gradInTads = packGradIn.specialShapeInfo(); + Nd4jLong const* gradInTadOffsets = packGradIn.specialOffsets(); + Nd4jLong const* gradOutTads = packGradOut.specialShapeInfo(); + Nd4jLong const* gradOutTadOffsets = packGradOut.specialOffsets(); segmentMaxBPTadKernel<<lengthOf(), input->lengthOf(), 256, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), tempRes.specialBuffer(), tempRes.specialShapeInfo(), gradOut->specialBuffer(), gradOut->specialShapeInfo(), diff --git a/libnd4j/include/ops/declarable/helpers/cuda/segment_mean.cu b/libnd4j/include/ops/declarable/helpers/cuda/segment_mean.cu index 76036a5e6..c75293c1d 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/segment_mean.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/segment_mean.cu @@ -34,7 +34,7 @@ namespace helpers { // Segment ops linear kernels // -------------------------------------------------------------------------------------------------------------- // template - static __global__ void segmentMeanLinearKernel(void* input, Nd4jLong* inputShape, int* starts, int* lengths, Nd4jLong numOfClasses, void* output, Nd4jLong* outputShape) { + static __global__ void segmentMeanLinearKernel(void* input, Nd4jLong const* inputShape, int* starts, int* lengths, Nd4jLong numOfClasses, void* output, Nd4jLong const* outputShape) { __shared__ T* val; __shared__ Nd4jLong xLen, zLen, segment, zIndex; __shared__ T* x; @@ -72,7 +72,7 @@ namespace helpers { } // -------------------------------------------------------------------------------------------------------------- // template - static __global__ void unsortedSegmentMeanLinearKernel(void* input, Nd4jLong* 
inputShape, void* indices, Nd4jLong* indicesShape, int* starts, int* lengths, Nd4jLong numOfClasses, void* output, Nd4jLong* outputShape) { + static __global__ void unsortedSegmentMeanLinearKernel(void* input, Nd4jLong const* inputShape, void* indices, Nd4jLong const* indicesShape, int* starts, int* lengths, Nd4jLong numOfClasses, void* output, Nd4jLong const* outputShape) { __shared__ T* val; __shared__ Nd4jLong xLen, zLen, zIndex; __shared__ T* x; @@ -115,7 +115,7 @@ namespace helpers { // -------------------------------------------------------------------------------------------------------------- // // SegmentMean kernel template - static __global__ void segmentMeanTadKernel(void* inputBuf, Nd4jLong* inputShape, Nd4jLong* inputTads, Nd4jLong* inputTadOffsets, I* indices, int* starts, int* lengths, Nd4jLong numOfClasses, void* outputBuf, Nd4jLong* outputShape, Nd4jLong* outputTads, Nd4jLong* outputTadOffsets) { + static __global__ void segmentMeanTadKernel(void* inputBuf, Nd4jLong const* inputShape, Nd4jLong const* inputTads, Nd4jLong const* inputTadOffsets, I* indices, int* starts, int* lengths, Nd4jLong numOfClasses, void* outputBuf, Nd4jLong const* outputShape, Nd4jLong const* outputTads, Nd4jLong const* outputTadOffsets) { __shared__ T* val; __shared__ Nd4jLong len, zIndex, total; __shared__ T* z; @@ -174,12 +174,12 @@ namespace helpers { } else { std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + auto inputTads = packX.specialShapeInfo(); + auto inputTadOffsets = packX.specialOffsets(); + auto outputTads = packZ.specialShapeInfo(); + auto outputTadOffsets = packZ.specialOffsets(); segmentMeanTadKernel<<sizeAt(0), 512, 2048, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), inputTads, inputTadOffsets, reinterpret_cast(indices->specialBuffer()), begins, lengths, numClasses, output->specialBuffer(), output->specialShapeInfo(), outputTads, outputTadOffsets); } NDArray::registerSpecialUse({output}, {input, indices}); @@ -216,12 +216,12 @@ namespace helpers { else { output->assign(0); std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + Nd4jLong const* inputTads = packX.specialShapeInfo(); + Nd4jLong const* inputTadOffsets = packX.specialOffsets(); + Nd4jLong const* outputTads = packZ.specialShapeInfo(); + Nd4jLong const* outputTadOffsets = packZ.specialOffsets(); dims.x = input->sizeAt(0); segmentMeanTadKernel<<>>(input->specialBuffer(), input->specialShapeInfo(), inputTads, inputTadOffsets, reinterpret_cast(indices->specialBuffer()), begins, lengths, numOfClasses, output->specialBuffer(), output->specialShapeInfo(), outputTads, outputTadOffsets); } @@ -237,8 +237,8 @@ namespace helpers { // 
-------------------------------------------------------------------------------------------------------------- // template - static __global__ void segmentMeanBPLinearKernel(void* inputBuf, Nd4jLong* inputShape, void* eps, Nd4jLong* epsShape, void* indicesBuf, Nd4jLong* indicesShape, - int* lengths, void* outputBuf, Nd4jLong* outputShape) { + static __global__ void segmentMeanBPLinearKernel(void* inputBuf, Nd4jLong const* inputShape, void* eps, Nd4jLong const* epsShape, void* indicesBuf, Nd4jLong const* indicesShape, + int* lengths, void* outputBuf, Nd4jLong const* outputShape) { __shared__ T* x; __shared__ T* gradIn; __shared__ T* gradOut; @@ -272,9 +272,9 @@ namespace helpers { } // -------------------------------------------------------------------------------------------------------------- // template - static __global__ void segmentMeanBPTadKernel(void* inputBuf, Nd4jLong* inputShape, void* eps, Nd4jLong* epsShape, - void* indicesBuf, Nd4jLong* indicesShape, int* lengths, void* outputBuf, Nd4jLong* outputShape,Nd4jLong* inputTad, - Nd4jLong* inputOffsets, Nd4jLong* gradOutTad, Nd4jLong* gradOutOffsets, Nd4jLong* outTad, Nd4jLong* outOffsets) { + static __global__ void segmentMeanBPTadKernel(void* inputBuf, Nd4jLong const* inputShape, void* eps, Nd4jLong const* epsShape, + void* indicesBuf, Nd4jLong const* indicesShape, int* lengths, void* outputBuf, Nd4jLong const* outputShape,Nd4jLong const* inputTad, + Nd4jLong const* inputOffsets, Nd4jLong const* gradOutTad, Nd4jLong const* gradOutOffsets, Nd4jLong const* outTad, Nd4jLong const* outOffsets) { __shared__ T* x; __shared__ T* gradOut; __shared__ I* y; @@ -333,16 +333,16 @@ namespace helpers { } else { std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); -// auto packGradIn = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.getShapeInfo(), dimensions); - auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); - Nd4jLong* gradOutTads = packGradOut.specialShapeInfo(); - Nd4jLong* gradOutTadOffsets = packGradOut.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); +// auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions); + auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions); + Nd4jLong const* inputTads = packX.specialShapeInfo(); + Nd4jLong const* inputTadOffsets = packX.specialOffsets(); + Nd4jLong const* outputTads = packZ.specialShapeInfo(); + Nd4jLong const* outputTadOffsets = packZ.specialOffsets(); + Nd4jLong const* gradOutTads = packGradOut.specialShapeInfo(); + Nd4jLong const* gradOutTadOffsets = packGradOut.specialOffsets(); segmentMeanBPTadKernel<<lengthOf(), input->lengthOf(), 256, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), gradOut->specialBuffer(), gradOut->specialShapeInfo(), indices->specialBuffer(), indices->specialShapeInfo(), lengths, @@ -386,16 +386,16 @@ namespace helpers { } else { std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); -// auto packGradIn = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.getShapeInfo(), dimensions); - auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); - Nd4jLong* gradOutTads = packGradOut.specialShapeInfo(); - Nd4jLong* gradOutTadOffsets = packGradOut.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); +// auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions); + auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions); + Nd4jLong const* inputTads = packX.specialShapeInfo(); + Nd4jLong const* inputTadOffsets = packX.specialOffsets(); + Nd4jLong const* outputTads = packZ.specialShapeInfo(); + Nd4jLong const* outputTadOffsets = packZ.specialOffsets(); + Nd4jLong const* gradOutTads = packGradOut.specialShapeInfo(); + Nd4jLong const* gradOutTadOffsets = packGradOut.specialOffsets(); segmentMeanBPTadKernel<<lengthOf(), input->lengthOf(), 256, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), gradOut->specialBuffer(), gradOut->specialShapeInfo(), indices->specialBuffer(), indices->specialShapeInfo(), lengths, diff --git a/libnd4j/include/ops/declarable/helpers/cuda/segment_min.cu b/libnd4j/include/ops/declarable/helpers/cuda/segment_min.cu index 0133b3b11..c6f2d4ed2 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/segment_min.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/segment_min.cu @@ -36,11 +36,11 @@ namespace helpers { template static __global__ void - segmentMinLinearKernel(void *input, Nd4jLong 
*inputShape, int *starts, int *lengths, Nd4jLong numOfClasses, - void *output, Nd4jLong *outputShape) { + segmentMinLinearKernel(const void *input, const Nd4jLong *inputShape, int *starts, int *lengths, Nd4jLong numOfClasses, + void *output, const Nd4jLong *outputShape) { __shared__ T *val; __shared__ Nd4jLong xLen, zLen, zIndex; - __shared__ T *x; + __shared__ const T *x; __shared__ T *z; __shared__ int threadsPerSegment, start, finish; @@ -48,7 +48,7 @@ namespace helpers { if (threadIdx.x == 0) { // threadsPerSegment = (gridDim.x + numOfClasses - 1) / numOfClasses; // segment = blockIdx.x / threadsPerSegment; - x = reinterpret_cast(input); + x = reinterpret_cast(input); z = reinterpret_cast(output); extern __shared__ unsigned char shmem[]; val = reinterpret_cast(shmem); @@ -76,25 +76,25 @@ namespace helpers { template static __global__ void - unsortedSegmentMinLinearKernel(void *input, Nd4jLong *inputShape, void *indices, Nd4jLong *indicesShape, + unsortedSegmentMinLinearKernel(const void *input, const Nd4jLong *inputShape, const void *indices, const Nd4jLong *indicesShape, int *starts, int *lengths, Nd4jLong numOfClasses, void *output, - Nd4jLong *outputShape) { + const Nd4jLong *outputShape) { __shared__ T *val; __shared__ Nd4jLong xLen, zLen, segment, zIndex; __shared__ - T *x; + const T *x; __shared__ T *z; __shared__ - I *y; //int threadsPerSegment, start, finish; + const I *y; //int threadsPerSegment, start, finish; if (threadIdx.x == 0) { segment = blockIdx.x; - x = reinterpret_cast(input); + x = reinterpret_cast(input); z = reinterpret_cast(output); - y = reinterpret_cast(indices); + y = reinterpret_cast(indices); xLen = shape::length(inputShape); zLen = shape::length(outputShape); @@ -106,6 +106,7 @@ namespace helpers { } __syncthreads(); + if (lengths[segment] > 0) for (auto e = threadIdx.x + 1; e < xLen; e += blockDim.x) { auto xIndex = shape::getIndexOffset(e, inputShape); @@ -118,7 +119,7 @@ namespace helpers { // 
-------------------------------------------------------------------------------------------------------------- // // SegmentMin kernel template - static __global__ void segmentMinTadKernel(void* inputBuf, Nd4jLong* inputShape, Nd4jLong* inputTads, Nd4jLong* inputTadOffsets, I* indices, int* starts, int* lengths, Nd4jLong numOfClasses, void* outputBuf, Nd4jLong* outputShape, Nd4jLong* outputTads, Nd4jLong* outputTadOffsets) { + static __global__ void segmentMinTadKernel(const void* inputBuf, const Nd4jLong* inputShape, const Nd4jLong* inputTads, const Nd4jLong* inputTadOffsets, I* indices, int* starts, int* lengths, Nd4jLong numOfClasses, void* outputBuf, const Nd4jLong* outputShape, const Nd4jLong* outputTads, const Nd4jLong* outputTadOffsets) { __shared__ T* val; __shared__ Nd4jLong len, zIndex, total; __shared__ T* z; @@ -137,7 +138,7 @@ namespace helpers { auto idx = blockIdx.x; if (blockIdx.x <= total) { - auto x = reinterpret_cast(inputBuf) + inputTadOffsets[idx]; + auto x = reinterpret_cast(inputBuf) + inputTadOffsets[idx]; if (blockIdx.x == start) { for (auto e = threadIdx.x; e < len; e += blockDim.x) { auto xIndex = shape::getIndexOffset(e, inputTads); @@ -161,8 +162,8 @@ namespace helpers { static void segmentMinFunctor_(LaunchContext* context, NDArray* input, NDArray* indices, NDArray* output) { auto stream = context->getCudaStream(); Nd4jLong numClasses = indices->e(indices->lengthOf() - 1) + 1; - NDArray classesRangesLens = NDArrayFactory::create('c', {numClasses}, context); - NDArray classesRangesBegs = NDArrayFactory::create('c', {numClasses}, context); + auto classesRangesLens = NDArrayFactory::create('c', {numClasses}, context); + auto classesRangesBegs = NDArrayFactory::create('c', {numClasses}, context); output->assign(DataTypeUtils::infOrMax()); classesRangesBegs.assign(indices->lengthOf()); classesRangesLens.assign(0); @@ -176,12 +177,12 @@ namespace helpers { } else { std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); 
- auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + auto inputTads = packX.specialShapeInfo(); + auto inputTadOffsets = packX.specialOffsets(); + auto outputTads = packZ.specialShapeInfo(); + auto outputTadOffsets = packZ.specialOffsets(); segmentMinTadKernel<<sizeAt(0), 512, 2048, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), inputTads, inputTadOffsets, reinterpret_cast(indices->specialBuffer()), begins, lengths, numClasses, output->specialBuffer(), output->specialShapeInfo(), outputTads, outputTadOffsets); } @@ -221,12 +222,12 @@ namespace helpers { else { output->assign(DataTypeUtils::max()); std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + auto inputTads = packX.specialShapeInfo(); + auto inputTadOffsets = 
packX.specialOffsets(); + auto outputTads = packZ.specialShapeInfo(); + auto outputTadOffsets = packZ.specialOffsets(); dims.x = input->sizeAt(0); segmentMinTadKernel<<>>(input->specialBuffer(), input->specialShapeInfo(), inputTads, inputTadOffsets, reinterpret_cast(indices->specialBuffer()), begins, lengths, numOfClasses, output->specialBuffer(), output->specialShapeInfo(), outputTads, outputTadOffsets); } @@ -243,20 +244,20 @@ namespace helpers { } template - static __global__ void segmentMinBPLinearKernel(void* inputBuf, Nd4jLong* inputShape, void* forwardOutput, - Nd4jLong* forwardShape, void* eps, Nd4jLong* epsShape, void* indicesBuf, Nd4jLong* indicesShape, - void* outputBuf, Nd4jLong* outputShape) { - __shared__ T* x; + static __global__ void segmentMinBPLinearKernel(const void* inputBuf, const Nd4jLong* inputShape, void* forwardOutput, + const Nd4jLong* forwardShape, void* eps, const Nd4jLong* epsShape, const void* indicesBuf, const Nd4jLong* indicesShape, + void* outputBuf, const Nd4jLong* outputShape) { + __shared__ const T* x; __shared__ T* gradIn; __shared__ T* gradOut; - __shared__ I* y; + __shared__ const I* y; __shared__ T* z; __shared__ Nd4jLong xLen, gradLen; if (threadIdx.x == 0) { xLen = shape::length(inputShape); - x = reinterpret_cast(inputBuf); - y = reinterpret_cast(indicesBuf); + x = reinterpret_cast(inputBuf); + y = reinterpret_cast(indicesBuf); z = reinterpret_cast(outputBuf); gradIn = reinterpret_cast(forwardOutput); gradOut = reinterpret_cast(eps); @@ -284,23 +285,25 @@ namespace helpers { // -------------------------------------------------------------------------------------------------------------- // template - static __global__ void segmentMinBPTadKernel(void* inputBuf, Nd4jLong* inputShape, void* forwardOutput, - Nd4jLong* forwardShape, void* eps, Nd4jLong* epsShape, void* indicesBuf, Nd4jLong* indicesShape, - void* outputBuf, Nd4jLong* outputShape,Nd4jLong* inputTad, - Nd4jLong* inputOffsets, Nd4jLong* gradInTad, Nd4jLong* 
gradInOffsets, - Nd4jLong* gradOutTad, Nd4jLong* gradOutOffsets, Nd4jLong* outTad, - Nd4jLong* outOffsets) { - __shared__ T* x; + static __global__ void segmentMinBPTadKernel(const void* inputBuf, const Nd4jLong* inputShape, void* forwardOutput, + const Nd4jLong* forwardShape, void* eps, const Nd4jLong* epsShape, + const void* indicesBuf, const Nd4jLong* indicesShape, + void* outputBuf, const Nd4jLong* outputShape, + const Nd4jLong* inputTad, const Nd4jLong* inputOffsets, + const Nd4jLong* gradInTad, const Nd4jLong* gradInOffsets, + const Nd4jLong* gradOutTad, const Nd4jLong* gradOutOffsets, + const Nd4jLong* outTad, const Nd4jLong* outOffsets) { + __shared__ const T* x; __shared__ T* gradIn; __shared__ T* gradOut; - __shared__ I* y; + __shared__ const I* y; __shared__ T* z; __shared__ Nd4jLong xLen, yLen, gradLen, currentLen; if (threadIdx.x == 0) { xLen = shape::length(inputShape); - x = reinterpret_cast(inputBuf); - y = reinterpret_cast(indicesBuf); + x = reinterpret_cast(inputBuf); + y = reinterpret_cast(indicesBuf); z = reinterpret_cast(outputBuf); yLen = shape::length(indicesShape); gradOut = reinterpret_cast(eps); @@ -313,10 +316,10 @@ namespace helpers { for (auto i = blockIdx.x; i < yLen; i += gridDim.x) { auto yIndex = shape::getIndexOffset(i, indicesShape); auto segment = y[yIndex]; - T* current = x + inputOffsets[i]; - T* currentOut = z + outOffsets[i]; - T* in = gradIn + gradInOffsets[segment]; - T* outGrad = gradOut + gradOutOffsets[segment]; + auto current = x + inputOffsets[i]; + auto currentOut = z + outOffsets[i]; + auto in = gradIn + gradInOffsets[segment]; + auto outGrad = gradOut + gradOutOffsets[segment]; for (auto e = threadIdx.x; e < currentLen; e += blockDim.x) { if (sd::math::nd4j_abs(in[e] - current[e]) <= T(1.e-6)) @@ -344,18 +347,18 @@ namespace helpers { } else { std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), 
dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.getShapeInfo(), dimensions); - auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); - Nd4jLong* gradInTads = packGradIn.specialShapeInfo(); - Nd4jLong* gradInTadOffsets = packGradIn.specialOffsets(); - Nd4jLong* gradOutTads = packGradOut.specialShapeInfo(); - Nd4jLong* gradOutTadOffsets = packGradOut.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions); + auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions); + auto inputTads = packX.specialShapeInfo(); + auto inputTadOffsets = packX.specialOffsets(); + auto outputTads = packZ.specialShapeInfo(); + auto outputTadOffsets = packZ.specialOffsets(); + auto gradInTads = packGradIn.specialShapeInfo(); + auto gradInTadOffsets = packGradIn.specialOffsets(); + auto gradOutTads = packGradOut.specialShapeInfo(); + auto gradOutTadOffsets = packGradOut.specialOffsets(); segmentMinBPTadKernel<<lengthOf(), input->lengthOf(), 256, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), tempRes.specialBuffer(), tempRes.specialShapeInfo(), gradOut->specialBuffer(), gradOut->specialShapeInfo(), @@ -392,18 +395,18 @@ namespace helpers { } else { std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.getShapeInfo(), dimensions); - auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); - Nd4jLong* gradInTads = packGradIn.specialShapeInfo(); - Nd4jLong* gradInTadOffsets = packGradIn.specialOffsets(); - Nd4jLong* gradOutTads = packGradOut.specialShapeInfo(); - Nd4jLong* gradOutTadOffsets = packGradOut.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions); + auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions); + auto inputTads = packX.specialShapeInfo(); + auto inputTadOffsets = packX.specialOffsets(); + auto outputTads = packZ.specialShapeInfo(); + auto outputTadOffsets = packZ.specialOffsets(); + auto gradInTads = packGradIn.specialShapeInfo(); + auto gradInTadOffsets = packGradIn.specialOffsets(); + auto gradOutTads = packGradOut.specialShapeInfo(); + auto gradOutTadOffsets = packGradOut.specialOffsets(); segmentMinBPTadKernel<<lengthOf(), input->lengthOf(), 256, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), tempRes.specialBuffer(), tempRes.specialShapeInfo(), gradOut->specialBuffer(), gradOut->specialShapeInfo(), diff --git 
a/libnd4j/include/ops/declarable/helpers/cuda/segment_prod.cu b/libnd4j/include/ops/declarable/helpers/cuda/segment_prod.cu index d08f79817..026ded3e7 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/segment_prod.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/segment_prod.cu @@ -35,8 +35,8 @@ namespace helpers { // -------------------------------------------------------------------------------------------------------------- // template - static __global__ void segmentProdLinearKernel(void* input, Nd4jLong* inputShape, int* starts, int* lengths, - Nd4jLong numOfClasses, void* output, Nd4jLong* outputShape) { + static __global__ void segmentProdLinearKernel(void* input, Nd4jLong const* inputShape, int* starts, int* lengths, + Nd4jLong numOfClasses, void* output, Nd4jLong const* outputShape) { __shared__ Nd4jLong xLen, zLen; __shared__ T* x; @@ -66,7 +66,7 @@ namespace helpers { } // -------------------------------------------------------------------------------------------------------------- // template - static __global__ void unsortedSegmentProdLinearKernel(T* input, Nd4jLong* inputShape, I* indices, Nd4jLong* indicesShape, int* starts, int* lengths, Nd4jLong numOfClasses, T* output, Nd4jLong* outputShape) { + static __global__ void unsortedSegmentProdLinearKernel(T* input, Nd4jLong const* inputShape, I* indices, Nd4jLong const* indicesShape, int* starts, int* lengths, Nd4jLong numOfClasses, T* output, Nd4jLong const* outputShape) { __shared__ Nd4jLong xLen, zLen; if (threadIdx.x == 0) { @@ -90,9 +90,9 @@ namespace helpers { // -------------------------------------------------------------------------------------------------------------- // // SegmentProd kernel template - static __global__ void segmentProdTadKernel(void* inputBuf, Nd4jLong* inputShape, Nd4jLong* inputTads, - Nd4jLong* inputTadOffsets, I* indices, int* starts, int* lengths, Nd4jLong numOfClasses, void* outputBuf, - Nd4jLong* outputShape, Nd4jLong* outputTads, Nd4jLong* 
outputTadOffsets) { + static __global__ void segmentProdTadKernel(void* inputBuf, Nd4jLong const* inputShape, Nd4jLong const* inputTads, + Nd4jLong const* inputTadOffsets, I* indices, int* starts, int* lengths, Nd4jLong numOfClasses, void* outputBuf, + Nd4jLong const* outputShape, Nd4jLong const* outputTads, Nd4jLong const* outputTadOffsets) { __shared__ Nd4jLong len, total; @@ -138,12 +138,12 @@ namespace helpers { } else { std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + auto inputTads = packX.specialShapeInfo(); + auto inputTadOffsets = packX.specialOffsets(); + auto outputTads = packZ.specialShapeInfo(); + auto outputTadOffsets = packZ.specialOffsets(); segmentProdTadKernel<<<128, 512, 2048, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), inputTads, inputTadOffsets, reinterpret_cast(indices->specialBuffer()), begins, lengths, numClasses, output->specialBuffer(), output->specialShapeInfo(), outputTads, outputTadOffsets); } @@ -181,12 +181,12 @@ namespace helpers { } else { std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - Nd4jLong* inputTads = 
packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + auto inputTads = packX.specialShapeInfo(); + auto inputTadOffsets = packX.specialOffsets(); + auto outputTads = packZ.specialShapeInfo(); + auto outputTadOffsets = packZ.specialOffsets(); dims.x = input->sizeAt(0); segmentProdTadKernel<<<128, 256, 256, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), inputTads, inputTadOffsets, reinterpret_cast(indices->specialBuffer()), begins, lengths, numOfClasses, output->specialBuffer(), output->specialShapeInfo(), outputTads, outputTadOffsets); } @@ -202,9 +202,9 @@ namespace helpers { // -------------------------------------------------------------------------------------------------------------- // template - static __global__ void segmentProdBPLinearKernel(void* inputBuf, Nd4jLong* inputShape, void* forwardOutput, - Nd4jLong* forwardShape, void* eps, Nd4jLong* epsShape, void* indicesBuf, Nd4jLong* indicesShape, - void* outputBuf, Nd4jLong* outputShape) { + static __global__ void segmentProdBPLinearKernel(void* inputBuf, Nd4jLong const* inputShape, void* forwardOutput, + Nd4jLong const* forwardShape, void* eps, Nd4jLong const* epsShape, void* indicesBuf, Nd4jLong const* indicesShape, + void* outputBuf, Nd4jLong const* outputShape) { __shared__ T* x; __shared__ T* gradIn; __shared__ T* gradOut; @@ -240,12 +240,12 @@ namespace helpers { } // -------------------------------------------------------------------------------------------------------------- // template - static __global__ void segmentProdBPTadKernel(void* inputBuf, Nd4jLong* inputShape, void* forwardOutput, - Nd4jLong* forwardShape, void* eps, Nd4jLong* epsShape, 
void* indicesBuf, Nd4jLong* indicesShape, - void* outputBuf, Nd4jLong* outputShape,Nd4jLong* inputTad, - Nd4jLong* inputOffsets, Nd4jLong* gradInTad, Nd4jLong* gradInOffsets, - Nd4jLong* gradOutTad, Nd4jLong* gradOutOffsets, Nd4jLong* outTad, - Nd4jLong* outOffsets) { + static __global__ void segmentProdBPTadKernel(void* inputBuf, Nd4jLong const* inputShape, void* forwardOutput, + Nd4jLong const* forwardShape, void* eps, Nd4jLong const* epsShape, void* indicesBuf, Nd4jLong const* indicesShape, + void* outputBuf, Nd4jLong const* outputShape, Nd4jLong const* inputTad, + Nd4jLong const* inputOffsets, Nd4jLong const* gradInTad, Nd4jLong const* gradInOffsets, + Nd4jLong const* gradOutTad, Nd4jLong const* gradOutOffsets, Nd4jLong const* outTad, + Nd4jLong const* outOffsets) { __shared__ T* x; __shared__ T* gradIn; __shared__ T* gradOut; @@ -278,7 +278,6 @@ namespace helpers { currentOut[e] = outGrad[e] * in[e] / current[e]; } } - } // -------------------------------------------------------------------------------------------------------------- // @@ -297,18 +296,18 @@ namespace helpers { } else { std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.getShapeInfo(), dimensions); - auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); - Nd4jLong* gradInTads = packGradIn.specialShapeInfo(); - Nd4jLong* gradInTadOffsets = packGradIn.specialOffsets(); - Nd4jLong* gradOutTads = 
packGradOut.specialShapeInfo(); - Nd4jLong* gradOutTadOffsets = packGradOut.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions); + auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions); + auto inputTads = packX.specialShapeInfo(); + auto inputTadOffsets = packX.specialOffsets(); + auto outputTads = packZ.specialShapeInfo(); + auto outputTadOffsets = packZ.specialOffsets(); + auto gradInTads = packGradIn.specialShapeInfo(); + auto gradInTadOffsets = packGradIn.specialOffsets(); + auto gradOutTads = packGradOut.specialShapeInfo(); + auto gradOutTadOffsets = packGradOut.specialOffsets(); segmentProdBPTadKernel<<lengthOf(), input->lengthOf(), 256, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), tempRes.specialBuffer(), tempRes.specialShapeInfo(), gradOut->specialBuffer(), gradOut->specialShapeInfo(), @@ -347,18 +346,18 @@ namespace helpers { } else { std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.getShapeInfo(), dimensions); - auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); - Nd4jLong* gradInTads = packGradIn.specialShapeInfo(); - 
Nd4jLong* gradInTadOffsets = packGradIn.specialOffsets(); - Nd4jLong* gradOutTads = packGradOut.specialShapeInfo(); - Nd4jLong* gradOutTadOffsets = packGradOut.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions); + auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions); + auto inputTads = packX.specialShapeInfo(); + auto inputTadOffsets = packX.specialOffsets(); + auto outputTads = packZ.specialShapeInfo(); + auto outputTadOffsets = packZ.specialOffsets(); + auto gradInTads = packGradIn.specialShapeInfo(); + auto gradInTadOffsets = packGradIn.specialOffsets(); + auto gradOutTads = packGradOut.specialShapeInfo(); + auto gradOutTadOffsets = packGradOut.specialOffsets(); segmentProdBPTadKernel<<lengthOf(), input->lengthOf(), 256, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), tempRes.specialBuffer(), tempRes.specialShapeInfo(), gradOut->specialBuffer(), gradOut->specialShapeInfo(), diff --git a/libnd4j/include/ops/declarable/helpers/cuda/segment_sqrtn.cu b/libnd4j/include/ops/declarable/helpers/cuda/segment_sqrtn.cu index f9b6eaad0..b72abeffc 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/segment_sqrtn.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/segment_sqrtn.cu @@ -32,7 +32,7 @@ namespace ops { namespace helpers { // -------------------------------------------------------------------------------------------------------------- // template - static __global__ void unsortedSegmentSqrtNLinearKernel(T* input, Nd4jLong* inputShape, I* indices, Nd4jLong* indicesShape, int* starts, int* lengths, Nd4jLong numOfClasses, T* output, Nd4jLong* outputShape) { + static __global__ void unsortedSegmentSqrtNLinearKernel(T* 
input, Nd4jLong const* inputShape, I* indices, Nd4jLong const* indicesShape, int* starts, int* lengths, Nd4jLong numOfClasses, T* output, Nd4jLong const* outputShape) { __shared__ Nd4jLong xLen, zLen; if (threadIdx.x == 0) { @@ -57,7 +57,7 @@ namespace helpers { // -------------------------------------------------------------------------------------------------------------- // // SegmentSqrtN kernel template - static __global__ void segmentSqrtNTadKernel(T* inputBuf, Nd4jLong* inputShape, Nd4jLong* inputTads, Nd4jLong* inputTadOffsets, I* indices, int* starts, int* lengths, Nd4jLong numOfClasses, void* outputBuf, Nd4jLong* outputShape, Nd4jLong* outputTads, Nd4jLong* outputTadOffsets) { + static __global__ void segmentSqrtNTadKernel(T* inputBuf, Nd4jLong const* inputShape, Nd4jLong const* inputTads, Nd4jLong const* inputTadOffsets, I* indices, int* starts, int* lengths, Nd4jLong numOfClasses, void* outputBuf, Nd4jLong const* outputShape, Nd4jLong const* outputTads, Nd4jLong const* outputTadOffsets) { __shared__ Nd4jLong len, total; @@ -108,12 +108,12 @@ namespace helpers { else { output->nullify(); std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + auto inputTads = packX.specialShapeInfo(); + auto inputTadOffsets = packX.specialOffsets(); + auto outputTads = packZ.specialShapeInfo(); + auto outputTadOffsets = 
packZ.specialOffsets(); dims.x = input->sizeAt(0); segmentSqrtNTadKernel<<>>( input->dataBuffer()->specialAsT(), input->specialShapeInfo(), inputTads, inputTadOffsets, indices->dataBuffer()->specialAsT(), @@ -129,8 +129,8 @@ namespace helpers { } // -------------------------------------------------------------------------------------------------------------- // template - static __global__ void segmentSqrtNBPLinearKernel(void* inputBuf, Nd4jLong* inputShape, void* eps, Nd4jLong* epsShape, void* indicesBuf, Nd4jLong* indicesShape, - int* lengths, void* outputBuf, Nd4jLong* outputShape) { + static __global__ void segmentSqrtNBPLinearKernel(void* inputBuf, Nd4jLong const* inputShape, void* eps, Nd4jLong const* epsShape, void* indicesBuf, Nd4jLong const* indicesShape, + int* lengths, void* outputBuf, Nd4jLong const* outputShape) { __shared__ T* x; __shared__ T* gradIn; __shared__ T* gradOut; @@ -165,9 +165,9 @@ namespace helpers { // -------------------------------------------------------------------------------------------------------------- // template - static __global__ void segmentSqrtNBPTadKernel(void* inputBuf, Nd4jLong* inputShape, void* eps, Nd4jLong* epsShape, - void* indicesBuf, Nd4jLong* indicesShape, int* lengths, void* outputBuf, Nd4jLong* outputShape,Nd4jLong* inputTad, - Nd4jLong* inputOffsets, Nd4jLong* gradOutTad, Nd4jLong* gradOutOffsets, Nd4jLong* outTad, Nd4jLong* outOffsets) { + static __global__ void segmentSqrtNBPTadKernel(void* inputBuf, Nd4jLong const* inputShape, void* eps, Nd4jLong const* epsShape, + void* indicesBuf, Nd4jLong const* indicesShape, int* lengths, void* outputBuf, Nd4jLong const* outputShape,Nd4jLong const* inputTad, + Nd4jLong const* inputOffsets, Nd4jLong const* gradOutTad, Nd4jLong const* gradOutOffsets, Nd4jLong const* outTad, Nd4jLong const* outOffsets) { __shared__ T* x; __shared__ T* gradOut; __shared__ I* y; @@ -226,16 +226,16 @@ namespace helpers { } else { std::vector dimensions = 
ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); -// auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.getShapeInfo(), dimensions); - auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); - Nd4jLong* gradOutTads = packGradOut.specialShapeInfo(); - Nd4jLong* gradOutTadOffsets = packGradOut.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); +// auto packGradIn = sd::ConstantTadHelper::getInstance()->tadForDimensions(tempRes.shapeInfo(), dimensions); + auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions); + auto inputTads = packX.specialShapeInfo(); + auto inputTadOffsets = packX.specialOffsets(); + auto outputTads = packZ.specialShapeInfo(); + auto outputTadOffsets = packZ.specialOffsets(); + auto gradOutTads = packGradOut.specialShapeInfo(); + auto gradOutTadOffsets = packGradOut.specialOffsets(); segmentSqrtNBPTadKernel<<lengthOf(), input->lengthOf(), 256, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), gradOut->specialBuffer(), gradOut->specialShapeInfo(), indices->specialBuffer(), indices->specialShapeInfo(), lengths, diff --git a/libnd4j/include/ops/declarable/helpers/cuda/segment_sum.cu b/libnd4j/include/ops/declarable/helpers/cuda/segment_sum.cu index 56d53710f..7a762a526 100644 --- 
a/libnd4j/include/ops/declarable/helpers/cuda/segment_sum.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/segment_sum.cu @@ -35,14 +35,16 @@ namespace helpers { // -------------------------------------------------------------------------------------------------------------- // template static __global__ void - segmentSumLinearKernel(void *input, Nd4jLong *inputShape, int *starts, int *lengths, Nd4jLong numOfClasses, - void *output, Nd4jLong *outputShape) { + segmentSumLinearKernel( + const void *input, const Nd4jLong *inputShape, + int *starts, int *lengths, Nd4jLong numOfClasses, + void *output, const Nd4jLong *outputShape) { __shared__ T *val; __shared__ Nd4jLong xLen, zLen, segment, zIndex; __shared__ - T *x; + const T *x; __shared__ T *z; __shared__ int threadsPerSegment, start, finish; @@ -50,7 +52,7 @@ namespace helpers { if (threadIdx.x == 0) { threadsPerSegment = (gridDim.x + numOfClasses - 1) / numOfClasses; segment = blockIdx.x / threadsPerSegment; - x = reinterpret_cast(input); + x = reinterpret_cast(input); z = reinterpret_cast(output); xLen = shape::length(inputShape); @@ -77,25 +79,27 @@ namespace helpers { template static __global__ void - unsortedSegmentSumLinearKernel(void *input, Nd4jLong *inputShape, void *indices, Nd4jLong *indicesShape, - int *starts, int *lengths, Nd4jLong numOfClasses, void *output, - Nd4jLong *outputShape) { + unsortedSegmentSumLinearKernel( + const void *input, const Nd4jLong *inputShape, + const void *indices, const Nd4jLong *indicesShape, + int *starts, int *lengths, Nd4jLong numOfClasses, + void *output, const Nd4jLong *outputShape) { __shared__ T *val; __shared__ Nd4jLong xLen, zLen, segment, zIndex; __shared__ - T *x; + const T *x; __shared__ T *z; __shared__ - I *y; //int threadsPerSegment, start, finish; + const I *y; //int threadsPerSegment, start, finish; if (threadIdx.x == 0) { segment = blockIdx.x; - x = reinterpret_cast(input); + x = reinterpret_cast(input); z = reinterpret_cast(output); - y = 
reinterpret_cast(indices); + y = reinterpret_cast(indices); xLen = shape::length(inputShape); zLen = shape::length(outputShape); @@ -119,7 +123,11 @@ namespace helpers { // -------------------------------------------------------------------------------------------------------------- // // SegmentSum kernel template - static __global__ void segmentSumTadKernel(void* inputBuf, Nd4jLong* inputShape, Nd4jLong* inputTads, Nd4jLong* inputTadOffsets, I* indices, int* starts, int* lengths, Nd4jLong numOfClasses, void* outputBuf, Nd4jLong* outputShape, Nd4jLong* outputTads, Nd4jLong* outputTadOffsets) { + static __global__ void segmentSumTadKernel( + const void* inputBuf, const Nd4jLong* inputShape, const Nd4jLong* inputTads, const Nd4jLong* inputTadOffsets, + const I* indices, + int* starts, int* lengths, Nd4jLong numOfClasses, + void* outputBuf, const Nd4jLong* outputShape, const Nd4jLong* outputTads, const Nd4jLong* outputTadOffsets) { __shared__ T* val; __shared__ Nd4jLong len, zIndex, total; __shared__ T* z; @@ -138,7 +146,7 @@ namespace helpers { auto idx = blockIdx.x; if (blockIdx.x <= total) { - auto x = reinterpret_cast(inputBuf) + inputTadOffsets[idx]; + auto x = reinterpret_cast(inputBuf) + inputTadOffsets[idx]; if (blockIdx.x == start) { for (auto e = threadIdx.x; e < len; e += blockDim.x) { auto xIndex = shape::getIndexOffset(e, inputTads); @@ -178,12 +186,12 @@ namespace helpers { } else { std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); + auto packX = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + auto inputTads = packX.specialShapeInfo(); + auto inputTadOffsets = packX.specialOffsets(); + auto outputTads = packZ.specialShapeInfo(); + auto outputTadOffsets = packZ.specialOffsets(); segmentSumTadKernel<<sizeAt(0), 512, 2048, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), inputTads, inputTadOffsets, reinterpret_cast(indices->specialBuffer()), begins, lengths, numClasses, output->specialBuffer(), output->specialShapeInfo(), outputTads, outputTadOffsets); } @@ -219,12 +227,12 @@ namespace helpers { else { output->assign(0); std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + auto inputTads = packX.specialShapeInfo(); + auto inputTadOffsets = packX.specialOffsets(); + auto outputTads = packZ.specialShapeInfo(); + auto outputTadOffsets = packZ.specialOffsets(); dims.x = input->sizeAt(0); segmentSumTadKernel<<>>(input->specialBuffer(), input->specialShapeInfo(), inputTads, inputTadOffsets, reinterpret_cast(indices->specialBuffer()), begins, lengths, numOfClasses, output->specialBuffer(), output->specialShapeInfo(), outputTads, outputTadOffsets); } @@ -245,21 +253,19 @@ namespace helpers { // 
-------------------------------------------------------------------------------------------------------------- // // Sorted sum backpropagate template - static __global__ void segmentSumBPLinearKernel(void* inputBuf, Nd4jLong* inputShape, void* eps, Nd4jLong* epsShape, - void* indicesBuf, Nd4jLong* indicesShape, void* outputBuf, Nd4jLong* outputShape) { - __shared__ T* x; - __shared__ T* gradIn; - __shared__ T* gradOut; - __shared__ I* y; - __shared__ T* z; + static __global__ void segmentSumBPLinearKernel( + const void* inputBuf, const Nd4jLong* inputShape, + const void* eps, const Nd4jLong* epsShape, + const void* indicesBuf, const Nd4jLong* indicesShape, + void* outputBuf, const Nd4jLong* outputShape) { + auto x = reinterpret_cast(inputBuf); + auto y = reinterpret_cast(indicesBuf); + auto z = reinterpret_cast(outputBuf); + auto gradOut = reinterpret_cast(eps); __shared__ Nd4jLong xLen, gradLen; if (threadIdx.x == 0) { xLen = shape::length(inputShape); - x = reinterpret_cast(inputBuf); - y = reinterpret_cast(indicesBuf); - z = reinterpret_cast(outputBuf); - gradOut = reinterpret_cast(eps); gradLen = shape::length(epsShape); } __syncthreads(); @@ -280,22 +286,27 @@ namespace helpers { } // -------------------------------------------------------------------------------------------------------------- // template - static __global__ void segmentSumBPTadKernel(void* inputBuf, Nd4jLong* inputShape, void* eps, Nd4jLong* epsShape, - void* indicesBuf, Nd4jLong* indicesShape, void* outputBuf, Nd4jLong* outputShape, Nd4jLong* inputTad, - Nd4jLong* inputOffsets, Nd4jLong* gradOutTad, Nd4jLong* gradOutOffsets, Nd4jLong* outTad, Nd4jLong* outOffsets) { - __shared__ T* x; - __shared__ T* gradOut; - __shared__ I* y; + static __global__ void segmentSumBPTadKernel( + const void* inputBuf, const Nd4jLong* inputShape, + const void* eps, const Nd4jLong* epsShape, + const void* indicesBuf, const Nd4jLong* indicesShape, + void* outputBuf, const Nd4jLong* outputShape, + const Nd4jLong* 
inputTad, const Nd4jLong* inputOffsets, + const Nd4jLong* gradOutTad, const Nd4jLong* gradOutOffsets, + const Nd4jLong* outTad, const Nd4jLong* outOffsets) { + __shared__ const T* x; + __shared__ const T* gradOut; + __shared__ const I* y; __shared__ T* z; __shared__ Nd4jLong xLen, yLen, gradLen, currentLen; if (threadIdx.x == 0) { xLen = shape::length(inputShape); - x = reinterpret_cast(inputBuf); - y = reinterpret_cast(indicesBuf); + x = reinterpret_cast(inputBuf); + y = reinterpret_cast(indicesBuf); z = reinterpret_cast(outputBuf); yLen = shape::length(indicesShape); - gradOut = reinterpret_cast(eps); + gradOut = reinterpret_cast(eps); gradLen = shape::length(epsShape); currentLen = shape::length(outTad); } @@ -304,8 +315,8 @@ namespace helpers { for (auto i = blockIdx.x; i < yLen; i += gridDim.x) { auto yIndex = shape::getIndexOffset(i, indicesShape); auto segment = y[yIndex]; - T* currentOut = z + outOffsets[i]; - T* outGrad = gradOut + gradOutOffsets[segment]; + auto currentOut = z + outOffsets[i]; + auto outGrad = gradOut + gradOutOffsets[segment]; for (auto e = threadIdx.x; e < currentLen; e += blockDim.x) { currentOut[e] = outGrad[e]; @@ -327,15 +338,15 @@ namespace helpers { } else { std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); - Nd4jLong* gradOutTads = packGradOut.specialShapeInfo(); - Nd4jLong* gradOutTadOffsets = packGradOut.specialOffsets(); + auto packX = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions); + auto inputTads = packX.specialShapeInfo(); + auto inputTadOffsets = packX.specialOffsets(); + auto outputTads = packZ.specialShapeInfo(); + auto outputTadOffsets = packZ.specialOffsets(); + auto gradOutTads = packGradOut.specialShapeInfo(); + auto gradOutTadOffsets = packGradOut.specialOffsets(); segmentSumBPTadKernel<<lengthOf(), input->lengthOf(), 256, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), gradOut->specialBuffer(), gradOut->specialShapeInfo(), @@ -368,15 +379,15 @@ namespace helpers { } else { std::vector dimensions = ShapeUtils::evalDimsToExclude(input->rankOf(), {0}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), dimensions); - auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->getShapeInfo(), dimensions); - Nd4jLong* inputTads = packX.specialShapeInfo(); - Nd4jLong* inputTadOffsets = packX.specialOffsets(); - Nd4jLong* outputTads = packZ.specialShapeInfo(); - Nd4jLong* outputTadOffsets = packZ.specialOffsets(); - Nd4jLong* gradOutTads = packGradOut.specialShapeInfo(); - Nd4jLong* gradOutTadOffsets = packGradOut.specialOffsets(); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimensions); + auto packGradOut = sd::ConstantTadHelper::getInstance()->tadForDimensions(gradOut->shapeInfo(), dimensions); + auto inputTads = packX.specialShapeInfo(); + auto inputTadOffsets = packX.specialOffsets(); + auto outputTads = 
packZ.specialShapeInfo(); + auto outputTadOffsets = packZ.specialOffsets(); + auto gradOutTads = packGradOut.specialShapeInfo(); + auto gradOutTadOffsets = packGradOut.specialOffsets(); segmentSumBPTadKernel<<lengthOf(), input->lengthOf(), 256, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), gradOut->specialBuffer(), gradOut->specialShapeInfo(), diff --git a/libnd4j/include/ops/declarable/helpers/cuda/sequence_mask.cu b/libnd4j/include/ops/declarable/helpers/cuda/sequence_mask.cu index b06797753..51b7590c0 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/sequence_mask.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/sequence_mask.cu @@ -25,13 +25,13 @@ namespace ops { namespace helpers { template - static __global__ void sequenceMaskKernel(void* inputBuf, Nd4jLong* inputShape, void* outputBuf, Nd4jLong* outputShape, int maxIndex) { + static __global__ void sequenceMaskKernel(const void* inputBuf, const Nd4jLong* inputShape, void* outputBuf, const Nd4jLong* outputShape, int maxIndex) { - __shared__ I* input; + __shared__ const I* input; __shared__ B* output; __shared__ Nd4jLong inputLen, outputLen; if (threadIdx.x == 0) { - input = reinterpret_cast(inputBuf); + input = reinterpret_cast(inputBuf); output = reinterpret_cast(outputBuf); inputLen = shape::length(inputShape); outputLen = shape::length(outputShape); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/sg_cb.cu b/libnd4j/include/ops/declarable/helpers/cuda/sg_cb.cu index f85a855b7..3957f23d5 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/sg_cb.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/sg_cb.cu @@ -289,7 +289,7 @@ namespace sd { if (irow < 0 || irow >= vocabSize) continue; - auto syn1row = reinterpret_cast(s1.getSpecialBuffer()) + (irow * vectorLength); + auto syn1row = reinterpret_cast(s1.specialBuffer()) + (irow * vectorLength); auto code = bCodes[e + cShift]; //nd4j_printf("syn0: [%i]; syn1: [%i]; code: [%i]\n", target, irow, code); @@ -315,7 +315,7 @@ 
namespace sd { if (irow == nsStarter) continue; } - auto syn1row = reinterpret_cast(s1n.getSpecialBuffer()) + (irow * vectorLength); + auto syn1row = reinterpret_cast(s1n.specialBuffer()) + (irow * vectorLength); nSampling_(syn0row, syn1row, expTable, neu1e, alpha, vectorLength, r == 0 ? 1 : 0, expLength, false, stream); } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/solve.cu b/libnd4j/include/ops/declarable/helpers/cuda/solve.cu index 74823483e..cf8308bbe 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/solve.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/solve.cu @@ -34,7 +34,7 @@ namespace sd { namespace helpers { template - static __global__ void oneOnDiagonalKernel(T* ioBuf, Nd4jLong* ioShape, Nd4jLong* tadShape, Nd4jLong* tadOffsets, Nd4jLong batchNum, Nd4jLong rowNum) { + static __global__ void oneOnDiagonalKernel(T* ioBuf, Nd4jLong const* ioShape, Nd4jLong const* tadShape, Nd4jLong const* tadOffsets, Nd4jLong batchNum, Nd4jLong rowNum) { for (auto i = blockIdx.x; i < batchNum; i += gridDim.x) { auto matrixPart = ioBuf + tadOffsets[i]; for (auto j = threadIdx.x; j < rowNum; j += blockDim.x) { @@ -47,8 +47,8 @@ namespace sd { } template - static __global__ void restorePermutationsKernel(T* PBuf, Nd4jLong* PShapeInfo, int const* permutationsBuf, - Nd4jLong* PTadShapeInfo, Nd4jLong* PTadSOffsets, Nd4jLong* permutationsTadShapeInfo, Nd4jLong* permutationsTadOffsets, Nd4jLong batchNum, Nd4jLong rowNum) { + static __global__ void restorePermutationsKernel(T* PBuf, Nd4jLong const* PShapeInfo, int const* permutationsBuf, + Nd4jLong const* PTadShapeInfo, Nd4jLong const* PTadSOffsets, Nd4jLong const* permutationsTadShapeInfo, Nd4jLong const* permutationsTadOffsets, Nd4jLong batchNum, Nd4jLong rowNum) { for (auto batch = blockIdx.x; batch < batchNum; batch += gridDim.x) { auto permutations = permutationsBuf + permutationsTadOffsets[batch]; auto P = PBuf + PTadSOffsets[batch]; @@ -73,12 +73,12 @@ namespace sd { helpers::lu(context, leftInput, 
&leftOutput, &permutations); auto leftLower = leftOutput.dup(); auto rightOutput = rightInput->ulike(); - auto leftLowerTad = ConstantTadHelper::getInstance()->tadForDimensions(leftLower.getShapeInfo(), {-2, -1}); + auto leftLowerTad = ConstantTadHelper::getInstance()->tadForDimensions(leftLower.shapeInfo(), {-2, -1}); auto stream = context->getCudaStream(); oneOnDiagonalKernel<<<128, 256, 256, *stream>>>(leftLower.dataBuffer()->specialAsT(), leftLower.specialShapeInfo(), leftLowerTad.specialShapeInfo(), leftLowerTad.specialOffsets(), leftLowerTad.numberOfTads(), leftLower.sizeAt(-1)); auto P = leftOutput.ulike(); P.nullify(); - auto PTad = ConstantTadHelper::getInstance()->tadForDimensions(P.getShapeInfo(), {-2, -1}); - auto permutationsTad = ConstantTadHelper::getInstance()->tadForDimensions(permutations.getShapeInfo(), {-1}); + auto PTad = ConstantTadHelper::getInstance()->tadForDimensions(P.shapeInfo(), {-2, -1}); + auto permutationsTad = ConstantTadHelper::getInstance()->tadForDimensions(permutations.shapeInfo(), {-1}); restorePermutationsKernel<<<128, 256, 256, *stream>>>(P.dataBuffer()->specialAsT(), P.specialShapeInfo(), permutations.dataBuffer()->specialAsT(), PTad.specialShapeInfo(), PTad.specialOffsets(), permutationsTad.specialShapeInfo(), permutationsTad.specialOffsets(), permutationsTad.numberOfTads(), permutations.sizeAt(-1)); P.tickWriteDevice(); @@ -99,8 +99,8 @@ namespace sd { } template - static __global__ void adjointKernel(T* output, Nd4jLong batchSize, Nd4jLong rows, Nd4jLong columns, Nd4jLong* outputTads, - Nd4jLong* outputOffsets) { + static __global__ void adjointKernel(T* output, Nd4jLong batchSize, Nd4jLong rows, Nd4jLong columns, Nd4jLong const* outputTads, + Nd4jLong const* outputOffsets) { for (auto b = blockIdx.x; b < batchSize; b += gridDim.x) { auto outputPart = output + outputOffsets[b]; @@ -120,8 +120,8 @@ namespace sd { template static void adjointMatrix_(sd::LaunchContext* context, NDArray const* input, NDArray* output) { 
NDArray::prepareSpecialUse({output}, {input}); - auto inputTads = ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), {-2, -1}); - auto outputTads = ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), {-2, -1}); + auto inputTads = ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {-2, -1}); + auto outputTads = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {-2, -1}); auto stream = context->getCudaStream(); auto outputBuf = reinterpret_cast(output->specialBuffer()); auto rows = input->sizeAt(-2); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/split.cu b/libnd4j/include/ops/declarable/helpers/cuda/split.cu index 5690d786c..19c58b89e 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/split.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/split.cu @@ -103,12 +103,12 @@ void split(sd::LaunchContext* context, const NDArray& input, std::vector(input.getSpecialBuffer()); + auto x = static_cast(input.specialBuffer()); for (uint i = 0; i < numOfSubArrs; ++i) { const auto memAmountToCopy = outArrs[i]->lengthOf() * sizeofT; - cudaMemcpyAsync(static_cast(outArrs[i]->getSpecialBuffer()), x, memAmountToCopy, cudaMemcpyDeviceToDevice, *context->getCudaStream()); - x = static_cast(x) + memAmountToCopy; + cudaMemcpyAsync(static_cast(outArrs[i]->specialBuffer()), x, memAmountToCopy, cudaMemcpyDeviceToDevice, *context->getCudaStream()); + x = static_cast(x) + memAmountToCopy; } if(cudaStreamSynchronize(*context->getCudaStream()) != 0) @@ -135,7 +135,7 @@ void split(sd::LaunchContext* context, const NDArray& input, std::vectorgetShapeInfo()); + // strideOfContigStride[i] = shape::strideOverContigAxis(axis, outArrs[i]->shapeInfo()); // } // } @@ -143,16 +143,16 @@ void split(sd::LaunchContext* context, const NDArray& input, std::vectorsizeAt(axis); // same for all outArrs // for (uint i = 0; i < input.lengthOf() / input.sizeAt(axis); ++i) { // const auto iShift = i * sizeofT; - 
// void* x = static_cast(input.getSpecialBuffer()) + xStep * iShift; + // void* x = static_cast(input.specialBuffer()) + xStep * iShift; // for (uint j = 0; j < numOfSubArrs; ++j) { - // void* z = static_cast(outArrs[j]->getSpecialBuffer()) + strideOfContigStride[j] * iShift; + // void* z = static_cast(outArrs[j]->specialBuffer()) + strideOfContigStride[j] * iShift; // const auto memSizeToCopy = zDim * sizeofT; // cudaMemcpyAsync(z, x, memSizeToCopy, cudaMemcpyDeviceToDevice, *context->getCudaStream()); // x = static_cast(x) + memSizeToCopy; @@ -171,13 +171,13 @@ void split(sd::LaunchContext* context, const NDArray& input, std::vector hOutBuffers(numOfSubArrs); for(int i = 0; i < numOfSubArrs; ++i) - hOutBuffers[i] = outArrs[i]->getSpecialBuffer(); + hOutBuffers[i] = outArrs[i]->specialBuffer(); PointersManager manager(context, "helpers::split"); void* dOutBuffers = manager.replicatePointer(hOutBuffers.data(), hOutBuffers.size() * sizeof(void*)); - BUILD_SINGLE_SELECTOR(input.dataType(), splitCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), dOutBuffers, outArrs[0]->specialShapeInfo(), axis), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), splitCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), dOutBuffers, outArrs[0]->specialShapeInfo(), axis), LIBND4J_TYPES); manager.synchronize(); // } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/sru.cu b/libnd4j/include/ops/declarable/helpers/cuda/sru.cu index 518525ecf..b59ac0052 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/sru.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/sru.cu @@ -248,7 +248,7 @@ void sruBI(sd::LaunchContext * context, NDArray* x, const NDArray* w, const NDAr const int sharedMem = threadsPerBlock * sizeof(int) * x->rankOf() + 128; NDArray::prepareSpecialUse({ht, ct}, {x, &wi, b, c0, mask}); - 
BUILD_SINGLE_SELECTOR(x->dataType(), sruBICudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), x->getSpecialBuffer(), x->getSpecialShapeInfo(), wi.getSpecialBuffer(), wi.getSpecialShapeInfo(), b->getSpecialBuffer(), b->getSpecialShapeInfo(), c0->getSpecialBuffer(), c0->getSpecialShapeInfo(), mask ? mask->getSpecialBuffer() : nullptr, mask ? mask->getSpecialShapeInfo() : nullptr, ht->specialBuffer(), ht->specialShapeInfo(), ct->specialBuffer(), ct->specialShapeInfo()), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(x->dataType(), sruBICudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), x->specialBuffer(), x->specialShapeInfo(), wi.specialBuffer(), wi.specialShapeInfo(), b->specialBuffer(), b->specialShapeInfo(), c0->specialBuffer(), c0->specialShapeInfo(), mask ? mask->specialBuffer() : nullptr, mask ? mask->specialShapeInfo() : nullptr, ht->specialBuffer(), ht->specialShapeInfo(), ct->specialBuffer(), ct->specialShapeInfo()), FLOAT_TYPES); NDArray::registerSpecialUse({ht, ct}, {x, &wi, b, c0, mask}); manager.synchronize(); @@ -516,7 +516,7 @@ void sruBIBP(sd::LaunchContext* context, NDArray* x, const NDArray* w, const NDA const int sharedMem = threadsPerBlock * sizeof(int) * x->rankOf() + 128; NDArray::prepareSpecialUse({gradI, &gradWi, &gradBias, gradC0}, {x, &wi, b, c0, ct, gradCt, gradHt, mask}); - BUILD_SINGLE_SELECTOR(x->dataType(), sruBIBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), x->getSpecialBuffer(), x->getSpecialShapeInfo(), wi.getSpecialBuffer(), wi.getSpecialShapeInfo(), b->getSpecialBuffer(), b->getSpecialShapeInfo(), c0->getSpecialBuffer(), c0->getSpecialShapeInfo(), mask ? mask->getSpecialBuffer() : nullptr, mask ? 
mask->getSpecialShapeInfo() : nullptr, ct->getSpecialBuffer(), ct->getSpecialShapeInfo(), gradHt->getSpecialBuffer(), gradHt->getSpecialShapeInfo(), gradCt->getSpecialBuffer(), gradCt->getSpecialShapeInfo(), gradI->specialBuffer(), gradI->specialShapeInfo(), gradWi.specialBuffer(), gradWi.specialShapeInfo(), gradBias.specialBuffer(), gradBias.specialShapeInfo(), gradC0->specialBuffer(), gradC0->specialShapeInfo()), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(x->dataType(), sruBIBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), x->specialBuffer(), x->specialShapeInfo(), wi.specialBuffer(), wi.specialShapeInfo(), b->specialBuffer(), b->specialShapeInfo(), c0->specialBuffer(), c0->specialShapeInfo(), mask ? mask->specialBuffer() : nullptr, mask ? mask->specialShapeInfo() : nullptr, ct->specialBuffer(), ct->specialShapeInfo(), gradHt->specialBuffer(), gradHt->specialShapeInfo(), gradCt->specialBuffer(), gradCt->specialShapeInfo(), gradI->specialBuffer(), gradI->specialShapeInfo(), gradWi.specialBuffer(), gradWi.specialShapeInfo(), gradBias.specialBuffer(), gradBias.specialShapeInfo(), gradC0->specialBuffer(), gradC0->specialShapeInfo()), FLOAT_TYPES); NDArray::registerSpecialUse({gradI, &gradWi, &gradBias, gradC0}, {x, &wi, b, c0, ct, gradCt, gradHt, mask}); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/stack.cu b/libnd4j/include/ops/declarable/helpers/cuda/stack.cu index 89859ae1d..f0983b76c 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/stack.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/stack.cu @@ -73,10 +73,10 @@ static void stack_(sd::LaunchContext* context, const std::vector if(inArrs[0]->rankOf() == 0) { - std::vector hInBuffers(numOfSubArrs); + std::vector hInBuffers(numOfSubArrs); for(int i = 0; i < numOfSubArrs; ++i) - hInBuffers[i] = inArrs[i]->getSpecialBuffer(); + hInBuffers[i] = inArrs[i]->specialBuffer(); PointersManager manager(context, "helpers::stack cuda"); @@ -91,15 
+91,15 @@ static void stack_(sd::LaunchContext* context, const std::vector } else { - auto zTadPack = ConstantTadHelper::getInstance()->tadForDimensions(output.getShapeInfo(), ShapeUtils::evalDimsToExclude(output.rankOf(), {dim})); - Nd4jLong* zTadShapeInfo = zTadPack.primaryShapeInfo(); + auto zTadPack = ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), ShapeUtils::evalDimsToExclude(output.rankOf(), {dim})); + auto zTadShapeInfo = zTadPack.primaryShapeInfo(); for (uint i = 0; i < numOfSubArrs; ++i) { void* zBuff = output.specialBufferWithOffset(zTadPack.primaryOffsets()[i]); NativeOpExecutioner::execTransformAny(context, transform::Assign, - nullptr, inArrs[i]->getShapeInfo(), inArrs[i]->getSpecialBuffer(), inArrs[i]->getSpecialShapeInfo(), + nullptr, inArrs[i]->shapeInfo(), inArrs[i]->specialBuffer(), inArrs[i]->specialShapeInfo(), nullptr, zTadShapeInfo, zBuff, zTadPack.specialShapeInfo(), nullptr, nullptr, nullptr, false/*allowParallelism*/); } @@ -164,7 +164,7 @@ static void unstack_(sd::LaunchContext* context, const NDArray& input, const std std::vector hOutBuffers(numOfSubArrs); for(int i = 0; i < numOfSubArrs; ++i) - hOutBuffers[i] = outArrs[i]->getSpecialBuffer(); + hOutBuffers[i] = outArrs[i]->specialBuffer(); PointersManager manager(context, "helpers::unstack cuda"); @@ -173,22 +173,22 @@ static void unstack_(sd::LaunchContext* context, const NDArray& input, const std const int threadsPerBlock = MAX_NUM_THREADS / 2; const int blocksPerGrid = (input.lengthOf() + threadsPerBlock - 1) / threadsPerBlock; - unstackScalarsCudaLauncher(blocksPerGrid, threadsPerBlock, context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), dOutBuffers); + unstackScalarsCudaLauncher(blocksPerGrid, threadsPerBlock, context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), dOutBuffers); manager.synchronize(); } else { - auto xTadPack = ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), 
ShapeUtils::evalDimsToExclude(input.rankOf(), {dim})); - Nd4jLong* xTadShapeInfo = xTadPack.primaryShapeInfo(); + auto xTadPack = ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), ShapeUtils::evalDimsToExclude(input.rankOf(), {dim})); + auto xTadShapeInfo = xTadPack.primaryShapeInfo(); for (uint i = 0; i < numOfSubArrs; ++i) { - void* xBuff = input.specialBufferWithOffset(xTadPack.primaryOffsets()[i]); + auto xBuff = input.specialBufferWithOffset(xTadPack.primaryOffsets()[i]); NativeOpExecutioner::execTransformAny(input.getContext(), transform::Assign, nullptr, xTadShapeInfo, xBuff, xTadPack.specialShapeInfo(), - nullptr, outArrs[i]->getShapeInfo(), outArrs[i]->specialBuffer(), outArrs[i]->specialShapeInfo(), + nullptr, outArrs[i]->shapeInfo(), outArrs[i]->specialBuffer(), outArrs[i]->specialShapeInfo(), nullptr, nullptr, nullptr, false/*allowParallelism*/); } } @@ -262,7 +262,7 @@ BUILD_SINGLE_TEMPLATE(template void unstack_, (sd::LaunchContext* context, const // std::vector hOutBuffers(numOfSubArrs); // for(int i = 0; i < numOfSubArrs; ++i) -// hOutBuffers[i] = outArrs[i]->getSpecialBuffer(); +// hOutBuffers[i] = outArrs[i]->specialBuffer(); // PointersManager manager(context, "helpers::unstack"); @@ -272,7 +272,7 @@ BUILD_SINGLE_TEMPLATE(template void unstack_, (sd::LaunchContext* context, const // outArrs[i]->syncToDevice(); // input.syncToDevice(); -// BUILD_SINGLE_SELECTOR(input.dataType(), unstackCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), dOutBuffers, outArrs[0]->getSpecialShapeInfo(), axis), LIBND4J_TYPES); +// BUILD_SINGLE_SELECTOR(input.dataType(), unstackCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), dOutBuffers, outArrs[0]->specialShapeInfo(), axis), LIBND4J_TYPES); // manager.synchronize(); @@ -340,7 +340,7 @@ BUILD_SINGLE_TEMPLATE(template void unstack_, (sd::LaunchContext* 
context, const // std::vector hInBuffers(numOfSubArrs); // for(int i = 0; i < numOfSubArrs; ++i) -// hInBuffers[i] = inArrs[i]->getSpecialBuffer(); +// hInBuffers[i] = inArrs[i]->specialBuffer(); // PointersManager manager(context, "helpers::stack"); @@ -350,7 +350,7 @@ BUILD_SINGLE_TEMPLATE(template void unstack_, (sd::LaunchContext* context, const // inArrs[i]->syncToDevice(); // output.syncToDevice(); -// BUILD_SINGLE_SELECTOR(output.dataType(), stackCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), dInBuffers, inArrs[0]->getSpecialShapeInfo(), output.getSpecialBuffer(), output.getSpecialShapeInfo(), axis), LIBND4J_TYPES); +// BUILD_SINGLE_SELECTOR(output.dataType(), stackCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), dInBuffers, inArrs[0]->specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), axis), LIBND4J_TYPES); // manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/svd.cu b/libnd4j/include/ops/declarable/helpers/cuda/svd.cu index 5c3d2811c..33dd0251a 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/svd.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/svd.cu @@ -229,10 +229,10 @@ static void svdQR(sd::LaunchContext* context, const NDArray* A, NDArray* S, NDAr // choose appropriate cuda gemm api depending on data types if(A->dataType() == DataType::DOUBLE) { - status = cusolverDnDgesvd(*handle, jobu, jobvt, m, n, reinterpret_cast(pA->getSpecialBuffer()), lda, reinterpret_cast(pS->getSpecialBuffer()), calcUV ? reinterpret_cast(pU->getSpecialBuffer()) : nullptr, ldu, calcUV ? reinterpret_cast(pVT->getSpecialBuffer()) : nullptr, ldvt, reinterpret_cast(dWork), lwork, reinterpret_cast(rWork), devInfo); + status = cusolverDnDgesvd(*handle, jobu, jobvt, m, n, reinterpret_cast(pA->specialBuffer()), lda, reinterpret_cast(pS->specialBuffer()), calcUV ? reinterpret_cast(pU->specialBuffer()) : nullptr, ldu, calcUV ? 
reinterpret_cast(pVT->specialBuffer()) : nullptr, ldvt, reinterpret_cast(dWork), lwork, reinterpret_cast(rWork), devInfo); } else if(A->dataType() == DataType::FLOAT32) { - status = cusolverDnSgesvd(*handle, jobu, jobvt, m, n, reinterpret_cast(pA->getSpecialBuffer()), lda, reinterpret_cast(pS->getSpecialBuffer()), calcUV ? reinterpret_cast(pU->getSpecialBuffer()) : nullptr, ldu, calcUV ? reinterpret_cast(pVT->getSpecialBuffer()) : nullptr, ldvt, reinterpret_cast(dWork), lwork, reinterpret_cast(rWork), devInfo); + status = cusolverDnSgesvd(*handle, jobu, jobvt, m, n, reinterpret_cast(pA->specialBuffer()), lda, reinterpret_cast(pS->specialBuffer()), calcUV ? reinterpret_cast(pU->specialBuffer()) : nullptr, ldu, calcUV ? reinterpret_cast(pVT->specialBuffer()) : nullptr, ldvt, reinterpret_cast(dWork), lwork, reinterpret_cast(rWork), devInfo); } else throw std::invalid_argument("svdQR: given data type is unsupported !"); @@ -386,7 +386,7 @@ static void svdJcb(sd::LaunchContext* context, const NDArray* A, NDArray* S, NDA if(!calcUV && m != n) { int maxDim = m > n ? m : n; arrToAvoidBugInAPI = new NDArray('c', {maxDim, maxDim}, pA->dataType(), context); - nullPtr = arrToAvoidBugInAPI->getSpecialBuffer(); + nullPtr = arrToAvoidBugInAPI->specialBuffer(); } // ****************** @@ -395,9 +395,9 @@ static void svdJcb(sd::LaunchContext* context, const NDArray* A, NDArray* S, NDA // query working space of SVD int lwork = 0; if(A->dataType() == DataType::DOUBLE) - status = cusolverDnDgesvdj_bufferSize(*handle, jobz, econ, m, n, reinterpret_cast(pA->getSpecialBuffer()), lda, reinterpret_cast(pS->getSpecialBuffer()), calcUV ? reinterpret_cast(pU->getSpecialBuffer()) : reinterpret_cast(nullPtr), ldu, calcUV ? reinterpret_cast(pV->getSpecialBuffer()) : reinterpret_cast(nullPtr), ldv, &lwork, gesvdjParams); + status = cusolverDnDgesvdj_bufferSize(*handle, jobz, econ, m, n, reinterpret_cast(pA->specialBuffer()), lda, reinterpret_cast(pS->specialBuffer()), calcUV ? 
reinterpret_cast(pU->specialBuffer()) : reinterpret_cast(nullPtr), ldu, calcUV ? reinterpret_cast(pV->specialBuffer()) : reinterpret_cast(nullPtr), ldv, &lwork, gesvdjParams); else if(A->dataType() == DataType::FLOAT32) - status = cusolverDnSgesvdj_bufferSize(*handle, jobz, econ, m, n, reinterpret_cast(pA->getSpecialBuffer()), lda, reinterpret_cast(pS->getSpecialBuffer()), calcUV ? reinterpret_cast(pU->getSpecialBuffer()) : reinterpret_cast(nullPtr), ldu, calcUV ? reinterpret_cast(pV->getSpecialBuffer()) : reinterpret_cast(nullPtr), ldv, &lwork, gesvdjParams); + status = cusolverDnSgesvdj_bufferSize(*handle, jobz, econ, m, n, reinterpret_cast(pA->specialBuffer()), lda, reinterpret_cast(pS->specialBuffer()), calcUV ? reinterpret_cast(pU->specialBuffer()) : reinterpret_cast(nullPtr), ldu, calcUV ? reinterpret_cast(pV->specialBuffer()) : reinterpret_cast(nullPtr), ldv, &lwork, gesvdjParams); else throw std::invalid_argument("svdJcb: given data type is unsupported !"); @@ -414,10 +414,10 @@ static void svdJcb(sd::LaunchContext* context, const NDArray* A, NDArray* S, NDA // choose appropriate cuda gemm api depending on data types if(A->dataType() == DataType::DOUBLE) { - status = cusolverDnDgesvdj(*handle, jobz, econ, m, n, reinterpret_cast(pA->getSpecialBuffer()), lda, reinterpret_cast(pS->getSpecialBuffer()), calcUV ? reinterpret_cast(pU->getSpecialBuffer()) : reinterpret_cast(nullPtr), ldu, calcUV ? reinterpret_cast(pV->getSpecialBuffer()) : reinterpret_cast(nullPtr), ldv, reinterpret_cast(dWork), lwork, devInfo, gesvdjParams); + status = cusolverDnDgesvdj(*handle, jobz, econ, m, n, reinterpret_cast(pA->specialBuffer()), lda, reinterpret_cast(pS->specialBuffer()), calcUV ? reinterpret_cast(pU->specialBuffer()) : reinterpret_cast(nullPtr), ldu, calcUV ? 
reinterpret_cast(pV->specialBuffer()) : reinterpret_cast(nullPtr), ldv, reinterpret_cast(dWork), lwork, devInfo, gesvdjParams); } else if(A->dataType() == DataType::FLOAT32) { - status = cusolverDnSgesvdj(*handle, jobz, econ, m, n, reinterpret_cast(pA->getSpecialBuffer()), lda, reinterpret_cast(pS->getSpecialBuffer()), calcUV ? reinterpret_cast(pU->getSpecialBuffer()) : reinterpret_cast(nullPtr), ldu, calcUV ? reinterpret_cast(pV->getSpecialBuffer()) : reinterpret_cast(nullPtr), ldv, reinterpret_cast(dWork), lwork, devInfo, gesvdjParams); + status = cusolverDnSgesvdj(*handle, jobz, econ, m, n, reinterpret_cast(pA->specialBuffer()), lda, reinterpret_cast(pS->specialBuffer()), calcUV ? reinterpret_cast(pU->specialBuffer()) : reinterpret_cast(nullPtr), ldu, calcUV ? reinterpret_cast(pV->specialBuffer()) : reinterpret_cast(nullPtr), ldv, reinterpret_cast(dWork), lwork, devInfo, gesvdjParams); } else throw std::invalid_argument("svdJcb: given data type is unsupported !"); @@ -570,9 +570,9 @@ static void svdBatched(sd::LaunchContext* context, const NDArray* A, NDArray* S, // query working space of SVD int lwork = 0; if(A->dataType() == DataType::DOUBLE) - status = cusolverDnDgesvdjBatched_bufferSize(handle, jobz, m, n, reinterpret_cast(pA->getSpecialBuffer()), lda, reinterpret_cast(pS->getSpecialBuffer()), calcUV ? reinterpret_cast(pU->getSpecialBuffer()) : nullptr, ldu, calcUV ? reinterpret_cast(pV->getSpecialBuffer()) : nullptr, ldv, &lwork, gesvdjParams, bS); + status = cusolverDnDgesvdjBatched_bufferSize(handle, jobz, m, n, reinterpret_cast(pA->specialBuffer()), lda, reinterpret_cast(pS->specialBuffer()), calcUV ? reinterpret_cast(pU->specialBuffer()) : nullptr, ldu, calcUV ? reinterpret_cast(pV->specialBuffer()) : nullptr, ldv, &lwork, gesvdjParams, bS); else if(A->dataType() == DataType::FLOAT32) - status = cusolverDnSgesvdjBatched_bufferSize(handle, jobz, m, n, reinterpret_cast(pA->getSpecialBuffer()), lda, reinterpret_cast(pS->getSpecialBuffer()), calcUV ? 
reinterpret_cast(pU->getSpecialBuffer()) : nullptr, ldu, calcUV ? reinterpret_cast(pV->getSpecialBuffer()) : nullptr, ldv, &lwork, gesvdjParams, bS); + status = cusolverDnSgesvdjBatched_bufferSize(handle, jobz, m, n, reinterpret_cast(pA->specialBuffer()), lda, reinterpret_cast(pS->specialBuffer()), calcUV ? reinterpret_cast(pU->specialBuffer()) : nullptr, ldu, calcUV ? reinterpret_cast(pV->specialBuffer()) : nullptr, ldv, &lwork, gesvdjParams, bS); else throw std::invalid_argument("svdBatched: given data type is unsupported !"); @@ -594,10 +594,10 @@ static void svdBatched(sd::LaunchContext* context, const NDArray* A, NDArray* S, // choose appropriate cuda gemm api depending on data types if(A->dataType() == DataType::DOUBLE) { - status = cusolverDnDgesvdjBatched(handle, jobz, m, n, reinterpret_cast(pA->getSpecialBuffer()), lda, reinterpret_cast(pS->getSpecialBuffer()), calcUV ? reinterpret_cast(pU->getSpecialBuffer()) : nullptr, ldu, calcUV ? reinterpret_cast(pV->getSpecialBuffer()) : nullptr, ldv, reinterpret_cast(dWork), lwork, devInfo, gesvdjParams, bS); + status = cusolverDnDgesvdjBatched(handle, jobz, m, n, reinterpret_cast(pA->specialBuffer()), lda, reinterpret_cast(pS->specialBuffer()), calcUV ? reinterpret_cast(pU->specialBuffer()) : nullptr, ldu, calcUV ? reinterpret_cast(pV->specialBuffer()) : nullptr, ldv, reinterpret_cast(dWork), lwork, devInfo, gesvdjParams, bS); } else if(A->dataType() == DataType::FLOAT32) { - status = cusolverDnSgesvdjBatched(handle, jobz, m, n, reinterpret_cast(pA->getSpecialBuffer()), lda, reinterpret_cast(pS->getSpecialBuffer()), calcUV ? reinterpret_cast(pU->getSpecialBuffer()) : nullptr, ldu, calcUV ? reinterpret_cast(pV->getSpecialBuffer()) : nullptr, ldv, reinterpret_cast(dWork), lwork, devInfo, gesvdjParams, bS); + status = cusolverDnSgesvdjBatched(handle, jobz, m, n, reinterpret_cast(pA->specialBuffer()), lda, reinterpret_cast(pS->specialBuffer()), calcUV ? reinterpret_cast(pU->specialBuffer()) : nullptr, ldu, calcUV ? 
reinterpret_cast(pV->specialBuffer()) : nullptr, ldv, reinterpret_cast(dWork), lwork, devInfo, gesvdjParams, bS); } else throw std::invalid_argument("svdBatched: given data type is unsupported !"); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/top_k.cu b/libnd4j/include/ops/declarable/helpers/cuda/top_k.cu index b344f570e..ce19d41cc 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/top_k.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/top_k.cu @@ -91,7 +91,7 @@ int inTopKFunctor(sd::LaunchContext * context, const NDArray* predictions, const PointersManager manager(context, "in_top_k"); - const auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(predictions->getShapeInfo(), {1}); + const auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(predictions->shapeInfo(), {1}); const int threadsPerBlock = MAX_NUM_THREADS; const int blocksPerGrid = static_cast(packX.numberOfTads()); @@ -101,7 +101,7 @@ int inTopKFunctor(sd::LaunchContext * context, const NDArray* predictions, const const auto yType = targets->dataType(); NDArray::prepareSpecialUse({output}, {predictions, targets}); - BUILD_DOUBLE_SELECTOR(xType, yType, inTopKCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), predictions->getSpecialBuffer(), predictions->getSpecialShapeInfo(), targets->getSpecialBuffer(), targets->getSpecialShapeInfo(), output->getSpecialBuffer(), output->getSpecialShapeInfo(), packX.specialShapeInfo(), packX.specialOffsets(), k), FLOAT_TYPES, INDEXING_TYPES); + BUILD_DOUBLE_SELECTOR(xType, yType, inTopKCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), predictions->specialBuffer(), predictions->specialShapeInfo(), targets->specialBuffer(), targets->specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), packX.specialShapeInfo(), packX.specialOffsets(), k), FLOAT_TYPES, INDEXING_TYPES); NDArray::registerSpecialUse({output}, {predictions, targets}); 
manager.synchronize(); @@ -110,10 +110,10 @@ int inTopKFunctor(sd::LaunchContext * context, const NDArray* predictions, const } template - static _CUDA_G void topValuesMover(void *vx, Nd4jLong *xTadShapeInfo, Nd4jLong *xTadOffsets, void *vi, Nd4jLong *iTadShapeInfo, Nd4jLong *iTadOffsets, void *vz, Nd4jLong *zTadShapeInfo, Nd4jLong *zTadOffsets, Nd4jLong tadLength, int numTads, int k) { + static _CUDA_G void topValuesMover(void const* vx, Nd4jLong const* xTadShapeInfo, Nd4jLong const* xTadOffsets, void const* vi, Nd4jLong const* iTadShapeInfo, Nd4jLong const* iTadOffsets, void *vz, Nd4jLong const* zTadShapeInfo, Nd4jLong const* zTadOffsets, Nd4jLong tadLength, int numTads, int k) { for (int t = blockIdx.x; t < numTads; t += gridDim.x) { - auto x = reinterpret_cast(vx) + xTadOffsets[t]; - auto i = reinterpret_cast(vi) + iTadOffsets[t]; + auto x = reinterpret_cast(vx) + xTadOffsets[t]; + auto i = reinterpret_cast(vi) + iTadOffsets[t]; auto z = reinterpret_cast(vz) + zTadOffsets[t]; for (int e = threadIdx.x; e < k; e += blockDim.x) { @@ -126,7 +126,7 @@ int inTopKFunctor(sd::LaunchContext * context, const NDArray* predictions, const template - static _CUDA_G void indicesAlongDimension(void *vx, Nd4jLong *xTadShapeInfo, Nd4jLong *xTadOffsets, void *vi, Nd4jLong *iTadShapeInfo, Nd4jLong *iTadOffsets, void *vz, Nd4jLong *zTadShapeInfo, Nd4jLong *zTadOffsets, Nd4jLong tadLength, int numTads, int k, int scanWidth, bool needSort) { + static _CUDA_G void indicesAlongDimension(void const* vx, Nd4jLong const* xTadShapeInfo, Nd4jLong const* xTadOffsets, void* vi, Nd4jLong const* iTadShapeInfo, Nd4jLong const* iTadOffsets, void *vz, Nd4jLong const* zTadShapeInfo, Nd4jLong const* zTadOffsets, Nd4jLong tadLength, int numTads, int k, int scanWidth, bool needSort) { extern __shared__ char _shmem[]; X* tempValues = reinterpret_cast(_shmem) + threadIdx.x * scanWidth; @@ -138,8 +138,8 @@ int inTopKFunctor(sd::LaunchContext * context, const NDArray* predictions, const __syncthreads(); 
for (int t = blockIdx.x; t < numTads; t += gridDim.x) { - auto x = reinterpret_cast(vx) + xTadOffsets[t]; - auto i = reinterpret_cast(vi) + iTadOffsets[t]; + auto x = reinterpret_cast(vx) + xTadOffsets[t]; + auto i = reinterpret_cast(vi) + iTadOffsets[t]; auto z = reinterpret_cast(vz) + zTadOffsets[t]; // we'll do multiple reads here @@ -243,7 +243,7 @@ int inTopKFunctor(sd::LaunchContext * context, const NDArray* predictions, const template static int topKFunctor_(sd::LaunchContext * context, const NDArray* input, NDArray* values, NDArray* indices, const uint k, bool needSort) { - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), {input->rankOf() - 1}); + auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {input->rankOf() - 1}); auto packI = ConstantTadHelper::getInstance()->tadForDimensions(indices->shapeInfo(), {input->rankOf() - 1}); auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(values->shapeInfo(), {input->rankOf() - 1}); @@ -254,13 +254,13 @@ int inTopKFunctor(sd::LaunchContext * context, const NDArray* predictions, const input->applyIndexReduce(indexreduce::IndexMax, *indices, {input->rankOf() - 1}); // copy values on specified indices - topValuesMover<<<256, 256, 1024, *context->getCudaStream()>>>(input->getSpecialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), indices->specialBuffer(), packI.platformShapeInfo(), packI.platformOffsets(), values->specialBuffer(), packZ.platformShapeInfo(), packZ.platformOffsets(), tadLength, packX.numberOfTads(), k); + topValuesMover<<<256, 256, 1024, *context->getCudaStream()>>>(input->specialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), indices->specialBuffer(), packI.platformShapeInfo(), packI.platformOffsets(), values->specialBuffer(), packZ.platformShapeInfo(), packZ.platformOffsets(), tadLength, packX.numberOfTads(), k); } else { int scanWidth = 1; int numTreads = 256; int shMemSize = (numTreads * 
sizeof(X) * scanWidth) + (numTreads * sizeof(Y) * scanWidth) + 512; - indicesAlongDimension<<<256, numTreads, shMemSize, *context->getCudaStream()>>>(input->getSpecialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), indices->specialBuffer(), packI.platformShapeInfo(), packI.platformOffsets(), values->specialBuffer(), packZ.platformShapeInfo(), packZ.platformOffsets(), tadLength, packX.numberOfTads(), k, scanWidth, needSort); + indicesAlongDimension<<<256, numTreads, shMemSize, *context->getCudaStream()>>>(input->specialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), indices->specialBuffer(), packI.platformShapeInfo(), packI.platformOffsets(), values->specialBuffer(), packZ.platformShapeInfo(), packZ.platformOffsets(), tadLength, packX.numberOfTads(), k, scanWidth, needSort); } return Status::OK(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/transforms.cu b/libnd4j/include/ops/declarable/helpers/cuda/transforms.cu index b4dcfb2f6..f016491a6 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/transforms.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/transforms.cu @@ -79,7 +79,7 @@ void invertPermutation(sd::LaunchContext* context, const NDArray& input, NDArray PointersManager manager(context, "invertPermutation"); NDArray::prepareSpecialUse({&output}, {&input}); - BUILD_SINGLE_SELECTOR(input.dataType(), invertPermutationCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.getSpecialBuffer(), output.getSpecialShapeInfo()), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), invertPermutationCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo()), LIBND4J_TYPES); NDArray::registerSpecialUse({&output}, {&input}); manager.synchronize(); @@ -163,7 +163,7 @@ void trace(sd::LaunchContext* context, const NDArray& input, 
NDArray& output) { const int sharedMem = threadsPerBlock * (sizeof(int) * input.rankOf() + input.sizeOfT()) + 128; NDArray::prepareSpecialUse({&output}, {&input}); - BUILD_SINGLE_SELECTOR(input.dataType(), traceCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), diagLen), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), traceCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), diagLen), LIBND4J_TYPES); NDArray::registerSpecialUse({&output}, {&input}); manager.synchronize(); @@ -226,7 +226,7 @@ void triuBP(sd::LaunchContext* context, const NDArray& input, const NDArray& gra PointersManager manager(context, "triuBP"); NDArray::prepareSpecialUse({&gradI}, {&gradO}); - BUILD_SINGLE_SELECTOR(gradI.dataType(), triuBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), gradO.getSpecialBuffer(), gradO.getSpecialShapeInfo(), gradI.specialBuffer(), gradI.specialShapeInfo(), diagonal), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(gradI.dataType(), triuBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), gradO.specialBuffer(), gradO.specialShapeInfo(), gradI.specialBuffer(), gradI.specialShapeInfo(), diagonal), LIBND4J_TYPES); NDArray::registerSpecialUse({&gradI}, {&gradO}); manager.synchronize(); @@ -294,7 +294,7 @@ void tileBP(sd::LaunchContext * context, const NDArray& gradO /*input*/, NDArray PointersManager manager(context, "tileBP"); NDArray::prepareSpecialUse({&gradI}, {&gradO, &memBuff}); - BUILD_SINGLE_SELECTOR(gradI.dataType(), tileBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), gradO.getSpecialBuffer(), gradO.getSpecialShapeInfo(), gradI.specialBuffer(), gradI.specialShapeInfo(), 
reinterpret_cast(memBuff.specialBuffer())), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(gradI.dataType(), tileBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), gradO.specialBuffer(), gradO.specialShapeInfo(), gradI.specialBuffer(), gradI.specialShapeInfo(), reinterpret_cast(memBuff.specialBuffer())), FLOAT_TYPES); NDArray::registerSpecialUse({&gradI}, {&gradO, &memBuff}); manager.synchronize(); @@ -546,16 +546,16 @@ void clipByNormBP(sd::LaunchContext* context, const NDArray& input, const NDArra if(dimensions.empty() || dimensions.size() == input.rankOf()) { // means whole array const int blocksPerGrid = (input.lengthOf() + threadsPerBlock - 1) / threadsPerBlock; - BUILD_DOUBLE_SELECTOR(xType, zType, clipByNormBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), nullptr, gradO.getSpecialBuffer(), gradO.getSpecialShapeInfo(), nullptr, gradI.getSpecialBuffer(), gradI.getSpecialShapeInfo(), nullptr, context->getReductionPointer(), clipNormVal), FLOAT_TYPES, FLOAT_TYPES); + BUILD_DOUBLE_SELECTOR(xType, zType, clipByNormBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.specialBuffer(), input.specialShapeInfo(), nullptr, gradO.specialBuffer(), gradO.specialShapeInfo(), nullptr, gradI.specialBuffer(), gradI.specialShapeInfo(), nullptr, context->getReductionPointer(), clipNormVal), FLOAT_TYPES, FLOAT_TYPES); } else { // means tads using - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), dimensions); - auto packY = ConstantTadHelper::getInstance()->tadForDimensions(gradO.getShapeInfo(), dimensions); - auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(gradI.getShapeInfo(), dimensions); + auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimensions); + auto packY = ConstantTadHelper::getInstance()->tadForDimensions(gradO.shapeInfo(), 
dimensions); + auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(gradI.shapeInfo(), dimensions); const int blocksPerGrid = packX.numberOfTads(); - BUILD_DOUBLE_SELECTOR(xType, zType, clipByNormBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.getSpecialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), gradO.getSpecialBuffer(), packY.platformShapeInfo(), packY.platformOffsets(), gradI.getSpecialBuffer(), packZ.platformShapeInfo(), packZ.platformOffsets(), nullptr, clipNormVal), FLOAT_TYPES, FLOAT_TYPES); + BUILD_DOUBLE_SELECTOR(xType, zType, clipByNormBPCudaLauncher, (blocksPerGrid, threadsPerBlock, sharedMem, context->getCudaStream(), input.specialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), gradO.specialBuffer(), packY.platformShapeInfo(), packY.platformOffsets(), gradI.specialBuffer(), packZ.platformShapeInfo(), packZ.platformOffsets(), nullptr, clipNormVal), FLOAT_TYPES, FLOAT_TYPES); } NDArray::registerSpecialUse({&gradI}, {&input, &gradO}); @@ -564,7 +564,7 @@ void clipByNormBP(sd::LaunchContext* context, const NDArray& input, const NDArra } template - static __global__ void swapShuffleKernel(T* input, Nd4jLong* shape, Nd4jLong firstDim, sd::graph::RandomGenerator* rng) { + static __global__ void swapShuffleKernel(T* input, Nd4jLong const* shape, Nd4jLong firstDim, sd::graph::RandomGenerator* rng) { auto tid = blockIdx.x * blockDim.x; auto step = blockDim.x * gridDim.x; @@ -582,7 +582,7 @@ void clipByNormBP(sd::LaunchContext* context, const NDArray& input, const NDArra } } template - static __global__ void fillShuffleKernel(T* input, Nd4jLong* inputShape, T* output, Nd4jLong* outputShape, Nd4jLong firstDim, int* indices, sd::graph::RandomGenerator* rng) { + static __global__ void fillShuffleKernel(T* input, Nd4jLong const* inputShape, T* output, Nd4jLong const* outputShape, Nd4jLong firstDim, int* indices, sd::graph::RandomGenerator* rng) { // 
PRAGMA_OMP_PARALLEL_FOR_IF((firstDim-1) > Environment::getInstance()->tadThreshold()) auto tid = blockIdx.x * blockDim.x; @@ -613,7 +613,7 @@ void clipByNormBP(sd::LaunchContext* context, const NDArray& input, const NDArra if(!isInplace) output.assign(input); } - else if (input.isVector() || shape::isLikeVector(input.getShapeInfo(), temp)) { + else if (input.isVector() || shape::isLikeVector(input.shapeInfo(), temp)) { // apply Fisher-Yates shuffle sd::graph::RandomGenerator* dRandom = nullptr; @@ -694,7 +694,7 @@ void clipByNormBP(sd::LaunchContext* context, const NDArray& input, const NDArra //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// template - static __global__ void clipByNormInplaceKernel(Nd4jLong numOfSubArrs, T* inputBuffer, Nd4jLong* shape, Nd4jLong* inputOffsets, T* norm2Buf, Nd4jLong* norm2shape, T clipNorm) { + static __global__ void clipByNormInplaceKernel(Nd4jLong numOfSubArrs, T* inputBuffer, Nd4jLong const* shape, Nd4jLong const* inputOffsets, T* norm2Buf, Nd4jLong const* norm2shape, T clipNorm) { for (int arr = blockIdx.x; arr < numOfSubArrs; arr += gridDim.x) { __shared__ T* z; __shared__ Nd4jLong len; @@ -713,7 +713,7 @@ void clipByNormBP(sd::LaunchContext* context, const NDArray& input, const NDArra } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// template - static __global__ void clipByNormKernel(Nd4jLong numOfSubArrs, T* inputBuffer, Nd4jLong* shape, Nd4jLong* inputOffsets, T* outputBuffer, Nd4jLong* outputShape, Nd4jLong* outputOffsets, T* norm2Buf, Nd4jLong* norm2shape, T clipNorm) { + static __global__ void clipByNormKernel(Nd4jLong numOfSubArrs, T* inputBuffer, Nd4jLong const* shape, Nd4jLong const* inputOffsets, T* outputBuffer, Nd4jLong const* outputShape, Nd4jLong const* outputOffsets, T* norm2Buf, Nd4jLong const* norm2shape, T clipNorm) { for (Nd4jLong arr = blockIdx.x; arr < numOfSubArrs; arr += 
gridDim.x) { __shared__ T* x, *z; @@ -761,9 +761,9 @@ void clipByNormBP(sd::LaunchContext* context, const NDArray& input, const NDArra else { std::vector dimsToExclude = ShapeUtils::evalDimsToExclude(rank, dimensions); - const Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(input.getShapeInfo(), dimsToExclude); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), dimensions); - //auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.getShapeInfo(), dimsToExclude); + const Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(input.shapeInfo(), dimsToExclude); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimensions); + //auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), dimsToExclude); T* inputBuffer = reinterpret_cast(input.specialBuffer()); T* norm2buf = reinterpret_cast(norm2.specialBuffer()); @@ -784,9 +784,9 @@ void clipByNormBP(sd::LaunchContext* context, const NDArray& input, const NDArra else { std::vector dimsToExclude = ShapeUtils::evalDimsToExclude(rank, dimensions); - const Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(input.getShapeInfo(), dimsToExclude); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), dimensions); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.getShapeInfo(), dimensions); + const Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(input.shapeInfo(), dimsToExclude); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimensions); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(output.shapeInfo(), dimensions); T* inputBuffer = reinterpret_cast(input.specialBuffer()); T* norm2buf = reinterpret_cast(norm2.specialBuffer()); T* outputBuffer = reinterpret_cast(output.specialBuffer()); @@ -891,7 +891,7 @@ void clipByNormBP(sd::LaunchContext* context, const NDArray& 
input, const NDArra else return d1; */ template - static void __global__ clipByValueKernel(void* input, Nd4jLong* inputShape, void* output, Nd4jLong* outputShape, double leftBound, double rightBound) { + static void __global__ clipByValueKernel(void* input, Nd4jLong const* inputShape, void* output, Nd4jLong const* outputShape, double leftBound, double rightBound) { __shared__ T* outputBuf; __shared__ T* inputBuf; __shared__ Nd4jLong length; diff --git a/libnd4j/include/ops/declarable/helpers/cuda/triangular_solve.cu b/libnd4j/include/ops/declarable/helpers/cuda/triangular_solve.cu index e34fd11f8..c8f26de6f 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/triangular_solve.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/triangular_solve.cu @@ -43,7 +43,7 @@ namespace sd { template static __device__ void lowerTriangularSolve(T const* leftInput, Nd4jLong const* leftInputShape, T const* rightInput, Nd4jLong const* rightInputShape, - bool const adjoint, T* output, Nd4jLong* outputShape, + bool const adjoint, T* output, Nd4jLong const* outputShape, Nd4jLong rows, Nd4jLong cols) { for (auto r = 0; r < rows; r++) { @@ -84,7 +84,7 @@ namespace sd { template static __device__ void upperTriangularSolve(T const* leftInput, Nd4jLong const* leftInputShape, T const* rightInput, Nd4jLong const* rightInputShape, bool const adjoint, T* output, - Nd4jLong* outputShape, Nd4jLong rows, Nd4jLong cols) { + Nd4jLong const* outputShape, Nd4jLong rows, Nd4jLong cols) { for (auto r = rows; r > 0; r--) { for (auto j = 0; j < cols; j++) { @@ -109,8 +109,8 @@ namespace sd { template static __global__ void triangularSolveKernel(T const* leftInput, Nd4jLong const* leftPartShape, T const* rightInput, Nd4jLong const* rightPartShape, bool const lower, bool const adjoint, T* output, - Nd4jLong* outputShape, Nd4jLong* tadLeftShape, Nd4jLong* tadLeftOffset, Nd4jLong* tadRightShape, - Nd4jLong* tadRightOffset, Nd4jLong* tadOutputShape, Nd4jLong* tadOutputOffset, Nd4jLong batchNum) { + 
Nd4jLong const* outputShape, Nd4jLong const* tadLeftShape, Nd4jLong const* tadLeftOffset, Nd4jLong const* tadRightShape, + Nd4jLong const* tadRightOffset, Nd4jLong const* tadOutputShape, Nd4jLong const* tadOutputOffset, Nd4jLong batchNum) { __shared__ Nd4jLong rows; __shared__ Nd4jLong cols; @@ -141,16 +141,16 @@ namespace sd { static int triangularSolveFunctor_(sd::LaunchContext * context, NDArray* leftInput, NDArray* rightInput, bool lower, bool adjoint, NDArray* output) { NDArray::prepareSpecialUse({output}, {leftInput, rightInput}); - auto leftTads = ConstantTadHelper::getInstance()->tadForDimensions(leftInput->getShapeInfo(), {-2, -1}); - auto rightTads = ConstantTadHelper::getInstance()->tadForDimensions(rightInput->getShapeInfo(), {-2, -1}); + auto leftTads = ConstantTadHelper::getInstance()->tadForDimensions(leftInput->shapeInfo(), {-2, -1}); + auto rightTads = ConstantTadHelper::getInstance()->tadForDimensions(rightInput->shapeInfo(), {-2, -1}); auto outputTads = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {-2, -1}); auto stream = context->getCudaStream(); - T const* leftBuf = reinterpret_cast(leftInput->getSpecialBuffer()); - T const* rightBuf = reinterpret_cast(rightInput->getSpecialBuffer()); + T const* leftBuf = reinterpret_cast(leftInput->specialBuffer()); + T const* rightBuf = reinterpret_cast(rightInput->specialBuffer()); T* outputBuf = reinterpret_cast(output->specialBuffer()); - triangularSolveKernel<<<128, 128, 256, *stream>>>(leftBuf, leftInput->getSpecialShapeInfo(), - rightBuf, rightInput->getSpecialShapeInfo(), lower, adjoint, outputBuf, output->specialShapeInfo(), + triangularSolveKernel<<<128, 128, 256, *stream>>>(leftBuf, leftInput->specialShapeInfo(), + rightBuf, rightInput->specialShapeInfo(), lower, adjoint, outputBuf, output->specialShapeInfo(), leftTads.specialShapeInfo(), leftTads.specialOffsets(), rightTads.specialShapeInfo(), rightTads.specialOffsets(), outputTads.specialShapeInfo(), 
outputTads.specialOffsets(), leftTads.numberOfTads()); @@ -168,7 +168,7 @@ namespace sd { template static __global__ void upperAdjointKernel(T const* input, T* output, Nd4jLong batchSize, Nd4jLong rows, Nd4jLong columns, - Nd4jLong* inputTads, Nd4jLong* inputOffsets, Nd4jLong* outputTads, Nd4jLong* outputOffsets) { + Nd4jLong const* inputTads, Nd4jLong const* inputOffsets, Nd4jLong const* outputTads, Nd4jLong const* outputOffsets) { for (auto b = blockIdx.x; b < batchSize; b += gridDim.x) { auto inputPart = input + inputOffsets[b]; @@ -189,7 +189,7 @@ namespace sd { template static __global__ void lowerAdjointKernel(T const* input, T* output, Nd4jLong batchSize, Nd4jLong rows, Nd4jLong columns, - Nd4jLong* inputTads, Nd4jLong* inputOffsets, Nd4jLong* outputTads, Nd4jLong* outputOffsets) { + Nd4jLong const* inputTads, Nd4jLong const* inputOffsets, Nd4jLong const* outputTads, Nd4jLong const* outputOffsets) { for (auto b = blockIdx.x; b < batchSize; b += gridDim.x) { auto inputPart = input + inputOffsets[b]; @@ -210,10 +210,10 @@ namespace sd { static void adjointTriangularMatrix_(sd::LaunchContext* context, NDArray const* input, bool const lower, NDArray* output) { - auto inputTads = ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), {-2, -1}); - auto outputTads = ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), {-2, -1}); + auto inputTads = ConstantTadHelper::getInstance()->tadForDimensions(input->shapeInfo(), {-2, -1}); + auto outputTads = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {-2, -1}); auto stream = context->getCudaStream(); - auto inputBuf = reinterpret_cast(input->getSpecialBuffer()); + auto inputBuf = reinterpret_cast(input->specialBuffer()); auto outputBuf = reinterpret_cast(output->specialBuffer()); auto rows = input->sizeAt(-2); auto columns = input->sizeAt(-1); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/updaterAdaDelta.cu 
b/libnd4j/include/ops/declarable/helpers/cuda/updaterAdaDelta.cu index 33272ff57..c096c4294 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/updaterAdaDelta.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/updaterAdaDelta.cu @@ -101,7 +101,7 @@ linkage void adaDeltaUpdaterCudaLauncher(const int blocksPerGrid, const int thre const T rho = static_cast(dRho); const T epsilon = static_cast(dEpsilon); - adaDeltaUpdaterCuda << > > (vx, xShapeInfo, vinMsg, inMsgShapeInfo, + adaDeltaUpdaterCuda<<>>(vx, xShapeInfo, vinMsg, inMsgShapeInfo, vinMsdx, inMsdxShapeInfo, vz, zShapeInfo, vstMsg, stMsgShapeInfo, vstMsdx, stMsdxShapeInfo, rho, epsilon); } @@ -115,10 +115,10 @@ void updaterAdaDelta(sd::LaunchContext* context, const NDArray& gradient, const const int blocksPerGrid = (gradient.lengthOf() + threadsPerBlock - 1) / threadsPerBlock; NDArray::prepareSpecialUse({ &update, &stateMsg, &stateMsdx }, { &gradient, &initStateMsg, &initStateMsdx }); - BUILD_SINGLE_SELECTOR(gradient.dataType(), adaDeltaUpdaterCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), gradient.getSpecialBuffer(), gradient.getSpecialShapeInfo(), - initStateMsg.getSpecialBuffer(), initStateMsg.getSpecialShapeInfo(), initStateMsdx.getSpecialBuffer(), initStateMsdx.getSpecialShapeInfo(), - update.getSpecialBuffer(), update.getSpecialShapeInfo(),stateMsg.getSpecialBuffer(), stateMsg.getSpecialShapeInfo(), - stateMsdx.getSpecialBuffer(), stateMsdx.getSpecialShapeInfo(), dRho, dEpsilon), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(gradient.dataType(), adaDeltaUpdaterCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), gradient.specialBuffer(), gradient.specialShapeInfo(), + initStateMsg.specialBuffer(), initStateMsg.specialShapeInfo(), initStateMsdx.specialBuffer(), initStateMsdx.specialShapeInfo(), + update.specialBuffer(), update.specialShapeInfo(),stateMsg.specialBuffer(), stateMsg.specialShapeInfo(), + stateMsdx.specialBuffer(), stateMsdx.specialShapeInfo(), dRho, 
dEpsilon), FLOAT_TYPES); NDArray::registerSpecialUse({ &update, &stateMsg, &stateMsdx }, { &gradient, &initStateMsg, &initStateMsdx }); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/updaterAdaGrad.cu b/libnd4j/include/ops/declarable/helpers/cuda/updaterAdaGrad.cu index f0e77826d..50a43986c 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/updaterAdaGrad.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/updaterAdaGrad.cu @@ -88,7 +88,7 @@ linkage void adaGradUpdaterCudaLauncher(const int blocksPerGrid, const int threa const T lr = static_cast(dLr); const T epsilon = static_cast(dEpsilon); - adaGradUpdaterCuda << > > (vx, xShapeInfo, vin, inShapeInfo, + adaGradUpdaterCuda<<>>(vx, xShapeInfo, vin, inShapeInfo, vz, zShapeInfo, vst, stShapeInfo, lr, epsilon); } @@ -103,10 +103,10 @@ void updaterAdaGrad(sd::LaunchContext* context, const NDArray& gradient, const N NDArray::prepareSpecialUse({ &update, &stateH }, { &gradient, &initState }); BUILD_SINGLE_SELECTOR(gradient.dataType(), adaGradUpdaterCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), - gradient.getSpecialBuffer(), gradient.getSpecialShapeInfo(), - initState.getSpecialBuffer(), initState.getSpecialShapeInfo(), - update.getSpecialBuffer(), update.getSpecialShapeInfo(), - stateH.getSpecialBuffer(), stateH.getSpecialShapeInfo(), dLr, dEpsilon), FLOAT_TYPES); + gradient.specialBuffer(), gradient.specialShapeInfo(), + initState.specialBuffer(), initState.specialShapeInfo(), + update.specialBuffer(), update.specialShapeInfo(), + stateH.specialBuffer(), stateH.specialShapeInfo(), dLr, dEpsilon), FLOAT_TYPES); NDArray::registerSpecialUse({ &update, &stateH }, { &gradient, &initState }); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/updaterAdaMax.cu b/libnd4j/include/ops/declarable/helpers/cuda/updaterAdaMax.cu index 514440304..09301d05a 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/updaterAdaMax.cu +++ 
b/libnd4j/include/ops/declarable/helpers/cuda/updaterAdaMax.cu @@ -111,7 +111,7 @@ linkage void adaMaxUpdaterCudaLauncher(const int blocksPerGrid, const int thread const T epsilon = static_cast(dEpsilon); const T iteration = static_cast(nIteration); - adaMaxUpdaterCuda << > > (vx, xShapeInfo, vinv, invShapeInfo, vinm, inmShapeInfo, vz, + adaMaxUpdaterCuda<<>>(vx, xShapeInfo, vinv, invShapeInfo, vinm, inmShapeInfo, vz, zShapeInfo, vstV, stvShapeInfo, vstM, stmShapeInfo, lr, beta1, beta2, epsilon, iteration); } @@ -127,10 +127,10 @@ void updaterAdaMax(sd::LaunchContext* context, const NDArray& gradient, const ND NDArray::prepareSpecialUse({ &update, &stateU, &stateM }, { &gradient, &initStateU, &initStateM }); BUILD_SINGLE_SELECTOR(gradient.dataType(), adaMaxUpdaterCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), - gradient.getSpecialBuffer(), gradient.getSpecialShapeInfo(), initStateU.getSpecialBuffer(), - initStateU.getSpecialShapeInfo(), initStateM.getSpecialBuffer(), initStateM.getSpecialShapeInfo(), - update.getSpecialBuffer(), update.getSpecialShapeInfo(), stateU.getSpecialBuffer(), - stateU.getSpecialShapeInfo(), stateM.getSpecialBuffer(), stateM.getSpecialShapeInfo(), + gradient.specialBuffer(), gradient.specialShapeInfo(), initStateU.specialBuffer(), + initStateU.specialShapeInfo(), initStateM.specialBuffer(), initStateM.specialShapeInfo(), + update.specialBuffer(), update.specialShapeInfo(), stateU.specialBuffer(), + stateU.specialShapeInfo(), stateM.specialBuffer(), stateM.specialShapeInfo(), dLr, dBeta1, dBeta2, dEpsilon, nIteration ), FLOAT_TYPES); NDArray::registerSpecialUse({ &update, &stateU, &stateM }, { &gradient, &initStateU, &initStateM }); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/updaterAdam.cu b/libnd4j/include/ops/declarable/helpers/cuda/updaterAdam.cu index e23f4a5ca..91d79809c 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/updaterAdam.cu +++ 
b/libnd4j/include/ops/declarable/helpers/cuda/updaterAdam.cu @@ -108,7 +108,7 @@ linkage void adamUpdaterCudaLauncher(const int blocksPerGrid, const int threadsP const T beta2 = static_cast(dBeta2); const T epsilon = static_cast(dEpsilon); const T iteration = static_cast(nIteration); - adamUpdaterCuda << > > (vx, xShapeInfo, vinv, invShapeInfo, vinm, inmShapeInfo, + adamUpdaterCuda<<>>(vx, xShapeInfo, vinv, invShapeInfo, vinm, inmShapeInfo, vz, zShapeInfo, vstV, stvShapeInfo, vstM, stmShapeInfo, lr, beta1, beta2, epsilon, iteration); } @@ -124,10 +124,10 @@ void updaterAdam(sd::LaunchContext* context, const NDArray& gradient, const NDAr NDArray::prepareSpecialUse({ &update, &stateU, &stateM }, { &gradient, &initStateU, &initStateM }); - BUILD_SINGLE_SELECTOR(gradient.dataType(), adamUpdaterCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), gradient.getSpecialBuffer(), gradient.getSpecialShapeInfo(), - initStateU.getSpecialBuffer(), initStateU.getSpecialShapeInfo(), initStateM.getSpecialBuffer(), initStateM.getSpecialShapeInfo(), - update.getSpecialBuffer(), update.getSpecialShapeInfo(), stateU.getSpecialBuffer(), stateU.getSpecialShapeInfo(), - stateM.getSpecialBuffer(), stateM.getSpecialShapeInfo(), dLr, dBeta1, dBeta2, dEpsilon, nIteration), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(gradient.dataType(), adamUpdaterCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), gradient.specialBuffer(), gradient.specialShapeInfo(), + initStateU.specialBuffer(), initStateU.specialShapeInfo(), initStateM.specialBuffer(), initStateM.specialShapeInfo(), + update.specialBuffer(), update.specialShapeInfo(), stateU.specialBuffer(), stateU.specialShapeInfo(), + stateM.specialBuffer(), stateM.specialShapeInfo(), dLr, dBeta1, dBeta2, dEpsilon, nIteration), FLOAT_TYPES); NDArray::registerSpecialUse({ &update, &stateU, &stateM }, { &gradient, &initStateU, &initStateM }); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/updaterAmsGrad.cu 
b/libnd4j/include/ops/declarable/helpers/cuda/updaterAmsGrad.cu index d24c83f17..ff3bc1e4b 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/updaterAmsGrad.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/updaterAmsGrad.cu @@ -122,7 +122,7 @@ linkage void amsGradUpdaterCudaLauncher(const int blocksPerGrid, const int threa const T epsilon = static_cast(dEpsilon); const T iteration = static_cast(nIteration); - amsGradUpdaterCuda << > > (vx, xShapeInfo, vinv, invShapeInfo, vinm, inmShapeInfo, + amsGradUpdaterCuda<<>>(vx, xShapeInfo, vinv, invShapeInfo, vinm, inmShapeInfo, vinh, inhShapeInfo, vz, zShapeInfo, vstV, stvShapeInfo, vstM, stmShapeInfo, vstH, sthShapeInfo, lr, beta1, beta2, epsilon, iteration); } @@ -136,11 +136,11 @@ void updaterAmsGrad(sd::LaunchContext* context, const NDArray& gradient, const N const int blocksPerGrid = (gradient.lengthOf() + threadsPerBlock - 1) / threadsPerBlock; NDArray::prepareSpecialUse({ &update, &stateV, &stateM, &stateH }, { &gradient, &initStateV, &initStateM, &initStateH }); - BUILD_SINGLE_SELECTOR(gradient.dataType(), amsGradUpdaterCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), gradient.getSpecialBuffer(), gradient.getSpecialShapeInfo(), - initStateV.getSpecialBuffer(), initStateV.getSpecialShapeInfo(), initStateM.getSpecialBuffer(), initStateM.getSpecialShapeInfo(), - initStateH.getSpecialBuffer(), initStateH.getSpecialShapeInfo(), update.getSpecialBuffer(), update.getSpecialShapeInfo(), - stateV.getSpecialBuffer(), stateV.getSpecialShapeInfo(), stateM.getSpecialBuffer(), stateM.getSpecialShapeInfo(), - stateH.getSpecialBuffer(), stateH.getSpecialShapeInfo(), dLr, dBeta1, dBeta2, dEpsilon, nIteration), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(gradient.dataType(), amsGradUpdaterCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), gradient.specialBuffer(), gradient.specialShapeInfo(), + initStateV.specialBuffer(), initStateV.specialShapeInfo(), initStateM.specialBuffer(), 
initStateM.specialShapeInfo(), + initStateH.specialBuffer(), initStateH.specialShapeInfo(), update.specialBuffer(), update.specialShapeInfo(), + stateV.specialBuffer(), stateV.specialShapeInfo(), stateM.specialBuffer(), stateM.specialShapeInfo(), + stateH.specialBuffer(), stateH.specialShapeInfo(), dLr, dBeta1, dBeta2, dEpsilon, nIteration), FLOAT_TYPES); NDArray::registerSpecialUse({ &update, &stateV, &stateM , &stateH }, { &gradient, &initStateV, &initStateM, &initStateH }); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/updaterNadam.cu b/libnd4j/include/ops/declarable/helpers/cuda/updaterNadam.cu index 2ac1ec99b..141ed27db 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/updaterNadam.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/updaterNadam.cu @@ -108,7 +108,7 @@ linkage void nadamUpdaterCudaLauncher(const int blocksPerGrid, const int threads const T epsilon = static_cast(dEpsilon); const T iteration = static_cast(nIteration); - nadamUpdaterCuda << > > (vx, xShapeInfo, vinv, invShapeInfo, vinm, inmShapeInfo, + nadamUpdaterCuda<<>>(vx, xShapeInfo, vinv, invShapeInfo, vinm, inmShapeInfo, vz, zShapeInfo, vstV, stvShapeInfo, vstM, stmShapeInfo, lr, beta1, beta2, epsilon, iteration); } @@ -122,10 +122,10 @@ void updaterNadam(sd::LaunchContext* context, const NDArray& gradient, const NDA const int blocksPerGrid = (gradient.lengthOf() + threadsPerBlock - 1) / threadsPerBlock; NDArray::prepareSpecialUse({ &update, &stateV, &stateM }, { &gradient, &initStateV, &initStateM }); - BUILD_SINGLE_SELECTOR(gradient.dataType(), nadamUpdaterCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), gradient.getSpecialBuffer(), gradient.getSpecialShapeInfo(), - initStateV.getSpecialBuffer(), initStateV.getSpecialShapeInfo(), initStateM.getSpecialBuffer(), initStateM.getSpecialShapeInfo(), - update.getSpecialBuffer(), update.getSpecialShapeInfo(), stateV.getSpecialBuffer(), stateV.getSpecialShapeInfo(), - 
stateM.getSpecialBuffer(), stateM.getSpecialShapeInfo(), dLr, dBeta1, dBeta2, dEpsilon, nIteration), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(gradient.dataType(), nadamUpdaterCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), gradient.specialBuffer(), gradient.specialShapeInfo(), + initStateV.specialBuffer(), initStateV.specialShapeInfo(), initStateM.specialBuffer(), initStateM.specialShapeInfo(), + update.specialBuffer(), update.specialShapeInfo(), stateV.specialBuffer(), stateV.specialShapeInfo(), + stateM.specialBuffer(), stateM.specialShapeInfo(), dLr, dBeta1, dBeta2, dEpsilon, nIteration), FLOAT_TYPES); NDArray::registerSpecialUse({ &update, &stateV, &stateM }, { &gradient, &initStateV, &initStateM }); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/updaterNesterovs.cu b/libnd4j/include/ops/declarable/helpers/cuda/updaterNesterovs.cu index 73616a5cd..75e1f5938 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/updaterNesterovs.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/updaterNesterovs.cu @@ -88,7 +88,7 @@ linkage void nesterovsUpdaterCudaLauncher(const int blocksPerGrid, const int thr const T lr = static_cast(dLr); const T momentum = static_cast(dMomentum); - nesterovsUpdaterCuda << > > (vx, xShapeInfo, vin, inShapeInfo, + nesterovsUpdaterCuda<<>>(vx, xShapeInfo, vin, inShapeInfo, vz, zShapeInfo, vst, stShapeInfo, lr, momentum); } @@ -103,10 +103,10 @@ void updaterNesterovs(sd::LaunchContext* context, const NDArray& gradient, const NDArray::prepareSpecialUse({ &update, &stateV }, { &gradient, &initState }); BUILD_SINGLE_SELECTOR(gradient.dataType(), nesterovsUpdaterCudaLauncher, (blocksPerGrid, threadsPerBlock, - context->getCudaStream(), gradient.getSpecialBuffer(), gradient.getSpecialShapeInfo(), - initState.getSpecialBuffer(), initState.getSpecialShapeInfo(), - update.getSpecialBuffer(), update.getSpecialShapeInfo(), - stateV.getSpecialBuffer(), stateV.getSpecialShapeInfo(), dLr, 
dMomentum), FLOAT_TYPES); + context->getCudaStream(), gradient.specialBuffer(), gradient.specialShapeInfo(), + initState.specialBuffer(), initState.specialShapeInfo(), + update.specialBuffer(), update.specialShapeInfo(), + stateV.specialBuffer(), stateV.specialShapeInfo(), dLr, dMomentum), FLOAT_TYPES); NDArray::registerSpecialUse({ &update, &stateV }, { &gradient, &initState }); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/updaterRmsProp.cu b/libnd4j/include/ops/declarable/helpers/cuda/updaterRmsProp.cu index de0a5dba1..26f7253d2 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/updaterRmsProp.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/updaterRmsProp.cu @@ -105,10 +105,10 @@ void updaterRmsProp(sd::LaunchContext* context, const NDArray& gradient, const N NDArray::prepareSpecialUse({&update, &stateG}, {&gradient, &initState }); BUILD_SINGLE_SELECTOR(gradient.dataType(), rmsPropUpdaterCudaLauncher, (blocksPerGrid, threadsPerBlock, - context->getCudaStream(), gradient.getSpecialBuffer(), gradient.getSpecialShapeInfo(), - initState.getSpecialBuffer(), initState.getSpecialShapeInfo(), - update.getSpecialBuffer(), update.getSpecialShapeInfo(), - stateG.getSpecialBuffer(), stateG.getSpecialShapeInfo(), + context->getCudaStream(), gradient.specialBuffer(), gradient.specialShapeInfo(), + initState.specialBuffer(), initState.specialShapeInfo(), + update.specialBuffer(), update.specialShapeInfo(), + stateG.specialBuffer(), stateG.specialShapeInfo(), dLr, dRmsDecay, dEpsilon ), FLOAT_TYPES); NDArray::registerSpecialUse({&update, &stateG}, {&gradient, &initState}); diff --git a/libnd4j/include/ops/declarable/helpers/cuda/weights.cu b/libnd4j/include/ops/declarable/helpers/cuda/weights.cu index b543fa1c2..1620820a5 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/weights.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/weights.cu @@ -27,8 +27,8 @@ namespace helpers { template - static __device__ void 
adjustWeightsKernelD(void* inputBuffer, Nd4jLong* inputShape, - void* weightsBuffer, Nd4jLong* weightsShape, + static __device__ void adjustWeightsKernelD(void* inputBuffer, Nd4jLong const* inputShape, + void* weightsBuffer, Nd4jLong const* weightsShape, void* outputBuffer, Nd4jLong inputLength, Nd4jLong outputLength, int val) { // typedef Nd4jLong T; @@ -66,9 +66,9 @@ namespace helpers { } template - static __global__ void adjustWeightsKernel(void* inputBuffer, Nd4jLong* inputShape, - void* weightsBuffer, Nd4jLong* weightsShape, - void* outputBuffer, Nd4jLong* outputShape, + static __global__ void adjustWeightsKernel(void* inputBuffer, Nd4jLong const* inputShape, + void* weightsBuffer, Nd4jLong const* weightsShape, + void* outputBuffer, Nd4jLong const* outputShape, int minLength, int maxLength) { //auto tid = blockIdx.x * blockDim.x + threadIdx.x; // * blockDim.x; // + threadIdx.x; @@ -105,7 +105,7 @@ namespace helpers { dim3 launchDims(256, 512, 8192); auto stream = context->getCudaStream(); adjustWeightsKernel<<>>(input->specialBuffer(), - input->getSpecialShapeInfo(), weights?weights->specialBuffer():nullptr, weights?weights->getSpecialShapeInfo():nullptr, + input->specialShapeInfo(), weights?weights->specialBuffer():nullptr, weights?weights->specialShapeInfo():nullptr, output->specialBuffer(), output->specialShapeInfo(), minLength, maxLength); } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/zeta.cu b/libnd4j/include/ops/declarable/helpers/cuda/zeta.cu index 43f0ee8d1..660c49325 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/zeta.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/zeta.cu @@ -69,7 +69,7 @@ void zeta(sd::LaunchContext * context, const NDArray& x, const NDArray& q, NDArr int threadsPerBlock = MAX_NUM_THREADS / 2; int blocksPerGrid = (z.lengthOf() + threadsPerBlock - 1) / threadsPerBlock; - BUILD_SINGLE_SELECTOR(x.dataType(), zetaCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), x.getSpecialBuffer(), 
x.getSpecialShapeInfo(), q.getSpecialBuffer(), q.getSpecialShapeInfo(), z.getSpecialBuffer(), z.getSpecialShapeInfo()), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(x.dataType(), zetaCudaLauncher, (blocksPerGrid, threadsPerBlock, context->getCudaStream(), x.specialBuffer(), x.specialShapeInfo(), q.specialBuffer(), q.specialShapeInfo(), z.specialBuffer(), z.specialShapeInfo()), FLOAT_TYPES); x.tickReadHost(); q.tickReadHost(); diff --git a/libnd4j/include/ops/declarable/helpers/dilation2d.h b/libnd4j/include/ops/declarable/helpers/dilation2d.h index a26fe10f1..281a2f26a 100644 --- a/libnd4j/include/ops/declarable/helpers/dilation2d.h +++ b/libnd4j/include/ops/declarable/helpers/dilation2d.h @@ -54,7 +54,7 @@ FORCEINLINE Nd4jStatus outputSize(sd::LaunchContext * context, const int inSize, } ////////////////////////////////////////////////////////////////////// -FORCEINLINE Nd4jStatus dilation_hw(sd::LaunchContext * context, Nd4jLong *in, Nd4jLong *wh, std::vector &strides, std::vector &rates, bool isSameMode, int *sH, int *sW, int *pH, int *pW, int *dH, int *dW, int *oH, int *oW) { +FORCEINLINE Nd4jStatus dilation_hw(sd::LaunchContext * context, Nd4jLong const* in, Nd4jLong const* wh, std::vector &strides, std::vector &rates, bool isSameMode, int *sH, int *sW, int *pH, int *pW, int *dH, int *dW, int *oH, int *oW) { const int iH = shape::sizeAt(in, 1); const int iW = shape::sizeAt(in, 2); const int iC = shape::sizeAt(in, 3); diff --git a/libnd4j/include/ops/declarable/helpers/impl/knn_mindistance.cpp b/libnd4j/include/ops/declarable/helpers/impl/knn_mindistance.cpp index d3880c730..1a61587a3 100644 --- a/libnd4j/include/ops/declarable/helpers/impl/knn_mindistance.cpp +++ b/libnd4j/include/ops/declarable/helpers/impl/knn_mindistance.cpp @@ -53,7 +53,7 @@ namespace sd { void knn_mindistance(const NDArray &input, const NDArray &lowest, const NDArray &highest, NDArray &output) { NDArray::preparePrimaryUse({&output}, {&input, &lowest, &highest}); - 
BUILD_SINGLE_SELECTOR(input.dataType(), mindistance_, (input.getBuffer(), lowest.getBuffer(), highest.getBuffer(), input.lengthOf(), output.buffer()), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input.dataType(), mindistance_, (input.buffer(), lowest.buffer(), highest.buffer(), input.lengthOf(), output.buffer()), FLOAT_TYPES); NDArray::registerPrimaryUse({&output}, {&input, &lowest, &highest}); } diff --git a/libnd4j/include/ops/declarable/helpers/impl/sparse_to_dense.cpp b/libnd4j/include/ops/declarable/helpers/impl/sparse_to_dense.cpp index 36044907e..bbcb1eca3 100644 --- a/libnd4j/include/ops/declarable/helpers/impl/sparse_to_dense.cpp +++ b/libnd4j/include/ops/declarable/helpers/impl/sparse_to_dense.cpp @@ -113,7 +113,7 @@ namespace sd { } // write out values - BUILD_DOUBLE_SELECTOR(values.dataType(), indices.dataType(), fill_, (values.getBuffer(), indices.getBuffer(), output.buffer(), output.getShapeInfo(), rank, values.lengthOf()), LIBND4J_TYPES, INDEXING_TYPES); + BUILD_DOUBLE_SELECTOR(values.dataType(), indices.dataType(), fill_, (values.buffer(), indices.buffer(), output.buffer(), output.shapeInfo(), rank, values.lengthOf()), LIBND4J_TYPES, INDEXING_TYPES); } // copy back to device, if there's any output.syncToDevice(); diff --git a/libnd4j/include/ops/declarable/helpers/impl/where.cpp b/libnd4j/include/ops/declarable/helpers/impl/where.cpp index df8fd1074..b2d758673 100644 --- a/libnd4j/include/ops/declarable/helpers/impl/where.cpp +++ b/libnd4j/include/ops/declarable/helpers/impl/where.cpp @@ -33,9 +33,9 @@ namespace sd { for (Nd4jLong e = 0; e < condition.lengthOf(); e++) { - shape::index2coordsCPU(0, e, condition.getShapeInfo(), idx); + shape::index2coordsCPU(0, e, condition.shapeInfo(), idx); - auto offset = shape::getOffset(condition.getShapeInfo(), idx); + auto offset = shape::getOffset(condition.shapeInfo(), idx); if (condition.e(offset)) { auto array = NDArrayFactory::create_('c', {1, condition.rankOf()}, output.dataType(), output.getContext()); diff 
--git a/libnd4j/include/ops/declarable/helpers/threshold.h b/libnd4j/include/ops/declarable/helpers/threshold.h new file mode 100644 index 000000000..21ac0c820 --- /dev/null +++ b/libnd4j/include/ops/declarable/helpers/threshold.h @@ -0,0 +1,37 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#ifndef SD_THRESHOLD_H +#define SD_THRESHOLD_H + +#include + +namespace sd { + namespace ops { + namespace helpers { + int32_t thresholdEstimate(const NDArray &updates, float threshold); + + void thresholdEncode(NDArray &updates, NDArray &encoded, float threshold); + void thresholdDecode(const NDArray &encoded, NDArray &updates); + } + } +} + +#endif //SD_THRESHOLD_H diff --git a/libnd4j/include/ops/declarable/impl/BroadcastableBoolOp.cpp b/libnd4j/include/ops/declarable/impl/BroadcastableBoolOp.cpp index 66eade39f..8f0a6dcb8 100644 --- a/libnd4j/include/ops/declarable/impl/BroadcastableBoolOp.cpp +++ b/libnd4j/include/ops/declarable/impl/BroadcastableBoolOp.cpp @@ -42,7 +42,7 @@ namespace sd { return shapeList; } - Nd4jLong *newshape = nullptr; + const Nd4jLong *newshape = nullptr; ShapeUtils::evalBroadcastShapeInfo(x, y, true, newshape, block.workspace()); 
shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(newshape, dtype))); } else if (shape::isScalar(x) && shape::isScalar(y)) { @@ -58,7 +58,7 @@ namespace sd { } else if (!shape::isScalar(x) && shape::isScalar(y)) { shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(x, dtype))); } else if (ShapeUtils::areShapesBroadcastable(x, y)) { - Nd4jLong *newshape = nullptr; + const Nd4jLong *newshape = nullptr; ShapeUtils::evalBroadcastShapeInfo(x, y, true, newshape, block.workspace()); shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(newshape, dtype))); } else { diff --git a/libnd4j/include/ops/declarable/impl/BroadcastableOp.cpp b/libnd4j/include/ops/declarable/impl/BroadcastableOp.cpp index eb691b84d..7f7a14861 100644 --- a/libnd4j/include/ops/declarable/impl/BroadcastableOp.cpp +++ b/libnd4j/include/ops/declarable/impl/BroadcastableOp.cpp @@ -56,7 +56,7 @@ namespace sd { } - Nd4jLong *newshape = nullptr; + const Nd4jLong *newshape = nullptr; ShapeUtils::evalBroadcastShapeInfo(x, y, true, newshape, block.workspace()); shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(newshape, dtype))); } else if (shape::isScalar(x) && shape::isScalar(y)) { @@ -72,7 +72,7 @@ namespace sd { } else if (!shape::isScalar(x) && shape::isScalar(y)) { shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(x, dtype))); } else if (ShapeUtils::areShapesBroadcastable(x, y)) { - Nd4jLong *newshape = nullptr; + const Nd4jLong *newshape = nullptr; ShapeUtils::evalBroadcastShapeInfo(x, y, true, newshape, block.workspace()); shapeList->push_back(ConstantShapeHelper::getInstance()->createShapeInfo(ShapeDescriptor(newshape, dtype))); } else { diff --git a/libnd4j/include/ops/declarable/impl/DeclarableOp.cpp b/libnd4j/include/ops/declarable/impl/DeclarableOp.cpp index 44fbaae42..c839c41c9 100644 --- 
a/libnd4j/include/ops/declarable/impl/DeclarableOp.cpp +++ b/libnd4j/include/ops/declarable/impl/DeclarableOp.cpp @@ -234,7 +234,7 @@ namespace sd { // we build list of input shapes if (fp) { for (const auto p:ctx.fastpath_in()) { - inSha.push_back(p == nullptr ? nullptr : p->getShapeInfo()); + inSha.push_back(p == nullptr ? nullptr : p->shapeInfo()); } } else { int arrCnt = 0; @@ -245,7 +245,7 @@ namespace sd { if (array == nullptr) throw unresolved_input_exception::build("Variable wasn't resolved prior shape calculation", p); - inSha.push_back(array->getShapeInfo()); + inSha.push_back(array->shapeInfo()); // we're also filling ctx with arrays if (canUseFastPath) @@ -1095,7 +1095,7 @@ namespace sd { NDArray *a0 = block.array(0); for (int e = 0; e < block.width(); e++) { auto aV = block.array(e); - if (!shape::equalsSoft(a0->getShapeInfo(), aV->getShapeInfo())) + if (!shape::equalsSoft(a0->shapeInfo(), aV->shapeInfo())) return ND4J_STATUS_BAD_DIMENSIONS; } diff --git a/libnd4j/include/ops/declarable/impl/LegacyBroadcastBoolOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyBroadcastBoolOp.cpp index 03f34d269..f7cb3de92 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyBroadcastBoolOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyBroadcastBoolOp.cpp @@ -41,7 +41,7 @@ namespace sd { int opNum = block.opNum() < 0 ? this->_opNum : block.opNum(); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->getShapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); PointersManager manager(block.launchContext(), "LegacyBroadcastBoolOp"); auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); @@ -57,7 +57,7 @@ namespace sd { else { // this is rare, but possible use case - X and Z might have different shapes/strides/orders. 
In this case we prepare and pass separate TAD info - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(z->getShapeInfo(), dims); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(z->shapeInfo(), dims); auto zTadShape = Environment::getInstance()->isCPU() ? packZ.primaryShapeInfo() : packZ.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tadZ.tadOnlyShapeInfo, shape::shapeInfoByteLength(tadZ.tadOnlyShapeInfo)); auto zTadOffsets = Environment::getInstance()->isCPU() ? packZ.primaryOffsets() : packZ.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tadZ.tadOffsets, tadZ.numTads * sizeof(Nd4jLong)); diff --git a/libnd4j/include/ops/declarable/impl/LegacyBroadcastOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyBroadcastOp.cpp index 0297df28a..82899bbdb 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyBroadcastOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyBroadcastOp.cpp @@ -47,7 +47,7 @@ namespace sd { int opNum = block.opNum() < 0 ? this->_opNum : block.opNum(); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->getShapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); auto tadLen = shape::length(packX.primaryShapeInfo()); REQUIRE_TRUE(tadLen == y->lengthOf(), 0, "Length of broadcast TAD should be equal to length of Y operand, but got [%i] vs [%i]",tadLen, (int) y->lengthOf()); @@ -62,7 +62,7 @@ namespace sd { z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), dims.data(), dims.size(), pTadShape, pTadOffsets, pTadShape, pTadOffsets); else { // this is rare, but possible use case - X and Z might have different shapes/strides/orders. 
In this case we prepare and pass separate TAD info - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(z->getShapeInfo(), dims); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(z->shapeInfo(), dims); auto zTadShape = Environment::getInstance()->isCPU() ? packZ.primaryShapeInfo() : packZ.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tadZ.tadOnlyShapeInfo, shape::shapeInfoByteLength(tadZ.tadOnlyShapeInfo)); auto zTadOffsets = Environment::getInstance()->isCPU() ? packZ.primaryOffsets() : packZ.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tadZ.tadOffsets, tadZ.numTads * sizeof(Nd4jLong)); diff --git a/libnd4j/include/ops/declarable/impl/LegacyIndexReduceOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyIndexReduceOp.cpp index c92577f3b..7fc6bf793 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyIndexReduceOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyIndexReduceOp.cpp @@ -42,8 +42,8 @@ namespace sd { ShapeList *LegacyIndexReduceOp::calculateOutputShape(ShapeList *inputShape, sd::graph::Context &block) { auto inShape = inputShape->at(0); - Nd4jLong *newShape; if (block.getAxis()->size() == 0 && block.width() == 1) { + Nd4jLong *newShape; // in this case we just return scalar ALLOCATE(newShape, block.getWorkspace(), shape::shapeInfoLength(2), Nd4jLong); newShape[0] = 2; @@ -61,7 +61,7 @@ namespace sd { // in this case we're building proper shape for reduction auto array = INPUT_VARIABLE(0); //new NDArray(nullptr, inShape, block.getWorkspace()); - newShape = ShapeUtils::evalReduceShapeInfo('c', *block.getAxis(), *array, DataType::INT64, false, true, block.workspace()); + auto newShape = ShapeUtils::evalReduceShapeInfo('c', *block.getAxis(), *array, DataType::INT64, false, true, block.workspace()); return SHAPELIST(newShape); } else { @@ -78,6 +78,7 @@ namespace sd { axis[e] = f >= 0 ? 
f : f += rank; } if (allAxes){ + Nd4jLong *newShape; // in this case we just return scalar ALLOCATE(newShape, block.getWorkspace(), shape::shapeInfoLength(2), Nd4jLong); newShape[0] = 2; @@ -94,8 +95,7 @@ namespace sd { } else { // in this case we're building proper shape for reduction auto array = INPUT_VARIABLE(0); //new NDArray(nullptr, inShape, block.getWorkspace()); - newShape = ShapeUtils::evalReduceShapeInfo('c', axis, *array, DataType::INT64, false, true, block.workspace()); - return SHAPELIST(newShape); + return SHAPELIST(ShapeUtils::evalReduceShapeInfo('c', axis, *array, DataType::INT64, false, true, block.workspace())); } } } @@ -124,11 +124,11 @@ namespace sd { if (block.width() == 1) { if (block.getAxis()->size() == 0) { // scalar - NativeOpExecutioner::execIndexReduceScalar(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), - x->getSpecialBuffer(), x->getSpecialShapeInfo(), + NativeOpExecutioner::execIndexReduceScalar(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), + x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), - z->getBuffer(), z->getShapeInfo(), - z->getSpecialBuffer(), z->getSpecialShapeInfo()); + z->buffer(), z->shapeInfo(), + z->specialBuffer(), z->specialShapeInfo()); } else { // TAD std::vector dims(block.getAxis()->size()); @@ -141,11 +141,11 @@ namespace sd { auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); - NativeOpExecutioner::execIndexReduce(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), - x->getSpecialBuffer(), x->getSpecialShapeInfo(), + NativeOpExecutioner::execIndexReduce(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), + x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), - reinterpret_cast(z->getBuffer()), z->getShapeInfo(), - z->getSpecialBuffer(), z->getSpecialShapeInfo(), + reinterpret_cast(z->buffer()), z->shapeInfo(), + z->specialBuffer(), z->specialShapeInfo(), nullptr, 
(int) dims.size(), Environment::getInstance()->isCPU() ? tadPack.primaryShapeInfo() : tadPack.specialShapeInfo(), Environment::getInstance()->isCPU() ? tadPack.primaryOffsets() : tadPack.specialOffsets()); } @@ -163,11 +163,11 @@ namespace sd { } if (allAxes) { - NativeOpExecutioner::execIndexReduceScalar(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), - x->getSpecialBuffer(), x->getSpecialShapeInfo(), + NativeOpExecutioner::execIndexReduceScalar(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), + x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), - z->getBuffer(), z->getShapeInfo(), z->getSpecialBuffer(), - z->getSpecialShapeInfo()); + z->buffer(), z->shapeInfo(), z->specialBuffer(), + z->specialShapeInfo()); } else { if (indices->lengthOf() > 1) @@ -178,10 +178,10 @@ namespace sd { auto tadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), axis); NativeOpExecutioner::execIndexReduce(block.launchContext(), opNum, - x->getBuffer(), x->getShapeInfo(), x->getSpecialBuffer(), x->getSpecialShapeInfo(), + x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), - reinterpret_cast(z->getBuffer()), - z->getShapeInfo(), z->getSpecialBuffer(), z->getSpecialShapeInfo(), + reinterpret_cast(z->buffer()), + z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), nullptr, (int) axis.size(), Environment::getInstance()->isCPU() ? tadPack.primaryShapeInfo() : tadPack.specialShapeInfo(), Environment::getInstance()->isCPU() ? 
tadPack.primaryOffsets() : tadPack.specialOffsets()); diff --git a/libnd4j/include/ops/declarable/impl/LegacyPairwiseTransformBoolOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyPairwiseTransformBoolOp.cpp index eb75141a9..11a05a76c 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyPairwiseTransformBoolOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyPairwiseTransformBoolOp.cpp @@ -51,9 +51,9 @@ namespace sd { ExtraArguments extras(*block.getTArguments()); PointersManager manager(block.launchContext(), "LegacyPairwiseTransformBoolOp"); - NativeOpExecutioner::execPairwiseTransform(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->getSpecialBuffer(), x->getSpecialShapeInfo(), - y->getBuffer(), y->getShapeInfo(), y->getSpecialBuffer(), y->getSpecialShapeInfo(), - z->getBuffer(), z->getShapeInfo(), z->getSpecialBuffer(), z->getSpecialShapeInfo(), + NativeOpExecutioner::execPairwiseTransform(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), + y->buffer(), y->shapeInfo(), y->specialBuffer(), y->specialShapeInfo(), + z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), extras.argumentsAsT(x->dataType())); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/impl/LegacyPairwiseTransformOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyPairwiseTransformOp.cpp index 7f6eecb19..877d2d73d 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyPairwiseTransformOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyPairwiseTransformOp.cpp @@ -51,9 +51,9 @@ namespace sd { ExtraArguments extras(*block.getTArguments()); PointersManager manager(block.launchContext(), "LegacyPairwiseTransformOp"); - NativeOpExecutioner::execPairwiseTransform(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), - y->getBuffer(), y->getShapeInfo(), y->specialBuffer(), y->specialShapeInfo(), - z->getBuffer(), z->getShapeInfo(), 
z->specialBuffer(), z->specialShapeInfo(), + NativeOpExecutioner::execPairwiseTransform(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), + y->buffer(), y->shapeInfo(), y->specialBuffer(), y->specialShapeInfo(), + z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), extras.argumentsAsT(z->dataType())); manager.synchronize(); diff --git a/libnd4j/include/ops/declarable/impl/LegacyRandomOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyRandomOp.cpp index eeb80f403..085780c56 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyRandomOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyRandomOp.cpp @@ -344,8 +344,7 @@ namespace sd { auto zShapeVector = zShapeArr->asVectorT(); auto dtype = block.dataType(); - newShape = ConstantShapeHelper::getInstance()->createShapeInfo(dtype, 'c', zShapeVector); - return SHAPELIST(newShape); + return SHAPELIST(ConstantShapeHelper::getInstance()->createShapeInfo(dtype, 'c', zShapeVector)); } else throw std::runtime_error("LegacyRandomOp: Unknown input data type!"); } diff --git a/libnd4j/include/ops/declarable/impl/LegacyReduce3Op.cpp b/libnd4j/include/ops/declarable/impl/LegacyReduce3Op.cpp index 7143c3bbd..f110c0c55 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyReduce3Op.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyReduce3Op.cpp @@ -52,8 +52,8 @@ namespace sd { if (dims[e] < 0) dims[e] += x->rankOf(); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->getShapeInfo(), dims); - auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(z->getShapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); + auto packZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(z->shapeInfo(), dims); REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions requuired for reduction!"); diff --git a/libnd4j/include/ops/declarable/impl/LegacyReduceBoolOp.cpp 
b/libnd4j/include/ops/declarable/impl/LegacyReduceBoolOp.cpp index 433e173fc..4aced5aec 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyReduceBoolOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyReduceBoolOp.cpp @@ -63,7 +63,7 @@ namespace sd { if ((axis.empty()) || (axis.size() == 1 && axis[0] == sd::DataTypeUtils::max()) || allAxes) { // scalar - NativeOpExecutioner::execReduceBoolScalar(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), + NativeOpExecutioner::execReduceBoolScalar(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo()); } else { // TAD @@ -75,15 +75,15 @@ namespace sd { REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->getShapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); auto pTadOffsets = Environment::getInstance()->isCPU() ? 
packX.primaryOffsets() : packX.specialOffsets(); //manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); - NativeOpExecutioner::execReduceBool(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), + NativeOpExecutioner::execReduceBool(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), - z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), - dims.data(), (int) dims.size(), reinterpret_cast(pTadShape), reinterpret_cast(pTadOffsets)); + z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), + dims.data(), (int) dims.size(), reinterpret_cast(pTadShape), reinterpret_cast(pTadOffsets)); } STORE_RESULT(*z); @@ -103,7 +103,7 @@ namespace sd { if ((block.getIArguments()->size() == 1 && INT_ARG(0) == sd::DataTypeUtils::max()) || allAxes) { // scalar - NativeOpExecutioner::execReduceBoolScalar(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo()); + NativeOpExecutioner::execReduceBoolScalar(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo()); } else { // TAD if (indices->lengthOf() > 1) @@ -111,13 +111,13 @@ namespace sd { REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->getShapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); auto pTadShape = Environment::getInstance()->isCPU() ? 
packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); auto pTadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); - NativeOpExecutioner::execReduceBool(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), - z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), dims.data(), (int) dims.size(), pTadShape, pTadOffsets); + NativeOpExecutioner::execReduceBool(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), + z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), dims.data(), (int) dims.size(), pTadShape, pTadOffsets); } } diff --git a/libnd4j/include/ops/declarable/impl/LegacyReduceFloatOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyReduceFloatOp.cpp index 23f863ba2..55197844a 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyReduceFloatOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyReduceFloatOp.cpp @@ -64,7 +64,7 @@ namespace sd { // (block.getIArguments()->size() == 1 && INT_ARG(0) == sd::DataTypeUtils::max()) if (block.getAxis()->empty() || allAxes) { // scalar - NativeOpExecutioner::execReduceFloatScalar(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), + NativeOpExecutioner::execReduceFloatScalar(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(z->dataType()), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo()); } else { // TAD @@ -76,14 +76,14 @@ namespace sd { REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - 
auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->getShapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); auto pTadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); - NativeOpExecutioner::execReduceFloat(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), - extras.argumentsAsT(z->dataType()), z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), - dims.data(), (int) dims.size(), reinterpret_cast(pTadShape), reinterpret_cast(pTadOffsets)); + NativeOpExecutioner::execReduceFloat(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), + extras.argumentsAsT(z->dataType()), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), + dims.data(), (int) dims.size(), reinterpret_cast(pTadShape), reinterpret_cast(pTadOffsets)); } @@ -109,13 +109,13 @@ namespace sd { // TAD REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->getShapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); auto pTadOffsets = Environment::getInstance()->isCPU() ? 
packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); - NativeOpExecutioner::execReduceFloat(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), - extras.argumentsAsT(z->dataType()), z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), + NativeOpExecutioner::execReduceFloat(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), + extras.argumentsAsT(z->dataType()), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), dims.data(), (int) dims.size(), pTadShape, pTadOffsets); @@ -133,8 +133,6 @@ namespace sd { ShapeList *LegacyReduceFloatOp::calculateOutputShape(ShapeList *inputShape, sd::graph::Context &block) { auto inShape = inputShape->at(0); - Nd4jLong *newShape; - bool allAxes = false; auto keepDims = block.numB() > 0 ? B_ARG(0) : false; @@ -146,7 +144,7 @@ namespace sd { allAxes = true; // in this case we're building proper shape for reduction - newShape = ShapeUtils::evalReduceShapeInfo(shape::order(inShape), axis, inShape, keepDims, !newFormat, block.workspace()); + auto newShape = ShapeUtils::evalReduceShapeInfo(shape::order(inShape), axis, inShape, keepDims, !newFormat, block.workspace()); return SHAPELIST(newShape); } diff --git a/libnd4j/include/ops/declarable/impl/LegacyReduceLongOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyReduceLongOp.cpp index 17cba4227..628c4cb5f 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyReduceLongOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyReduceLongOp.cpp @@ -63,7 +63,7 @@ namespace sd { if ((axis.empty()) || (axis.size() == 1 && axis[0] == sd::DataTypeUtils::max()) || allAxes) { // scalar - NativeOpExecutioner::execReduceLongScalar(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), + 
NativeOpExecutioner::execReduceLongScalar(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo()); } else { // TAD @@ -78,14 +78,14 @@ namespace sd { REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->getShapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); auto pTadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); - NativeOpExecutioner::execReduceLong(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), + NativeOpExecutioner::execReduceLong(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), - z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), + z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), dims.data(), (int) dims.size(), pTadShape, pTadOffsets); } @@ -111,13 +111,13 @@ namespace sd { // TAD REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->getShapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); auto pTadShape = Environment::getInstance()->isCPU() ? 
packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); auto pTadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); - NativeOpExecutioner::execReduceLong(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), - z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), dims.data(), (int) dims.size(), pTadShape, pTadOffsets); + NativeOpExecutioner::execReduceLong(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(x->dataType()), + z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), dims.data(), (int) dims.size(), pTadShape, pTadOffsets); } } diff --git a/libnd4j/include/ops/declarable/impl/LegacyReduceOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyReduceOp.cpp index 46be149c6..e6c3dd63b 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyReduceOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyReduceOp.cpp @@ -54,7 +54,7 @@ namespace sd { if ((block.getIArguments()->size() == 0) || (block.getIArguments()->size() == 1 && INT_ARG(0) == MAX_INT) || allAxes) { // scalar - NativeOpExcutioner::execReduceFloatScalar(opNum, x->getBuffer(), x->getShapeInfo(), block.getTArguments()->data(), z->buffer(), z->shapeInfo()); + NativeOpExcutioner::execReduceFloatScalar(opNum, x->buffer(), x->shapeInfo(), block.getTArguments()->data(), z->buffer(), z->shapeInfo()); } else { // TAD std::vector dims(*block.getIArguments()); @@ -67,11 +67,11 @@ namespace sd { REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - shape::TAD tad(x->getShapeInfo(), dims.data(), dims.size()); + shape::TAD tad(x->shapeInfo(), 
dims.data(), dims.size()); tad.createTadOnlyShapeInfo(); tad.createOffsets(); - NativeOpExcutioner::execReduceFloat(opNum, x->getBuffer(), x->getShapeInfo(), block.getTArguments()->data(), z->getBuffer(), z->getShapeInfo(), dims.data(), (int) dims.size(), tad.tadOnlyShapeInfo, tad.tadOffsets); + NativeOpExcutioner::execReduceFloat(opNum, x->buffer(), x->shapeInfo(), block.getTArguments()->data(), z->buffer(), z->shapeInfo(), dims.data(), (int) dims.size(), tad.tadOnlyShapeInfo, tad.tadOffsets); } STORE_RESULT(*z); @@ -92,7 +92,7 @@ namespace sd { if ((block.getIArguments()->size() == 1 && INT_ARG(0) == MAX_INT) || allAxes) { auto z = OUTPUT_VARIABLE(0); - auto b = x->getBuffer(); + auto b = x->buffer(); auto s = x->shapeInfo(); auto e = block.numT() > 0 ? block.getTArguments()->data() : nullptr; @@ -107,14 +107,14 @@ namespace sd { REQUIRE_TRUE(axis.size() > 0, 0, "Some dimensions required for reduction!"); - shape::TAD tad(x->getShapeInfo(), axis.data(), axis.size()); + shape::TAD tad(x->shapeInfo(), axis.data(), axis.size()); tad.createTadOnlyShapeInfo(); tad.createOffsets(); auto newShape = ShapeUtils::evalReduceShapeInfo(x->ordering(), axis, *x); auto z = new NDArray(newShape, x->getWorkspace()); - NativeOpExcutioner::execReduceFloat(opNum, x->getBuffer(), x->getShapeInfo(), block.getTArguments()->data(), z->getBuffer(), z->getShapeInfo(), axis.data(), (int) axis.size(), tad.tadOnlyShapeInfo, tad.tadOffsets); + NativeOpExcutioner::execReduceFloat(opNum, x->buffer(), x->shapeInfo(), block.getTArguments()->data(), z->buffer(), z->shapeInfo(), axis.data(), (int) axis.size(), tad.tadOnlyShapeInfo, tad.tadOffsets); // keepDims processing, for TF compatibility diff --git a/libnd4j/include/ops/declarable/impl/LegacyReduceSameOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyReduceSameOp.cpp index 3c96bca70..e406a3a2d 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyReduceSameOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyReduceSameOp.cpp @@ -61,7 +61,7 
@@ namespace sd { if (axis.empty() || allAxes) { // scalar - NativeOpExecutioner::execReduceSameScalar(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), + NativeOpExecutioner::execReduceSameScalar(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(z->dataType()), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo()); } else { // TAD @@ -73,14 +73,14 @@ namespace sd { REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->getShapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); auto pTadOffsets = Environment::getInstance()->isCPU() ? 
packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); - NativeOpExecutioner::execReduceSame(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), + NativeOpExecutioner::execReduceSame(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(z->dataType()), - z->getBuffer(), z->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), + z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), dims.data(), (int) dims.size(), pTadShape, pTadOffsets); } @@ -106,13 +106,13 @@ namespace sd { // TAD REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions required for reduction!"); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->getShapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); auto pTadOffsets = Environment::getInstance()->isCPU() ?
packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); - NativeOpExecutioner::execReduceSame(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), - extras.argumentsAsT(z->dataType()), z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), + NativeOpExecutioner::execReduceSame(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(z->dataType()), + z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), dims.data(), (int) dims.size(), pTadShape, pTadOffsets); } } @@ -129,8 +129,7 @@ ShapeList *LegacyReduceSameOp::calculateOutputShape(ShapeList *inputShape, sd::graph::Context &block) { auto inShape = inputShape->at(0); - Nd4jLong *newShape; bool allAxes = false; auto keepDims = block.numB() > 0 ? B_ARG(0) : false; @@ -142,7 +140,7 @@ namespace sd { allAxes = true; // in this case we're building proper shape for reduction - newShape = ShapeUtils::evalReduceShapeInfo(shape::order(inShape), axis, inShape, keepDims, !newFormat, block.workspace()); + auto newShape = ShapeUtils::evalReduceShapeInfo(shape::order(inShape), axis, inShape, keepDims, !newFormat, block.workspace()); return SHAPELIST(newShape); } diff --git a/libnd4j/include/ops/declarable/impl/LegacyScalarBoolOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyScalarBoolOp.cpp index 46728ede1..abfd84efb 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyScalarBoolOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyScalarBoolOp.cpp @@ -64,19 +64,19 @@ namespace sd { NDArray::prepareSpecialUse({z}, {x, y}); - NativeOpExecutioner::execScalarBool(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), y->buffer(),
y->shapeInfo(), y->specialBuffer(), y->specialShapeInfo(), extras.argumentsAsT(x->dataType())); + NativeOpExecutioner::execScalarBool(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), y->buffer(), y->shapeInfo(), y->specialBuffer(), y->specialShapeInfo(), extras.argumentsAsT(x->dataType())); } else if (block.getTArguments()->size() > 0) { auto y = NDArrayFactory::create(T_ARG(0), block.launchContext()); NDArray::prepareSpecialUse({z}, {x, &y}); - NativeOpExecutioner::execScalarBool(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(),z->getBuffer(), z->getShapeInfo(),z->specialBuffer(), z->specialShapeInfo(), y.buffer(), y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), extras.argumentsAsT(x->dataType(), 1)); + NativeOpExecutioner::execScalarBool(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(),z->buffer(), z->shapeInfo(),z->specialBuffer(), z->specialShapeInfo(), y.buffer(), y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), extras.argumentsAsT(x->dataType(), 1)); manager.synchronize(); } else { NDArray::prepareSpecialUse({z}, {x, _scalar}); - NativeOpExecutioner::execScalarBool(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(),z->getBuffer(), z->getShapeInfo(),z->specialBuffer(), z->specialShapeInfo(), _scalar->buffer(), _scalar->shapeInfo(), _scalar->specialBuffer(), _scalar->specialShapeInfo(), extras.argumentsAsT(x->dataType())); + NativeOpExecutioner::execScalarBool(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(),z->buffer(), z->shapeInfo(),z->specialBuffer(), z->specialShapeInfo(), _scalar->buffer(), _scalar->shapeInfo(), _scalar->specialBuffer(), _scalar->specialShapeInfo(), extras.argumentsAsT(x->dataType())); } 
manager.synchronize(); STORE_RESULT(*z); diff --git a/libnd4j/include/ops/declarable/impl/LegacyScalarOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyScalarOp.cpp index de104a11d..3e73b10f5 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyScalarOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyScalarOp.cpp @@ -64,7 +64,7 @@ namespace sd { NDArray::prepareSpecialUse({z}, {x, y}); - NativeOpExecutioner::execScalar(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), y->buffer(), y->shapeInfo(), y->specialBuffer(), y->specialShapeInfo(), extras.argumentsAsT(z->dataType())); + NativeOpExecutioner::execScalar(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), y->buffer(), y->shapeInfo(), y->specialBuffer(), y->specialShapeInfo(), extras.argumentsAsT(z->dataType())); NDArray::registerSpecialUse({z}, {x, y}); } else if (block.getTArguments()->size() > 0) { @@ -72,13 +72,13 @@ namespace sd { x->applyScalarArr(static_cast(opNum), y, *z); // NDArray::prepareSpecialUse({z}, {x, &y}); - // NativeOpExecutioner::execScalar(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), y.buffer(), y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), extras.argumentsAsT(z->dataType(), 1)); + // NativeOpExecutioner::execScalar(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), y.buffer(), y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), extras.argumentsAsT(z->dataType(), 1)); manager.synchronize(); } else { NDArray::prepareSpecialUse({z}, {x, _scalar}); - 
NativeOpExecutioner::execScalar(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), _scalar->buffer(), _scalar->shapeInfo(), _scalar->specialBuffer(), _scalar->specialShapeInfo(), extras.argumentsAsT(z->dataType())); + NativeOpExecutioner::execScalar(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), _scalar->buffer(), _scalar->shapeInfo(), _scalar->specialBuffer(), _scalar->specialShapeInfo(), extras.argumentsAsT(z->dataType())); NDArray::registerSpecialUse({z}, {x, _scalar}); } diff --git a/libnd4j/include/ops/declarable/impl/LegacyStatsOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyStatsOp.cpp index 74f82d162..b8694f9ff 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyStatsOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyStatsOp.cpp @@ -46,8 +46,8 @@ namespace sd { if (block.getIArguments()->size() == 1 || (block.getIArguments()->size() == 2 && INT_ARG(1) == sd::DataTypeUtils::max())) { // scalar - NativeOpExecutioner::execSummaryStatsScalar(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), - extras.argumentsAsT(z->dataType()), z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), biasCorrected); + NativeOpExecutioner::execSummaryStatsScalar(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), + extras.argumentsAsT(z->dataType()), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), biasCorrected); } else { // dimensions for TAD // we should skip first argument here, because it's addressing bias correction @@ -58,13 +58,13 @@ namespace sd { REQUIRE_TRUE(dims.size() > 0, 0, "Some dimensions requuired for reduction!"); - auto packX = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(x->getShapeInfo(), dims); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x->shapeInfo(), dims); auto pTadShape = Environment::getInstance()->isCPU() ? packX.primaryShapeInfo() : packX.specialShapeInfo(); //(Nd4jLong *) manager.replicatePointer(tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); auto pTadOffsets = Environment::getInstance()->isCPU() ? packX.primaryOffsets() : packX.specialOffsets(); //(Nd4jLong *) manager.replicatePointer(tad.tadOffsets, tad.numTads * sizeof(Nd4jLong)); - NativeOpExecutioner::execSummaryStats(block.launchContext(), opNum, x->getBuffer(), x->getShapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(z->dataType()), - z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), dims.data(), (int) dims.size(), pTadShape, pTadOffsets, biasCorrected); + NativeOpExecutioner::execSummaryStats(block.launchContext(), opNum, x->buffer(), x->shapeInfo(), x->specialBuffer(), x->specialShapeInfo(), extras.argumentsAsT(z->dataType()), + z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), dims.data(), (int) dims.size(), pTadShape, pTadOffsets, biasCorrected); } manager.synchronize(); @@ -108,12 +108,13 @@ namespace sd { // in this case we're building proper shape for reduction auto array = new NDArray(nullptr, inShape, block.launchContext()); - newShape = ShapeUtils::evalReduceShapeInfo('c', *block.getIArguments(), *array, false, true); + auto newShape = ShapeUtils::evalReduceShapeInfo('c', *block.getIArguments(), *array, false, true); delete array; + return SHAPELIST(newShape); } - return SHAPELIST(newShape); + return SHAPELIST(CONSTANT(newShape)); } } } \ No newline at end of file diff --git a/libnd4j/include/ops/declarable/impl/LegacyTransformAnyOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyTransformAnyOp.cpp index def577eb3..dde8ce9e9 100644 --- 
a/libnd4j/include/ops/declarable/impl/LegacyTransformAnyOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyTransformAnyOp.cpp @@ -48,8 +48,8 @@ namespace sd { ExtraArguments extras(*block.getTArguments()); PointersManager manager(block.launchContext(),"LegacyTransformAnyOp"); - NativeOpExecutioner::execTransformAny(block.launchContext(), opNum, input->getBuffer(), input->getShapeInfo(), input->specialBuffer(), input->specialShapeInfo(), - z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), extras.argumentsAsT(z->dataType()), nullptr, nullptr); + NativeOpExecutioner::execTransformAny(block.launchContext(), opNum, input->buffer(), input->shapeInfo(), input->specialBuffer(), input->specialShapeInfo(), + z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), extras.argumentsAsT(z->dataType()), nullptr, nullptr); manager.synchronize(); STORE_RESULT(*z); diff --git a/libnd4j/include/ops/declarable/impl/LegacyTransformBoolOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyTransformBoolOp.cpp index 99b856b8a..a0651d1fc 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyTransformBoolOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyTransformBoolOp.cpp @@ -48,8 +48,8 @@ namespace sd { ExtraArguments extras(*block.getTArguments()); PointersManager manager(block.launchContext(),"LegacyTransformBoolOp"); - NativeOpExecutioner::execTransformBool(block.launchContext(), opNum, input->getBuffer(), input->getShapeInfo(), input->specialBuffer(), input->specialShapeInfo(), - z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), + NativeOpExecutioner::execTransformBool(block.launchContext(), opNum, input->buffer(), input->shapeInfo(), input->specialBuffer(), input->specialShapeInfo(), + z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), extras.argumentsAsT(input->dataType()), nullptr, nullptr); manager.synchronize(); diff --git 
a/libnd4j/include/ops/declarable/impl/LegacyTransformFloatOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyTransformFloatOp.cpp index f0795b7bb..f25ba00fe 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyTransformFloatOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyTransformFloatOp.cpp @@ -48,8 +48,8 @@ namespace sd { ExtraArguments extras(*block.getTArguments()); PointersManager manager(block.launchContext(), "LegacyTransformFloatOp"); - NativeOpExecutioner::execTransformFloat(block.launchContext(), opNum, input->getBuffer(), input->getShapeInfo(), input->specialBuffer(), input->specialShapeInfo(), - z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), extras.argumentsAsT(z->dataType()), nullptr, nullptr); + NativeOpExecutioner::execTransformFloat(block.launchContext(), opNum, input->buffer(), input->shapeInfo(), input->specialBuffer(), input->specialShapeInfo(), + z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), extras.argumentsAsT(z->dataType()), nullptr, nullptr); manager.synchronize(); STORE_RESULT(*z); diff --git a/libnd4j/include/ops/declarable/impl/LegacyTransformOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyTransformOp.cpp index b073d9df1..d0a8f7604 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyTransformOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyTransformOp.cpp @@ -43,7 +43,7 @@ namespace sd { int opNum = block.opNum() < 0 ? 
this->_opNum : block.opNum(); - NativeOpExcutioner::execTransformSame(opNum, input->getBuffer(), input->getShapeInfo(), z->getBuffer(), z->getShapeInfo(), block.getTArguments()->data(), nullptr, nullptr); + NativeOpExcutioner::execTransformSame(opNum, input->buffer(), input->shapeInfo(), z->buffer(), z->shapeInfo(), block.getTArguments()->data(), nullptr, nullptr); STORE_RESULT(*z); diff --git a/libnd4j/include/ops/declarable/impl/LegacyTransformSameOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyTransformSameOp.cpp index 0d827787e..02a69da6b 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyTransformSameOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyTransformSameOp.cpp @@ -48,8 +48,8 @@ namespace sd { ExtraArguments extras(*block.getTArguments()); PointersManager manager(block.launchContext(), "LegacyTransformSameOp"); - NativeOpExecutioner::execTransformSame(block.launchContext(), opNum, input->getBuffer(), input->getShapeInfo(), input->specialBuffer(), input->specialShapeInfo(), - z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), extras.argumentsAsT(z->dataType()), nullptr, nullptr); + NativeOpExecutioner::execTransformSame(block.launchContext(), opNum, input->buffer(), input->shapeInfo(), input->specialBuffer(), input->specialShapeInfo(), + z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), extras.argumentsAsT(z->dataType()), nullptr, nullptr); manager.synchronize(); STORE_RESULT(*z); diff --git a/libnd4j/include/ops/declarable/impl/LegacyTransformStrictOp.cpp b/libnd4j/include/ops/declarable/impl/LegacyTransformStrictOp.cpp index f36853579..2093e3aab 100644 --- a/libnd4j/include/ops/declarable/impl/LegacyTransformStrictOp.cpp +++ b/libnd4j/include/ops/declarable/impl/LegacyTransformStrictOp.cpp @@ -48,7 +48,7 @@ namespace sd { ExtraArguments extras(*block.getTArguments()); PointersManager manager(block.launchContext(), "LegacyTransformStrictOp"); - 
NativeOpExecutioner::execTransformStrict(block.launchContext(), opNum, input->getBuffer(), input->getShapeInfo(), input->specialBuffer(), input->specialShapeInfo(), z->getBuffer(), z->getShapeInfo(), z->specialBuffer(), z->specialShapeInfo(), extras.argumentsAsT(z->dataType()), nullptr, nullptr); + NativeOpExecutioner::execTransformStrict(block.launchContext(), opNum, input->buffer(), input->shapeInfo(), input->specialBuffer(), input->specialShapeInfo(), z->buffer(), z->shapeInfo(), z->specialBuffer(), z->specialShapeInfo(), extras.argumentsAsT(z->dataType()), nullptr, nullptr); manager.synchronize(); STORE_RESULT(*z); diff --git a/libnd4j/include/ops/declarable/platform/cudnn/avgpool2d.cu b/libnd4j/include/ops/declarable/platform/cudnn/avgpool2d.cu index 5cf93f10f..eb213f4c2 100644 --- a/libnd4j/include/ops/declarable/platform/cudnn/avgpool2d.cu +++ b/libnd4j/include/ops/declarable/platform/cudnn/avgpool2d.cu @@ -129,7 +129,7 @@ PLATFORM_CHECK(avgpool2d_bp, ENGINE_CUDA) { return goodType && (input->dataType() == gradO->dataType()) && (input->dataType() == gradI->dataType()) - && shape::haveSameShapeAndStrides(input->getShapeInfo(), gradI->getShapeInfo()); + && shape::haveSameShapeAndStrides(input->shapeInfo(), gradI->shapeInfo()); } diff --git a/libnd4j/include/ops/declarable/platform/cudnn/avgpool3d.cu b/libnd4j/include/ops/declarable/platform/cudnn/avgpool3d.cu index 0d01dfef3..da2fdbc09 100644 --- a/libnd4j/include/ops/declarable/platform/cudnn/avgpool3d.cu +++ b/libnd4j/include/ops/declarable/platform/cudnn/avgpool3d.cu @@ -135,7 +135,7 @@ PLATFORM_CHECK(avgpool3dnew_bp, ENGINE_CUDA) { return goodType && (input->dataType() == gradO->dataType()) && (input->dataType() == gradI->dataType()) - && shape::haveSameShapeAndStrides(input->getShapeInfo(), gradI->getShapeInfo()); + && shape::haveSameShapeAndStrides(input->shapeInfo(), gradI->shapeInfo()); } diff --git a/libnd4j/include/ops/declarable/platform/cudnn/batchnorm.cu 
b/libnd4j/include/ops/declarable/platform/cudnn/batchnorm.cu index 8d0b1301a..7568ba47a 100644 --- a/libnd4j/include/ops/declarable/platform/cudnn/batchnorm.cu +++ b/libnd4j/include/ops/declarable/platform/cudnn/batchnorm.cu @@ -108,11 +108,11 @@ static void batchnormCUDNN(const LaunchContext* context, // calculations err = cudnnBatchNormalizationForwardInference(*handle, isSpatialMode ? CUDNN_BATCHNORM_SPATIAL : CUDNN_BATCHNORM_PER_ACTIVATION, ptrAlpha, ptrBeta, - x, input->getSpecialBuffer(), - z, output->getSpecialBuffer(), + x, input->specialBuffer(), + z, output->specialBuffer(), params, - gamma->getSpecialBuffer(), beta->getSpecialBuffer(), - mean->getSpecialBuffer(), variance->getSpecialBuffer(), epsilon); + gamma->specialBuffer(), beta->specialBuffer(), + mean->specialBuffer(), variance->specialBuffer(), epsilon); if (err != 0) throw sd::cuda_exception::build("batchnormCUDNN: cudnnBatchNormalizationForwardInference failed", err); @@ -215,13 +215,13 @@ static void batchnormBpCUDNN(const LaunchContext* context, // TODO: we can use cache here err = cudnnBatchNormalizationBackward(*handle, isSpatialMode ? 
CUDNN_BATCHNORM_SPATIAL : CUDNN_BATCHNORM_PER_ACTIVATION, ptrAlpha, ptrBeta, ptrAlpha, ptrBeta, - x, input->getSpecialBuffer(), - dz, gradO->getSpecialBuffer(), - dx, gradI->getSpecialBuffer(), + x, input->specialBuffer(), + dz, gradO->specialBuffer(), + dx, gradI->specialBuffer(), params, - gamma->getSpecialBuffer(), gradG->getSpecialBuffer(), gradB->getSpecialBuffer(), + gamma->specialBuffer(), gradG->specialBuffer(), gradB->specialBuffer(), epsilon, - nullptr/*mean->getSpecialBuffer()*/, nullptr/*variance->getSpecialBuffer()*/); + nullptr/*mean->specialBuffer()*/, nullptr/*variance->specialBuffer()*/); if (err != 0) throw sd::cuda_exception::build("batchnormBpCUDNN: cudnnBatchNormalizationBackward failed", err); @@ -362,11 +362,11 @@ PLATFORM_CHECK(batchnorm, ENGINE_CUDA) { return false; // *********************************** // - bool allParamsHaveSameShapeAndStrides = shape::haveSameShapeAndStrides(mean->getShapeInfo(), variance->getShapeInfo()); + bool allParamsHaveSameShapeAndStrides = shape::haveSameShapeAndStrides(mean->shapeInfo(), variance->shapeInfo()); if(gamma) - allParamsHaveSameShapeAndStrides &= shape::haveSameShapeAndStrides(mean->getShapeInfo(), gamma->getShapeInfo()); + allParamsHaveSameShapeAndStrides &= shape::haveSameShapeAndStrides(mean->shapeInfo(), gamma->shapeInfo()); if(beta) - allParamsHaveSameShapeAndStrides &= shape::haveSameShapeAndStrides(mean->getShapeInfo(), beta->getShapeInfo()); + allParamsHaveSameShapeAndStrides &= shape::haveSameShapeAndStrides(mean->shapeInfo(), beta->shapeInfo()); if(!allParamsHaveSameShapeAndStrides) return false; @@ -536,13 +536,13 @@ PLATFORM_CHECK(batchnorm_bp, ENGINE_CUDA) { return false; // *********************************** // - bool allParamsHaveSameShapeAndStrides = shape::haveSameShapeAndStrides(mean->getShapeInfo(), variance->getShapeInfo()); + bool allParamsHaveSameShapeAndStrides = shape::haveSameShapeAndStrides(mean->shapeInfo(), variance->shapeInfo()); if(gamma) - 
allParamsHaveSameShapeAndStrides &= shape::haveSameShapeAndStrides(mean->getShapeInfo(), gamma->getShapeInfo()); + allParamsHaveSameShapeAndStrides &= shape::haveSameShapeAndStrides(mean->shapeInfo(), gamma->shapeInfo()); if(gradG) - allParamsHaveSameShapeAndStrides &= shape::haveSameShapeAndStrides(mean->getShapeInfo(), gradG->getShapeInfo()); + allParamsHaveSameShapeAndStrides &= shape::haveSameShapeAndStrides(mean->shapeInfo(), gradG->shapeInfo()); if(gradB) - allParamsHaveSameShapeAndStrides &= shape::haveSameShapeAndStrides(mean->getShapeInfo(), gradB->getShapeInfo()); + allParamsHaveSameShapeAndStrides &= shape::haveSameShapeAndStrides(mean->shapeInfo(), gradB->shapeInfo()); if(!allParamsHaveSameShapeAndStrides) return false; diff --git a/libnd4j/include/ops/declarable/platform/cudnn/conv2d.cu b/libnd4j/include/ops/declarable/platform/cudnn/conv2d.cu index 43dc7ce07..a77faf6f7 100644 --- a/libnd4j/include/ops/declarable/platform/cudnn/conv2d.cu +++ b/libnd4j/include/ops/declarable/platform/cudnn/conv2d.cu @@ -102,7 +102,7 @@ static void conv2dCUDNN(const LaunchContext* context, NDArray::prepareSpecialUse({output}, {input, weights, bias}); // run calculation - err = cudnnConvolutionForward(*handle, alpha, x, input->getSpecialBuffer(), w, weights->getSpecialBuffer(), conv, algo, wsData, wsSize, beta, z, output->specialBuffer()); + err = cudnnConvolutionForward(*handle, alpha, x, input->specialBuffer(), w, weights->specialBuffer(), conv, algo, wsData, wsSize, beta, z, output->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("conv2dCUDNN: cudnnConvolutionForward failed", err); // add bias if it is present @@ -112,7 +112,7 @@ static void conv2dCUDNN(const LaunchContext* context, // err = cudnnSetTensor4dDescriptor(b, format, cudnnDataType(bias->dataType()), 1, isNCHW ? bias->lengthOf() : 1, 1, isNCHW ? 
1: bias->lengthOf()); err = cudnnSetTensor4dDescriptor(b, CUDNN_TENSOR_NCHW, cudnnDataType(bias->dataType()), 1, oC, 1, 1); if (err != 0) throw sd::cuda_exception::build("conv2dCUDNN: cudnnSetTensor4dDescriptor for bias failed", err); - err = cudnnAddTensor(*handle, alpha, b, bias->getSpecialBuffer(), alpha, z, output->specialBuffer()); + err = cudnnAddTensor(*handle, alpha, b, bias->specialBuffer(), alpha, z, output->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("conv2dCUDNN: cudnnAddTensor bias failed", err); } @@ -228,16 +228,16 @@ static void conv2dBpCUDNN(const LaunchContext* context, err = cudnnSetTensor4dDescriptor(db, CUDNN_TENSOR_NCHW, cudnnDataType(gradB->dataType()), 1, oC, 1, 1); if (err != 0) throw sd::cuda_exception::build("conv2dBpCUDNN: cudnnSetTensor4dDescriptor for gradB failed", err); - err = cudnnConvolutionBackwardBias(*handle, alpha, dz, gradO->getSpecialBuffer(), beta, db, gradB->getSpecialBuffer()); + err = cudnnConvolutionBackwardBias(*handle, alpha, dz, gradO->specialBuffer(), beta, db, gradB->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("conv2dBpCUDNN: cudnnConvolutionBackwardBias failed", err); } // run calculation for gradW - err = cudnnConvolutionBackwardFilter(*handle, alpha, x, input->getSpecialBuffer(), dz, gradO->getSpecialBuffer(), conv, algoGradW, wsGradWData, wsGradWSize, beta, dw, gradW->getSpecialBuffer()); + err = cudnnConvolutionBackwardFilter(*handle, alpha, x, input->specialBuffer(), dz, gradO->specialBuffer(), conv, algoGradW, wsGradWData, wsGradWSize, beta, dw, gradW->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("conv2dBpCUDNN: cudnnConvolutionBackwardFilter failed", err); // run calculation for gradI - err = cudnnConvolutionBackwardData(*handle, alpha, dw, weights->getSpecialBuffer(), dz, gradO->getSpecialBuffer(), conv, algoGradI, wsGradIData, wsGradISize, beta, dx, gradI->getSpecialBuffer()); + err = cudnnConvolutionBackwardData(*handle, alpha, dw, 
weights->specialBuffer(), dz, gradO->specialBuffer(), conv, algoGradI, wsGradIData, wsGradISize, beta, dx, gradI->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("conv2dBpCUDNN: cudnnConvolutionBackwardData failed", err); // cudaErr = cudaStreamSynchronize(*context->getCudaStream()); diff --git a/libnd4j/include/ops/declarable/platform/cudnn/conv3d.cu b/libnd4j/include/ops/declarable/platform/cudnn/conv3d.cu index 9d226d6f7..693ebeefa 100644 --- a/libnd4j/include/ops/declarable/platform/cudnn/conv3d.cu +++ b/libnd4j/include/ops/declarable/platform/cudnn/conv3d.cu @@ -114,7 +114,7 @@ static void conv3dCUDNN(const LaunchContext* context, NDArray::prepareSpecialUse({output}, {input, weights, bias}); // run calculation - err = cudnnConvolutionForward(*handle, alpha, x, input->getSpecialBuffer(), w, weights->getSpecialBuffer(), conv, algo, wsData, wsSize, beta, z, output->specialBuffer()); + err = cudnnConvolutionForward(*handle, alpha, x, input->specialBuffer(), w, weights->specialBuffer(), conv, algo, wsData, wsSize, beta, z, output->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("conv3dCUDNN: cudnnConvolutionForward failed", err); // add bias if it is present @@ -124,7 +124,7 @@ static void conv3dCUDNN(const LaunchContext* context, cudnnCreateTensorDescriptor(&b); err = cudnnSetTensorNdDescriptorEx(b, /*format*/CUDNN_TENSOR_NCHW, cudnnDataType(bias->dataType()), numDims, bShape.data()); if (err != 0) throw sd::cuda_exception::build("conv3dCUDNN: cudnnSetTensorNdDescriptor for bias failed", err); - err = cudnnAddTensor(*handle, alpha, b, bias->getSpecialBuffer(), alpha, z, output->specialBuffer()); + err = cudnnAddTensor(*handle, alpha, b, bias->specialBuffer(), alpha, z, output->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("conv3dCUDNN: cudnnAddTensor bias failed", err); } @@ -257,16 +257,16 @@ static void conv3dBpCUDNN(const LaunchContext* context, err = cudnnSetTensorNdDescriptorEx(db, format, 
cudnnDataType(gradB->dataType()), numDims, dbShape.data()); if (err != 0) throw sd::cuda_exception::build("conv3dBpCUDNN: cudnnSetTensorNdDescriptor for gradB failed", err); - err = cudnnConvolutionBackwardBias(*handle, alpha, dz, gradO->getSpecialBuffer(), beta, db, gradB->getSpecialBuffer()); + err = cudnnConvolutionBackwardBias(*handle, alpha, dz, gradO->specialBuffer(), beta, db, gradB->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("conv3dBpCUDNN: cudnnConvolutionBackwardBias failed", err); } // run calculation for gradW - err = cudnnConvolutionBackwardFilter(*handle, alpha, x, input->getSpecialBuffer(), dz, gradO->getSpecialBuffer(), conv, algoGradW, wsGradWData, wsGradWSize, beta, dw, gradW->getSpecialBuffer()); + err = cudnnConvolutionBackwardFilter(*handle, alpha, x, input->specialBuffer(), dz, gradO->specialBuffer(), conv, algoGradW, wsGradWData, wsGradWSize, beta, dw, gradW->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("conv3dBpCUDNN: cudnnConvolutionBackwardFilter failed", err); // run calculation for gradI - err = cudnnConvolutionBackwardData(*handle, alpha, dw, weights->getSpecialBuffer(), dz, gradO->getSpecialBuffer(), conv, algoGradI, wsGradIData, wsGradISize, beta, dx, gradI->getSpecialBuffer()); + err = cudnnConvolutionBackwardData(*handle, alpha, dw, weights->specialBuffer(), dz, gradO->specialBuffer(), conv, algoGradI, wsGradIData, wsGradISize, beta, dx, gradI->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("conv3dBpCUDNN: cudnnConvolutionBackwardData failed", err); // cudaErr = cudaStreamSynchronize(*context->getCudaStream()); diff --git a/libnd4j/include/ops/declarable/platform/cudnn/cudnnUtils.cu b/libnd4j/include/ops/declarable/platform/cudnn/cudnnUtils.cu index 28e845b00..54f8a1f3b 100644 --- a/libnd4j/include/ops/declarable/platform/cudnn/cudnnUtils.cu +++ b/libnd4j/include/ops/declarable/platform/cudnn/cudnnUtils.cu @@ -165,7 +165,7 @@ void pooling2dCUDNN(const LaunchContext* context, 
NDArray::prepareSpecialUse({output}, {input}); // run calculation - err = cudnnPoolingForward(*handle, pooling, alpha, x, input->getSpecialBuffer(), beta, z, output->specialBuffer()); + err = cudnnPoolingForward(*handle, pooling, alpha, x, input->specialBuffer(), beta, z, output->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("pooling2dCUDNN: cudnnPoolingForward failed", err); auto cudaErr = cudaStreamSynchronize(*context->getCudaStream()); @@ -228,7 +228,7 @@ void pooling2dBpCUDNN(const LaunchContext* context, NDArray::prepareSpecialUse({gradI}, {input, gradO}); // run calculation for gradI - err = cudnnPoolingBackward(*handle, pooling, alpha, dz, gradO->getSpecialBuffer(), dz, gradO->getSpecialBuffer(), x, input->getSpecialBuffer(), beta, x, gradI->getSpecialBuffer()); + err = cudnnPoolingBackward(*handle, pooling, alpha, dz, gradO->specialBuffer(), dz, gradO->specialBuffer(), x, input->specialBuffer(), beta, x, gradI->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("pooling2dBpCUDNN: cudnnPoolingBackward failed", err); auto cudaErr = cudaStreamSynchronize(*context->getCudaStream()); @@ -302,7 +302,7 @@ void pooling3dCUDNN(const LaunchContext* context, NDArray::prepareSpecialUse({output}, {input}); // run calculation - err = cudnnPoolingForward(*handle, pooling, alpha, x, input->getSpecialBuffer(), beta, z, output->specialBuffer()); + err = cudnnPoolingForward(*handle, pooling, alpha, x, input->specialBuffer(), beta, z, output->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("pooling3dCUDNN: cudnnPoolingForward failed", err); auto cudaErr = cudaStreamSynchronize(*context->getCudaStream()); @@ -382,11 +382,11 @@ void pooling3dBpCUDNN(const LaunchContext* context, NDArray::prepareSpecialUse({gradI}, {input, gradO, &temp}); // run ff calculation - err = cudnnPoolingForward(*handle, pooling, alpha, x, input->getSpecialBuffer(), beta, dz, temp.specialBuffer()); + err = cudnnPoolingForward(*handle, pooling, alpha, x, 
input->specialBuffer(), beta, dz, temp.specialBuffer()); if (err != 0) throw sd::cuda_exception::build("pooling3dCUDNN: cudnnPoolingForward failed", err); // run bp calculation for gradI - err = cudnnPoolingBackward(*handle, pooling, alpha, dz, temp.getSpecialBuffer(), dz, gradO->getSpecialBuffer(), x, input->getSpecialBuffer(), beta, x, gradI->getSpecialBuffer()); + err = cudnnPoolingBackward(*handle, pooling, alpha, dz, temp.specialBuffer(), dz, gradO->specialBuffer(), x, input->specialBuffer(), beta, x, gradI->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("pooling2dBpCUDNN: cudnnPoolingBackward failed", err); NDArray::registerSpecialUse({gradI}, {input, gradO, &temp}); @@ -396,7 +396,7 @@ void pooling3dBpCUDNN(const LaunchContext* context, NDArray::prepareSpecialUse({gradI}, {input, gradO}); // run bp calculation for gradI - err = cudnnPoolingBackward(*handle, pooling, alpha, dz, gradO->getSpecialBuffer(), dz, gradO->getSpecialBuffer(), x, input->getSpecialBuffer(), beta, x, gradI->getSpecialBuffer()); + err = cudnnPoolingBackward(*handle, pooling, alpha, dz, gradO->specialBuffer(), dz, gradO->specialBuffer(), x, input->specialBuffer(), beta, x, gradI->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("pooling2dBpCUDNN: cudnnPoolingBackward failed", err); NDArray::registerSpecialUse({gradI}, {input, gradO}); diff --git a/libnd4j/include/ops/declarable/platform/cudnn/depthwiseConv2d.cu b/libnd4j/include/ops/declarable/platform/cudnn/depthwiseConv2d.cu index 612206f35..c268961ce 100644 --- a/libnd4j/include/ops/declarable/platform/cudnn/depthwiseConv2d.cu +++ b/libnd4j/include/ops/declarable/platform/cudnn/depthwiseConv2d.cu @@ -109,7 +109,7 @@ static void depthwiseConv2dCUDNN(const LaunchContext* context, NDArray::prepareSpecialUse({output}, {input, weights, bias}); // run calculation - err = cudnnConvolutionForward(*handle, alpha, x, input->getSpecialBuffer(), w, weights->getSpecialBuffer(), conv, algo, wsData, wsSize, beta, z, 
output->specialBuffer()); + err = cudnnConvolutionForward(*handle, alpha, x, input->specialBuffer(), w, weights->specialBuffer(), conv, algo, wsData, wsSize, beta, z, output->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("depthwiseConv2dCUDNN: cudnnConvolutionForward failed", err); // add bias if it is present @@ -120,7 +120,7 @@ static void depthwiseConv2dCUDNN(const LaunchContext* context, // err = cudnnSetTensor4dDescriptor(b, format, cudnnDataType(bias->dataType()), 1, isNCHW ? bias->lengthOf() : 1, 1, isNCHW ? 1: bias->lengthOf()); err = cudnnSetTensor4dDescriptor(b, CUDNN_TENSOR_NCHW, cudnnDataType(bias->dataType()), 1, oC, 1, 1); if (err != 0) throw sd::cuda_exception::build("depthwiseConv2dCUDNN: cudnnSetTensor4dDescriptor for bias failed", err); - err = cudnnAddTensor(*handle, alpha, b, bias->getSpecialBuffer(), alpha, z, output->specialBuffer()); + err = cudnnAddTensor(*handle, alpha, b, bias->specialBuffer(), alpha, z, output->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("depthwiseConv2dCUDNN: cudnnAddTensor bias failed", err); } @@ -246,16 +246,16 @@ static void depthwiseConv2dBpCUDNN(const LaunchContext* context, err = cudnnSetTensor4dDescriptor(db, CUDNN_TENSOR_NCHW, cudnnDataType(gradB->dataType()), 1, oC, 1, 1); if (err != 0) throw sd::cuda_exception::build("depthwiseConv2dBpCUDNN: cudnnSetTensor4dDescriptor for gradB failed", err); - err = cudnnConvolutionBackwardBias(*handle, alpha, dz, gradO->getSpecialBuffer(), beta, db, gradB->getSpecialBuffer()); + err = cudnnConvolutionBackwardBias(*handle, alpha, dz, gradO->specialBuffer(), beta, db, gradB->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("depthwiseConv2dBpCUDNN: cudnnConvolutionBackwardBias failed", err); } // run calculation for gradW - err = cudnnConvolutionBackwardFilter(*handle, alpha, x, input->getSpecialBuffer(), dz, gradO->getSpecialBuffer(), conv, algoGradW, wsGradWData, wsGradWSize, beta, dw, gradW->getSpecialBuffer()); + err = 
cudnnConvolutionBackwardFilter(*handle, alpha, x, input->specialBuffer(), dz, gradO->specialBuffer(), conv, algoGradW, wsGradWData, wsGradWSize, beta, dw, gradW->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("depthwiseConv2dBpCUDNN: cudnnConvolutionBackwardFilter failed", err); // run calculation for gradI - err = cudnnConvolutionBackwardData(*handle, alpha, dw, weights->getSpecialBuffer(), dz, gradO->getSpecialBuffer(), conv, algoGradI, wsGradIData, wsGradISize, beta, dx, gradI->getSpecialBuffer()); + err = cudnnConvolutionBackwardData(*handle, alpha, dw, weights->specialBuffer(), dz, gradO->specialBuffer(), conv, algoGradI, wsGradIData, wsGradISize, beta, dx, gradI->specialBuffer()); if (err != 0) throw sd::cuda_exception::build("depthwiseConv2dBpCUDNN: cudnnConvolutionBackwardData failed", err); // cudaErr = cudaStreamSynchronize(*context->getCudaStream()); diff --git a/libnd4j/include/ops/declarable/platform/cudnn/maxpool2d.cu b/libnd4j/include/ops/declarable/platform/cudnn/maxpool2d.cu index 3919d9614..5bb646f57 100644 --- a/libnd4j/include/ops/declarable/platform/cudnn/maxpool2d.cu +++ b/libnd4j/include/ops/declarable/platform/cudnn/maxpool2d.cu @@ -123,7 +123,7 @@ PLATFORM_CHECK(maxpool2d_bp, ENGINE_CUDA) { return goodType && (input->dataType() == gradO->dataType()) && (input->dataType() == gradI->dataType()) - && shape::haveSameShapeAndStrides(input->getShapeInfo(), gradI->getShapeInfo()); + && shape::haveSameShapeAndStrides(input->shapeInfo(), gradI->shapeInfo()); } diff --git a/libnd4j/include/ops/declarable/platform/cudnn/maxpool3d.cu b/libnd4j/include/ops/declarable/platform/cudnn/maxpool3d.cu index d28541b08..f7b9c8b50 100644 --- a/libnd4j/include/ops/declarable/platform/cudnn/maxpool3d.cu +++ b/libnd4j/include/ops/declarable/platform/cudnn/maxpool3d.cu @@ -131,7 +131,7 @@ PLATFORM_CHECK(maxpool3dnew_bp, ENGINE_CUDA) { return goodType && (input->dataType() == gradO->dataType()) && (input->dataType() == gradI->dataType()) - && 
shape::haveSameShapeAndStrides(input->getShapeInfo(), gradI->getShapeInfo()); + && shape::haveSameShapeAndStrides(input->shapeInfo(), gradI->shapeInfo()); } diff --git a/libnd4j/include/ops/declarable/platform/mkldnn/batchnorm.cpp b/libnd4j/include/ops/declarable/platform/mkldnn/batchnorm.cpp index 6ae27b42a..21bdbbe8d 100644 --- a/libnd4j/include/ops/declarable/platform/mkldnn/batchnorm.cpp +++ b/libnd4j/include/ops/declarable/platform/mkldnn/batchnorm.cpp @@ -115,7 +115,7 @@ static void batchnormMKLDNN(const NDArray* x, const NDArray* mean, const NDArray mkldnnUtils::loadDataToMklStream(x, engine, stream, x_user_md, op_ff_prim_desc.src_desc(), args[DNNL_ARG_SRC]); // z - auto z_user_mem = dnnl::memory(z_user_md, engine, z->getBuffer()); + auto z_user_mem = dnnl::memory(z_user_md, engine, z->buffer()); const bool zReorder = op_ff_prim_desc.dst_desc() != z_user_mem.get_desc(); auto z_mkl_mem = zReorder ? dnnl::memory(op_ff_prim_desc.dst_desc(), engine) : z_user_mem; if (zReorder) @@ -123,17 +123,17 @@ static void batchnormMKLDNN(const NDArray* x, const NDArray* mean, const NDArray args[DNNL_ARG_DST] = z_mkl_mem; // mean - auto mean_mkl_mem = dnnl::memory(op_ff_prim_desc.mean_desc(), engine, mean->getBuffer()); + auto mean_mkl_mem = dnnl::memory(op_ff_prim_desc.mean_desc(), engine, const_cast(mean->buffer())); args[DNNL_ARG_MEAN] = mean_mkl_mem; // variance - auto var_mkl_mem = dnnl::memory(op_ff_prim_desc.variance_desc(), engine, variance->getBuffer()); + auto var_mkl_mem = dnnl::memory(op_ff_prim_desc.variance_desc(), engine, const_cast(variance->buffer())); args[DNNL_ARG_VARIANCE] = var_mkl_mem; // gamma and beta (and their gradients) if they are present if(weights != nullptr) { - auto w_mkl_mem = dnnl::memory(op_ff_prim_desc.weights_desc(), engine, weights->getBuffer()); + auto w_mkl_mem = dnnl::memory(op_ff_prim_desc.weights_desc(), engine, const_cast(weights->buffer())); args[DNNL_ARG_WEIGHTS] = w_mkl_mem; } @@ -245,15 +245,15 @@ static void 
batchnormBackPropMKLDNN(const NDArray* x, const NDArray* mean, const mkldnnUtils::loadDataToMklStream(&dLdO, engine, stream, dLdO_user_md, op_bp_prim_desc.diff_dst_desc(), args[DNNL_ARG_DIFF_DST]); // mean - auto mean_mkl_mem = dnnl::memory(op_bp_prim_desc.mean_desc(), engine, mean->getBuffer()); + auto mean_mkl_mem = dnnl::memory(op_bp_prim_desc.mean_desc(), engine, const_cast(mean->buffer())); args[DNNL_ARG_MEAN] = mean_mkl_mem; // variance - auto var_mkl_mem = dnnl::memory(op_bp_prim_desc.variance_desc(), engine, variance->getBuffer()); + auto var_mkl_mem = dnnl::memory(op_bp_prim_desc.variance_desc(), engine, const_cast(variance->buffer())); args[DNNL_ARG_VARIANCE] = var_mkl_mem; // dLdI - auto dLdI_user_mem = dnnl::memory(dLdI_user_md, engine, dLdI->getBuffer()); + auto dLdI_user_mem = dnnl::memory(dLdI_user_md, engine, dLdI->buffer()); const bool dLdIReorder = op_bp_prim_desc.diff_src_desc() != dLdI_user_mem.get_desc(); auto dLdI_mkl_mem = dLdIReorder ? dnnl::memory(op_bp_prim_desc.diff_src_desc(), engine) : dLdI_user_mem; args[DNNL_ARG_DIFF_SRC] = dLdI_mkl_mem; @@ -261,10 +261,10 @@ static void batchnormBackPropMKLDNN(const NDArray* x, const NDArray* mean, const // gamma and beta (and their gradients) if they are present if(weights != nullptr) { - auto w_mkl_mem = dnnl::memory(op_bp_prim_desc.weights_desc(), engine, weights->getBuffer()); + auto w_mkl_mem = dnnl::memory(op_bp_prim_desc.weights_desc(), engine, const_cast(weights->buffer())); args[DNNL_ARG_WEIGHTS] = w_mkl_mem; - auto dLdW_mkl_mem = dnnl::memory(op_bp_prim_desc.weights_desc(), engine, dLdW->getBuffer()); + auto dLdW_mkl_mem = dnnl::memory(op_bp_prim_desc.weights_desc(), engine, dLdW->buffer()); args[DNNL_ARG_DIFF_WEIGHTS] = dLdW_mkl_mem; } diff --git a/libnd4j/include/ops/declarable/platform/mkldnn/conv2d.cpp b/libnd4j/include/ops/declarable/platform/mkldnn/conv2d.cpp index 0aa05f7f2..b1def8ed7 100644 --- a/libnd4j/include/ops/declarable/platform/mkldnn/conv2d.cpp +++ 
b/libnd4j/include/ops/declarable/platform/mkldnn/conv2d.cpp @@ -121,12 +121,12 @@ static void conv2dMKLDNN(const NDArray *input, const NDArray *weights, // bias if(bias != nullptr) { - auto b_mkl_mem = dnnl::memory(b_mkl_md, engine, bias->getBuffer()); + auto b_mkl_mem = dnnl::memory(b_mkl_md, engine, const_cast(bias->buffer())); args[DNNL_ARG_BIAS] = b_mkl_mem; } // output - auto z_user_mem = dnnl::memory(z_user_md, engine, output->getBuffer()); + auto z_user_mem = dnnl::memory(z_user_md, engine, output->buffer()); const bool zReorder = op_prim_desc.dst_desc() != z_user_mem.get_desc(); auto z_mkl_mem = zReorder ? dnnl::memory(op_prim_desc.dst_desc(), engine) : z_user_mem; args[DNNL_ARG_DST] = z_mkl_mem; @@ -262,7 +262,7 @@ static void conv2dBpMKLDNN(const NDArray *input, const NDArray *weights, const N mkldnnUtils::loadDataToMklStream(weights, engine, stream, w_user_md, op_data_bp_prim_desc.weights_desc(), args[DNNL_ARG_WEIGHTS]); // gradO - auto gradO_user_mem = dnnl::memory(gradO_user_md, engine, gradO->getBuffer()); + auto gradO_user_mem = dnnl::memory(gradO_user_md, engine, const_cast(gradO->buffer())); const bool gradOReorderW = op_weights_bp_prim_desc.diff_dst_desc() != gradO_user_mem.get_desc(); const bool gradOReorderD = op_data_bp_prim_desc.diff_dst_desc() != gradO_user_mem.get_desc(); auto gradO_mkl_memW = gradOReorderW ? dnnl::memory(op_weights_bp_prim_desc.diff_dst_desc(), engine) : gradO_user_mem; @@ -274,20 +274,20 @@ static void conv2dBpMKLDNN(const NDArray *input, const NDArray *weights, const N args[DNNL_ARG_DIFF_DST] = gradO_mkl_memD; // gradI - auto gradI_user_mem = dnnl::memory(gradI_user_md, engine, gradI->getBuffer()); + auto gradI_user_mem = dnnl::memory(gradI_user_md, engine, gradI->buffer()); const bool gradIReorder = op_data_bp_prim_desc.diff_src_desc() != gradI_user_mem.get_desc(); auto gradI_mkl_mem = gradIReorder ? 
dnnl::memory(op_data_bp_prim_desc.diff_src_desc(), engine) : gradI_user_mem; args[DNNL_ARG_DIFF_SRC] = gradI_mkl_mem; // gradW - auto gradW_user_mem = dnnl::memory(gradW_user_md, engine, gradW->getBuffer()); + auto gradW_user_mem = dnnl::memory(gradW_user_md, engine, gradW->buffer()); const bool gradWReorder = op_weights_bp_prim_desc.diff_weights_desc() != gradW_user_mem.get_desc(); auto gradW_mkl_mem = gradWReorder ? dnnl::memory(op_weights_bp_prim_desc.diff_weights_desc(), engine) : gradW_user_mem; args[DNNL_ARG_DIFF_WEIGHTS] = gradW_mkl_mem; // gradB if(gradB != nullptr) { - auto gradB_mkl_mem = dnnl::memory(gradB_mkl_md, engine, gradB->getBuffer()); + auto gradB_mkl_mem = dnnl::memory(gradB_mkl_md, engine, gradB->buffer()); args[DNNL_ARG_DIFF_BIAS] = gradB_mkl_mem; } diff --git a/libnd4j/include/ops/declarable/platform/mkldnn/conv3d.cpp b/libnd4j/include/ops/declarable/platform/mkldnn/conv3d.cpp index 68f0eea89..b9fa696c5 100644 --- a/libnd4j/include/ops/declarable/platform/mkldnn/conv3d.cpp +++ b/libnd4j/include/ops/declarable/platform/mkldnn/conv3d.cpp @@ -125,12 +125,12 @@ static void conv3dMKLDNN(const NDArray *input, const NDArray *weights, // bias if(bias != nullptr) { - auto b_mkl_mem = dnnl::memory(b_mkl_md, engine, bias->getBuffer()); + auto b_mkl_mem = dnnl::memory(b_mkl_md, engine, const_cast(bias->buffer())); args[DNNL_ARG_BIAS] = b_mkl_mem; } // output - auto z_user_mem = dnnl::memory(z_user_md, engine, output->getBuffer()); + auto z_user_mem = dnnl::memory(z_user_md, engine, output->buffer()); const bool zReorder = op_prim_desc.dst_desc() != z_user_mem.get_desc(); auto z_mkl_mem = zReorder ? 
dnnl::memory(op_prim_desc.dst_desc(), engine) : z_user_mem; args[DNNL_ARG_DST] = z_mkl_mem; @@ -273,7 +273,7 @@ static void conv3dBpMKLDNN(const NDArray *input, const NDArray *weights, const N mkldnnUtils::loadDataToMklStream(weights, engine, stream, w_user_md, op_data_bp_prim_desc.weights_desc(), args[DNNL_ARG_WEIGHTS]); // gradO - auto gradO_user_mem = dnnl::memory(gradO_user_md, engine, gradO->getBuffer()); + auto gradO_user_mem = dnnl::memory(gradO_user_md, engine, const_cast(gradO->buffer())); const bool gradOReorderW = op_weights_bp_prim_desc.diff_dst_desc() != gradO_user_mem.get_desc(); const bool gradOReorderD = op_data_bp_prim_desc.diff_dst_desc() != gradO_user_mem.get_desc(); auto gradO_mkl_memW = gradOReorderW ? dnnl::memory(op_weights_bp_prim_desc.diff_dst_desc(), engine) : gradO_user_mem; @@ -285,20 +285,20 @@ static void conv3dBpMKLDNN(const NDArray *input, const NDArray *weights, const N args[DNNL_ARG_DIFF_DST] = gradO_mkl_memD; // gradI - auto gradI_user_mem = dnnl::memory(gradI_user_md, engine, gradI->getBuffer()); + auto gradI_user_mem = dnnl::memory(gradI_user_md, engine, gradI->buffer()); const bool gradIReorder = op_data_bp_prim_desc.diff_src_desc() != gradI_user_mem.get_desc(); auto gradI_mkl_mem = gradIReorder ? dnnl::memory(op_data_bp_prim_desc.diff_src_desc(), engine) : gradI_user_mem; args[DNNL_ARG_DIFF_SRC] = gradI_mkl_mem; // gradW - auto gradW_user_mem = dnnl::memory(gradW_user_md, engine, gradW->getBuffer()); + auto gradW_user_mem = dnnl::memory(gradW_user_md, engine, gradW->buffer()); const bool gradWReorder = op_weights_bp_prim_desc.diff_weights_desc() != gradW_user_mem.get_desc(); auto gradW_mkl_mem = gradWReorder ? 
dnnl::memory(op_weights_bp_prim_desc.diff_weights_desc(), engine) : gradW_user_mem; args[DNNL_ARG_DIFF_WEIGHTS] = gradW_mkl_mem; // gradB if(gradB != nullptr) { - auto gradB_mkl_mem = dnnl::memory(gradB_mkl_md, engine, gradB->getBuffer()); + auto gradB_mkl_mem = dnnl::memory(gradB_mkl_md, engine, gradB->buffer()); args[DNNL_ARG_DIFF_BIAS] = gradB_mkl_mem; } @@ -379,7 +379,7 @@ static void conv3dMKLDNN(sd::graph::Context &block, } if (bias != nullptr) { - auto conv_bias_memory = dnnl::memory(conv_prim_desc.bias_desc(), engine, bias->getBuffer()); + auto conv_bias_memory = dnnl::memory(conv_prim_desc.bias_desc(), engine, bias->buffer()); convolution_forward(conv_prim_desc).execute(stream, {{DNNL_ARG_SRC, conv_src_memory}, {DNNL_ARG_WEIGHTS, conv_weights_memory}, {DNNL_ARG_BIAS, conv_bias_memory}, diff --git a/libnd4j/include/ops/declarable/platform/mkldnn/deconv2d.cpp b/libnd4j/include/ops/declarable/platform/mkldnn/deconv2d.cpp index a1ca2a717..584fd50a5 100644 --- a/libnd4j/include/ops/declarable/platform/mkldnn/deconv2d.cpp +++ b/libnd4j/include/ops/declarable/platform/mkldnn/deconv2d.cpp @@ -142,12 +142,12 @@ static void deconv2dMKLDNN(const NDArray* input, const NDArray* weights, const N // bias if(bias != nullptr) { - auto b_mkl_mem = dnnl::memory(b_mkl_md, engine, bias->getBuffer()); + auto b_mkl_mem = dnnl::memory(b_mkl_md, engine, const_cast(bias->buffer())); args[DNNL_ARG_BIAS] = b_mkl_mem; } // output - auto z_user_mem = dnnl::memory(z_user_md, engine, output->getBuffer()); + auto z_user_mem = dnnl::memory(z_user_md, engine, output->buffer()); const bool zReorder = op_prim_desc.dst_desc() != z_user_mem.get_desc(); auto z_mkl_mem = zReorder ? 
dnnl::memory(op_prim_desc.dst_desc(), engine) : z_user_mem; args[DNNL_ARG_DST] = z_mkl_mem; @@ -279,7 +279,7 @@ static void deconv2dBpMKLDNN(const NDArray* input, const NDArray* weights, const mkldnnUtils::loadDataToMklStream(weights, engine, stream, w_user_md, op_data_bp_prim_desc.weights_desc(), args[DNNL_ARG_WEIGHTS]); // gradO - auto gradO_user_mem = dnnl::memory(gradO_user_md, engine, gradO->getBuffer()); + auto gradO_user_mem = dnnl::memory(gradO_user_md, engine, const_cast(gradO->buffer())); const bool gradOReorderW = op_weights_bp_prim_desc.diff_dst_desc() != gradO_user_mem.get_desc(); const bool gradOReorderD = op_data_bp_prim_desc.diff_dst_desc() != gradO_user_mem.get_desc(); auto gradO_mkl_memW = gradOReorderW ? dnnl::memory(op_weights_bp_prim_desc.diff_dst_desc(), engine) : gradO_user_mem; @@ -291,20 +291,20 @@ static void deconv2dBpMKLDNN(const NDArray* input, const NDArray* weights, const args[DNNL_ARG_DIFF_DST] = gradO_mkl_memD; // gradI - auto gradI_user_mem = dnnl::memory(gradI_user_md, engine, gradI->getBuffer()); + auto gradI_user_mem = dnnl::memory(gradI_user_md, engine, gradI->buffer()); const bool gradIReorder = op_data_bp_prim_desc.diff_src_desc() != gradI_user_mem.get_desc(); auto gradI_mkl_mem = gradIReorder ? dnnl::memory(op_data_bp_prim_desc.diff_src_desc(), engine) : gradI_user_mem; args[DNNL_ARG_DIFF_SRC] = gradI_mkl_mem; // gradW - auto gradW_user_mem = dnnl::memory(gradW_user_md, engine, gradW->getBuffer()); + auto gradW_user_mem = dnnl::memory(gradW_user_md, engine, gradW->buffer()); const bool gradWReorder = op_weights_bp_prim_desc.diff_weights_desc() != gradW_user_mem.get_desc(); auto gradW_mkl_mem = gradWReorder ? 
dnnl::memory(op_weights_bp_prim_desc.diff_weights_desc(), engine) : gradW_user_mem; args[DNNL_ARG_DIFF_WEIGHTS] = gradW_mkl_mem; // gradB if(gradB != nullptr) { - auto gradB_mkl_mem = dnnl::memory(gradB_mkl_md, engine, gradB->getBuffer()); + auto gradB_mkl_mem = dnnl::memory(gradB_mkl_md, engine, gradB->buffer()); args[DNNL_ARG_DIFF_BIAS] = gradB_mkl_mem; } diff --git a/libnd4j/include/ops/declarable/platform/mkldnn/deconv2d_tf.cpp b/libnd4j/include/ops/declarable/platform/mkldnn/deconv2d_tf.cpp index 3236990b1..5e5da4748 100644 --- a/libnd4j/include/ops/declarable/platform/mkldnn/deconv2d_tf.cpp +++ b/libnd4j/include/ops/declarable/platform/mkldnn/deconv2d_tf.cpp @@ -107,7 +107,7 @@ static void deconv2TFdBackPropMKLDNN(const NDArray* weights, const NDArray* grad mkldnnUtils::loadDataToMklStream(gradO, engine, stream, gradO_user_md, op_data_bp_prim_desc.diff_dst_desc(), args[DNNL_ARG_DIFF_DST]); // gradI - auto gradI_user_mem = dnnl::memory(gradI_user_md, engine, gradI->getBuffer()); + auto gradI_user_mem = dnnl::memory(gradI_user_md, engine, gradI->buffer()); const bool gradIReorder = op_data_bp_prim_desc.diff_src_desc() != gradI_user_mem.get_desc(); auto gradI_mkl_mem = gradIReorder ? 
dnnl::memory(op_data_bp_prim_desc.diff_src_desc(), engine) : gradI_user_mem; args[DNNL_ARG_DIFF_SRC] = gradI_mkl_mem; diff --git a/libnd4j/include/ops/declarable/platform/mkldnn/deconv3d.cpp b/libnd4j/include/ops/declarable/platform/mkldnn/deconv3d.cpp index bcc3d700a..eb6966c77 100644 --- a/libnd4j/include/ops/declarable/platform/mkldnn/deconv3d.cpp +++ b/libnd4j/include/ops/declarable/platform/mkldnn/deconv3d.cpp @@ -144,12 +144,12 @@ static void deconv3dMKLDNN(const NDArray* input, const NDArray* weights, const N // bias if(bias != nullptr) { - auto b_mkl_mem = dnnl::memory(b_mkl_md, engine, bias->getBuffer()); + auto b_mkl_mem = dnnl::memory(b_mkl_md, engine, const_cast(bias->buffer())); args[DNNL_ARG_BIAS] = b_mkl_mem; } // output - auto z_user_mem = dnnl::memory(z_user_md, engine, output->getBuffer()); + auto z_user_mem = dnnl::memory(z_user_md, engine, output->buffer()); const bool zReorder = op_prim_desc.dst_desc() != z_user_mem.get_desc(); auto z_mkl_mem = zReorder ? dnnl::memory(op_prim_desc.dst_desc(), engine) : z_user_mem; args[DNNL_ARG_DST] = z_mkl_mem; @@ -287,7 +287,7 @@ static void deconv3dBackPropMKLDNN(const NDArray* input, const NDArray* weights, mkldnnUtils::loadDataToMklStream(weights, engine, stream, w_user_md, op_data_bp_prim_desc.weights_desc(), args[DNNL_ARG_WEIGHTS]); // gradO - auto gradO_user_mem = dnnl::memory(gradO_user_md, engine, gradO->getBuffer()); + auto gradO_user_mem = dnnl::memory(gradO_user_md, engine, const_cast(gradO->buffer())); const bool gradOReorderW = op_weights_bp_prim_desc.diff_dst_desc() != gradO_user_mem.get_desc(); const bool gradOReorderD = op_data_bp_prim_desc.diff_dst_desc() != gradO_user_mem.get_desc(); auto gradO_mkl_memW = gradOReorderW ? 
dnnl::memory(op_weights_bp_prim_desc.diff_dst_desc(), engine) : gradO_user_mem; @@ -299,20 +299,20 @@ static void deconv3dBackPropMKLDNN(const NDArray* input, const NDArray* weights, args[DNNL_ARG_DIFF_DST] = gradO_mkl_memD; // gradI - auto gradI_user_mem = dnnl::memory(gradI_user_md, engine, gradI->getBuffer()); + auto gradI_user_mem = dnnl::memory(gradI_user_md, engine, gradI->buffer()); const bool gradIReorder = op_data_bp_prim_desc.diff_src_desc() != gradI_user_mem.get_desc(); auto gradI_mkl_mem = gradIReorder ? dnnl::memory(op_data_bp_prim_desc.diff_src_desc(), engine) : gradI_user_mem; args[DNNL_ARG_DIFF_SRC] = gradI_mkl_mem; // gradW - auto gradW_user_mem = dnnl::memory(gradW_user_md, engine, gradW->getBuffer()); + auto gradW_user_mem = dnnl::memory(gradW_user_md, engine, gradW->buffer()); const bool gradWReorder = op_weights_bp_prim_desc.diff_weights_desc() != gradW_user_mem.get_desc(); auto gradW_mkl_mem = gradWReorder ? dnnl::memory(op_weights_bp_prim_desc.diff_weights_desc(), engine) : gradW_user_mem; args[DNNL_ARG_DIFF_WEIGHTS] = gradW_mkl_mem; // gradB if(gradB != nullptr) { - auto gradB_mkl_mem = dnnl::memory(gradB_mkl_md, engine, gradB->getBuffer()); + auto gradB_mkl_mem = dnnl::memory(gradB_mkl_md, engine, gradB->buffer()); args[DNNL_ARG_DIFF_BIAS] = gradB_mkl_mem; } diff --git a/libnd4j/include/ops/declarable/platform/mkldnn/depthwiseConv2d.cpp b/libnd4j/include/ops/declarable/platform/mkldnn/depthwiseConv2d.cpp index 2ca16bb8e..92f40537b 100644 --- a/libnd4j/include/ops/declarable/platform/mkldnn/depthwiseConv2d.cpp +++ b/libnd4j/include/ops/declarable/platform/mkldnn/depthwiseConv2d.cpp @@ -153,12 +153,12 @@ static void depthwiseConv2dMKLDNN(const NDArray* input, const NDArray* weights, // bias if(bias != nullptr) { - auto b_mkl_mem = dnnl::memory(b_mkl_md, engine, bias->getBuffer()); + auto b_mkl_mem = dnnl::memory(b_mkl_md, engine, const_cast(bias->buffer())); args[DNNL_ARG_BIAS] = b_mkl_mem; } // output - auto z_user_mem = 
dnnl::memory(z_user_md, engine, output->getBuffer()); + auto z_user_mem = dnnl::memory(z_user_md, engine, output->buffer()); const bool zReorder = op_prim_desc.dst_desc() != z_user_mem.get_desc(); auto z_mkl_mem = zReorder ? dnnl::memory(op_prim_desc.dst_desc(), engine) : z_user_mem; args[DNNL_ARG_DST] = z_mkl_mem; @@ -300,7 +300,7 @@ static void depthwiseConv2dNackPropMKLDNN(const NDArray* input, const NDArray* w mkldnnUtils::loadDataToMklStream(weights, engine, stream, w_user_md, op_data_bp_prim_desc.weights_desc(), args[DNNL_ARG_WEIGHTS]); // gradO - auto gradO_user_mem = dnnl::memory(gradO_user_md, engine, gradO->getBuffer()); + auto gradO_user_mem = dnnl::memory(gradO_user_md, engine, const_cast(gradO->buffer())); const bool gradOReorderW = op_weights_bp_prim_desc.diff_dst_desc() != gradO_user_mem.get_desc(); const bool gradOReorderD = op_data_bp_prim_desc.diff_dst_desc() != gradO_user_mem.get_desc(); auto gradO_mkl_memW = gradOReorderW ? dnnl::memory(op_weights_bp_prim_desc.diff_dst_desc(), engine) : gradO_user_mem; @@ -312,20 +312,20 @@ static void depthwiseConv2dNackPropMKLDNN(const NDArray* input, const NDArray* w args[DNNL_ARG_DIFF_DST] = gradO_mkl_memD; // gradI - auto gradI_user_mem = dnnl::memory(gradI_user_md, engine, gradI->getBuffer()); + auto gradI_user_mem = dnnl::memory(gradI_user_md, engine, gradI->buffer()); const bool gradIReorder = op_data_bp_prim_desc.diff_src_desc() != gradI_user_mem.get_desc(); auto gradI_mkl_mem = gradIReorder ? dnnl::memory(op_data_bp_prim_desc.diff_src_desc(), engine) : gradI_user_mem; args[DNNL_ARG_DIFF_SRC] = gradI_mkl_mem; // gradW - auto gradW_user_mem = dnnl::memory(gradW_user_md, engine, gradW->getBuffer()); + auto gradW_user_mem = dnnl::memory(gradW_user_md, engine, gradW->buffer()); const bool gradWReorder = op_weights_bp_prim_desc.diff_weights_desc() != gradW_user_mem.get_desc(); auto gradW_mkl_mem = gradWReorder ? 
dnnl::memory(op_weights_bp_prim_desc.diff_weights_desc(), engine) : gradW_user_mem; args[DNNL_ARG_DIFF_WEIGHTS] = gradW_mkl_mem; // gradB if(gradB != nullptr) { - auto gradB_mkl_mem = dnnl::memory(gradB_mkl_md, engine, gradB->getBuffer()); + auto gradB_mkl_mem = dnnl::memory(gradB_mkl_md, engine, gradB->buffer()); args[DNNL_ARG_DIFF_BIAS] = gradB_mkl_mem; } diff --git a/libnd4j/include/ops/declarable/platform/mkldnn/lstmLayer.cpp b/libnd4j/include/ops/declarable/platform/mkldnn/lstmLayer.cpp index 6763d1403..60c61ea5f 100644 --- a/libnd4j/include/ops/declarable/platform/mkldnn/lstmLayer.cpp +++ b/libnd4j/include/ops/declarable/platform/mkldnn/lstmLayer.cpp @@ -281,7 +281,7 @@ static void lstmLayerMKLDNN(const NDArray* x, const NDArray* Wx, const NDArray* mkldnnUtils::loadDataToMklStream(Wr, engine, stream, wr_user_md, lstm_prim_desc.weights_iter_desc(), args[DNNL_ARG_WEIGHTS_ITER]); // h - auto h_user_mem = dnnl::memory(h_user_md, engine, h->getBuffer()); + auto h_user_mem = dnnl::memory(h_user_md, engine, h->buffer()); const bool hReorder = lstm_prim_desc.dst_layer_desc() != h_user_mem.get_desc(); auto h_lstm_mem = hReorder ? dnnl::memory(lstm_prim_desc.dst_layer_desc(), engine) : h_user_mem; args[DNNL_ARG_DST_LAYER] = h_lstm_mem; @@ -306,7 +306,7 @@ static void lstmLayerMKLDNN(const NDArray* x, const NDArray* Wx, const NDArray* // hL if(hL) { - hL_user_mem = dnnl::memory(hL_user_md, engine, hL->getBuffer()); + hL_user_mem = dnnl::memory(hL_user_md, engine, hL->buffer()); hLReorder = lstm_prim_desc.dst_iter_desc() != hL_user_mem.get_desc(); hL_lstm_mem = hLReorder ? 
dnnl::memory(lstm_prim_desc.dst_iter_desc(), engine) : hL_user_mem; args[DNNL_ARG_DST_ITER] = hL_lstm_mem; @@ -314,7 +314,7 @@ static void lstmLayerMKLDNN(const NDArray* x, const NDArray* Wx, const NDArray* // cL if(cL) { - cL_user_mem = dnnl::memory(cL_user_md, engine, cL->getBuffer()); + cL_user_mem = dnnl::memory(cL_user_md, engine, cL->buffer()); cLReorder = lstm_prim_desc.dst_iter_c_desc() != cL_user_mem.get_desc(); cL_lstm_mem = cLReorder ? dnnl::memory(lstm_prim_desc.dst_iter_c_desc(), engine) : cL_user_mem; args[DNNL_ARG_DST_ITER_C] = cL_lstm_mem; diff --git a/libnd4j/include/ops/declarable/platform/mkldnn/matmul.cpp b/libnd4j/include/ops/declarable/platform/mkldnn/matmul.cpp index 0dd3b21f7..265fb74bc 100644 --- a/libnd4j/include/ops/declarable/platform/mkldnn/matmul.cpp +++ b/libnd4j/include/ops/declarable/platform/mkldnn/matmul.cpp @@ -183,7 +183,7 @@ static void matmulMKLDNN(const NDArray* x, const NDArray* y, NDArray* z, const b // input mkldnnUtils::loadDataToMklStream(xTR, engine, stream, x_user_md, op_prim_desc.src_desc(), args[DNNL_ARG_SRC]); /* - auto x_user_mem = dnnl::memory(x_user_md, engine, xTR->getBuffer()); + auto x_user_mem = dnnl::memory(x_user_md, engine, xTR->buffer()); const bool xReorder = op_prim_desc.src_desc() != x_user_mem.get_desc(); auto x_mkl_mem = xReorder ? dnnl::memory(op_prim_desc.src_desc(), engine) : x_user_mem; if (xReorder) @@ -193,7 +193,7 @@ static void matmulMKLDNN(const NDArray* x, const NDArray* y, NDArray* z, const b // y mkldnnUtils::loadDataToMklStream(yTR, engine, stream, y_user_md, op_prim_desc.weights_desc(), args[DNNL_ARG_WEIGHTS]); /* - auto y_user_mem = dnnl::memory(y_user_md, engine, yTR->getBuffer()); + auto y_user_mem = dnnl::memory(y_user_md, engine, yTR->buffer()); const bool yReorder = op_prim_desc.weights_desc() != y_user_mem.get_desc(); auto y_mkl_mem = yReorder ? 
dnnl::memory(op_prim_desc.weights_desc(), engine) : y_user_mem; if (yReorder) @@ -201,7 +201,7 @@ static void matmulMKLDNN(const NDArray* x, const NDArray* y, NDArray* z, const b args[DNNL_ARG_WEIGHTS] = y_mkl_mem; */ // z - auto z_user_mem = dnnl::memory(z_user_md, engine, zR->getBuffer()); + auto z_user_mem = dnnl::memory(z_user_md, engine, zR->buffer()); const bool zReorder = op_prim_desc.dst_desc() != z_user_mem.get_desc(); auto z_mkl_mem = zReorder ? dnnl::memory(op_prim_desc.dst_desc(), engine) : z_user_mem; args[DNNL_ARG_DST] = z_mkl_mem; @@ -215,7 +215,7 @@ static void matmulMKLDNN(const NDArray* x, const NDArray* y, NDArray* z, const b stream.wait(); - if(zR->getBuffer() != z->getBuffer()) + if(zR->buffer() != z->buffer()) z->assign(zR); if(zR != z) diff --git a/libnd4j/include/ops/declarable/platform/mkldnn/mkldnnUtils.cpp b/libnd4j/include/ops/declarable/platform/mkldnn/mkldnnUtils.cpp index b8e489c4c..bc79e6169 100644 --- a/libnd4j/include/ops/declarable/platform/mkldnn/mkldnnUtils.cpp +++ b/libnd4j/include/ops/declarable/platform/mkldnn/mkldnnUtils.cpp @@ -71,7 +71,7 @@ void setBlockStrides(const NDArray* array, dnnl::memory::desc& mklMd){ void loadDataToMklStream(const NDArray* array, const dnnl::engine& engine, const dnnl::stream& stream, const dnnl::memory::desc& user_md, const dnnl::memory::desc& primitive_md, dnnl::memory& arg) { - auto user_mem = dnnl::memory(user_md, engine, array->getBuffer()); + auto user_mem = dnnl::memory(user_md, engine,const_cast(array->buffer())); const bool bReorder = primitive_md != user_mem.get_desc(); auto mkl_mem = bReorder ? 
dnnl::memory(primitive_md, engine) : user_mem; if (bReorder) @@ -167,7 +167,7 @@ void poolingMKLDNN(const NDArray *input, NDArray *output, mkldnnUtils::loadDataToMklStream(input, engine, stream, x_user_md, op_prim_desc.src_desc(), args[DNNL_ARG_SRC]); // output - auto z_user_mem = dnnl::memory(z_user_md, engine, output->getBuffer()); + auto z_user_mem = dnnl::memory(z_user_md, engine, output->buffer()); const bool zReorder = op_prim_desc.dst_desc() != z_user_mem.get_desc(); auto z_mkl_mem = zReorder ? dnnl::memory(op_prim_desc.dst_desc(), engine) : z_user_mem; args[DNNL_ARG_DST] = z_mkl_mem; @@ -285,7 +285,7 @@ void poolingBpMKLDNN(const NDArray *input, const NDArray *gradO, NDArray *gradI, mkldnnUtils::loadDataToMklStream(gradO, engine, stream, gradO_user_md, op_bp_prim_desc.diff_dst_desc(), args[DNNL_ARG_DIFF_DST]); // gradI - auto gradI_user_mem = dnnl::memory(gradI_user_md, engine, gradI->getBuffer()); + auto gradI_user_mem = dnnl::memory(gradI_user_md, engine, gradI->buffer()); const bool gradIReorder = op_bp_prim_desc.diff_src_desc() != gradI_user_mem.get_desc(); auto gradI_mkl_mem = gradIReorder ? dnnl::memory(op_bp_prim_desc.diff_src_desc(), engine) : gradI_user_mem; args[DNNL_ARG_DIFF_SRC] = gradI_mkl_mem; @@ -322,7 +322,7 @@ void poolingBpMKLDNN(const NDArray *input, const NDArray *gradO, NDArray *gradI, void getMKLDNNMemoryDescLrn(const NDArray* src, const NDArray* diff_src, const NDArray* dst, dnnl::memory::desc* lrn_src_md, dnnl::memory::desc* lrn_diff_src_md, dnnl::memory::desc* lrn_dst_md, dnnl::memory::desc* user_src_md, dnnl::memory::desc* user_diff_src_md, dnnl::memory::desc* user_dst_md, int axis) { - const Nd4jLong* shape = src->getShapeInfo(); + const Nd4jLong* shape = src->shapeInfo(); long rank = shape[0]; long dim1 = axis; // MKL-DNN supports only 1 axis, which has to be the "channel" one long dim2 = axis >= 2 ? 
1 : 2; @@ -333,7 +333,7 @@ void getMKLDNNMemoryDescLrn(const NDArray* src, const NDArray* diff_src, const N auto format = axis == 1 ? dnnl::memory::format_tag::nchw : dnnl::memory::format_tag::nhwc; auto supposed_to_be_any_format = format; // doesn't work with "any" - if (src != nullptr && src->getBuffer() != nullptr && lrn_src_md != nullptr) { + if (src != nullptr && src->buffer() != nullptr && lrn_src_md != nullptr) { *lrn_src_md = dnnl::memory::desc({ lrn_src_tz }, type, supposed_to_be_any_format); *user_src_md = dnnl::memory::desc({ lrn_src_tz }, type, format); user_src_md->data.format_kind = dnnl_blocked; @@ -343,7 +343,7 @@ void getMKLDNNMemoryDescLrn(const NDArray* src, const NDArray* diff_src, const N user_src_md->data.format_desc.blocking.strides[3] = rank > 3 ? src->stridesOf()[dim3] : 1; } - if (diff_src != nullptr && diff_src->getBuffer() != nullptr && lrn_diff_src_md != nullptr) { + if (diff_src != nullptr && diff_src->buffer() != nullptr && lrn_diff_src_md != nullptr) { *lrn_diff_src_md = dnnl::memory::desc({ lrn_src_tz }, type, supposed_to_be_any_format); *user_diff_src_md = dnnl::memory::desc({ lrn_src_tz }, type, format); user_diff_src_md->data.format_kind = dnnl_blocked; @@ -353,7 +353,7 @@ void getMKLDNNMemoryDescLrn(const NDArray* src, const NDArray* diff_src, const N user_diff_src_md->data.format_desc.blocking.strides[3] = rank > 3 ? diff_src->stridesOf()[dim3] : 1; } - if (dst != nullptr && dst->getBuffer() != nullptr && lrn_dst_md != nullptr) { + if (dst != nullptr && dst->buffer() != nullptr && lrn_dst_md != nullptr) { *lrn_dst_md = dnnl::memory::desc({ lrn_src_tz }, type, supposed_to_be_any_format); *user_dst_md = dnnl::memory::desc({ lrn_src_tz }, type, format); user_dst_md->data.format_kind = dnnl_blocked; @@ -396,7 +396,7 @@ void getMKLDNNMemoryDescPool2d( auto format = isNCHW ? 
dnnl::memory::format_tag::nchw : dnnl::memory::format_tag::nhwc; auto supposed_to_be_any_format = dnnl::memory::format_tag::nChw8c; // doesn't work with "any" - if (src != nullptr && src->getBuffer() != nullptr && pool_src_md != nullptr) { + if (src != nullptr && src->buffer() != nullptr && pool_src_md != nullptr) { *pool_src_md = dnnl::memory::desc({ pool_src_tz }, type, supposed_to_be_any_format); *user_src_md = dnnl::memory::desc({ pool_src_tz }, type, format); user_src_md->data.format_kind = dnnl_blocked; // overrides "format = isNCHW ? nchw : nhwc" @@ -406,7 +406,7 @@ void getMKLDNNMemoryDescPool2d( user_src_md->data.format_desc.blocking.strides[3] = src->stridesOf()[isNCHW ? 3 : 2]; } - if (diff_src != nullptr && diff_src->getBuffer() != nullptr && pool_diff_src_md != nullptr) { + if (diff_src != nullptr && diff_src->buffer() != nullptr && pool_diff_src_md != nullptr) { *pool_diff_src_md = dnnl::memory::desc({ pool_src_tz }, type, supposed_to_be_any_format); *user_diff_src_md = dnnl::memory::desc({ pool_src_tz }, type, format); user_diff_src_md->data.format_kind = dnnl_blocked; // overrides "format = isNCHW ? nchw : nhwc" @@ -416,7 +416,7 @@ void getMKLDNNMemoryDescPool2d( user_diff_src_md->data.format_desc.blocking.strides[3] = diff_src->stridesOf()[isNCHW ? 3 : 2]; } - if (dst != nullptr && dst->getBuffer() != nullptr && pool_dst_md != nullptr) { + if (dst != nullptr && dst->buffer() != nullptr && pool_dst_md != nullptr) { *pool_dst_md = dnnl::memory::desc({ pool_dst_tz }, type, supposed_to_be_any_format); *user_dst_md = dnnl::memory::desc({ pool_dst_tz }, type, format); user_dst_md->data.format_kind = dnnl_blocked; // overrides "format = isNCHW ? nchw : nhwc" @@ -452,7 +452,7 @@ void getMKLDNNMemoryDescPool3d( auto format = isNCDHW ? 
dnnl::memory::format_tag::ncdhw : dnnl::memory::format_tag::ndhwc; auto supposed_to_be_any_format = dnnl::memory::format_tag::nCdhw8c; // doesn't work with "any" - if (src != nullptr && src->getBuffer() != nullptr && pool_src_md != nullptr) { + if (src != nullptr && src->buffer() != nullptr && pool_src_md != nullptr) { *pool_src_md = dnnl::memory::desc({ pool_src_tz }, type, supposed_to_be_any_format); *user_src_md = dnnl::memory::desc({ pool_src_tz }, type, format); user_src_md->data.format_kind = dnnl_blocked; // overrides "format = isNCDHW ? ncdhw : ndhwc" @@ -463,7 +463,7 @@ void getMKLDNNMemoryDescPool3d( user_src_md->data.format_desc.blocking.strides[4] = src->stridesOf()[isNCDHW ? 4 : 3]; } - if (diff_src != nullptr && diff_src->getBuffer() != nullptr && pool_diff_src_md != nullptr) { + if (diff_src != nullptr && diff_src->buffer() != nullptr && pool_diff_src_md != nullptr) { *pool_diff_src_md = dnnl::memory::desc({ pool_src_tz }, type, supposed_to_be_any_format); *user_diff_src_md = dnnl::memory::desc({ pool_src_tz }, type, format); user_diff_src_md->data.format_kind = dnnl_blocked; // overrides "format = isNCDHW ? ncdhw : ndhwc" @@ -474,7 +474,7 @@ void getMKLDNNMemoryDescPool3d( user_diff_src_md->data.format_desc.blocking.strides[4] = diff_src->stridesOf()[isNCDHW ? 4 : 3]; } - if (dst != nullptr && dst->getBuffer() != nullptr && pool_dst_md != nullptr) { + if (dst != nullptr && dst->buffer() != nullptr && pool_dst_md != nullptr) { *pool_dst_md = dnnl::memory::desc({ pool_dst_tz }, type, supposed_to_be_any_format); *user_dst_md = dnnl::memory::desc({ pool_dst_tz }, type, format); user_dst_md->data.format_kind = dnnl_blocked; // overrides "format = isNCDHW ? 
ncdhw : ndhwc" @@ -656,7 +656,7 @@ void getMKLDNNMemoryDescConv3d( void getMKLDNNMemoryDescBatchNorm(const NDArray* src, const NDArray* diff_src, const NDArray* dst, dnnl::memory::desc* batchnorm_src_md, dnnl::memory::desc* batchnorm_diff_src_md, dnnl::memory::desc* batchnorm_dst_md, dnnl::memory::desc* user_src_md, dnnl::memory::desc* user_diff_src_md, dnnl::memory::desc* user_dst_md, int axis) { - const Nd4jLong* shape = src->getShapeInfo(); + const Nd4jLong* shape = src->shapeInfo(); Nd4jLong rank = shape[0]; Nd4jLong dim1 = axis; // MKL-DNN supports only 1 axis, which has to be the "channel" one Nd4jLong dim2 = axis >= 2 ? 1 : 2; @@ -667,7 +667,7 @@ void getMKLDNNMemoryDescBatchNorm(const NDArray* src, const NDArray* diff_src, c auto format = dnnl::memory::format_tag::nchw; auto supposed_to_be_any_format = dnnl::memory::format_tag::nChw8c; // doesn't work with "any" - if (src != nullptr && src->getBuffer() != nullptr && batchnorm_src_md != nullptr) { + if (src != nullptr && src->buffer() != nullptr && batchnorm_src_md != nullptr) { *batchnorm_src_md = dnnl::memory::desc({ batchnorm_src_tz }, type, supposed_to_be_any_format); *user_src_md = dnnl::memory::desc({ batchnorm_src_tz }, type, format); user_src_md->data.format_kind = dnnl_blocked; // overrides format @@ -677,7 +677,7 @@ void getMKLDNNMemoryDescBatchNorm(const NDArray* src, const NDArray* diff_src, c user_src_md->data.format_desc.blocking.strides[3] = rank > 3 ? 
src->stridesOf()[dim3] : 1; } - if (diff_src != nullptr && diff_src->getBuffer() != nullptr && batchnorm_diff_src_md != nullptr) { + if (diff_src != nullptr && diff_src->buffer() != nullptr && batchnorm_diff_src_md != nullptr) { *batchnorm_diff_src_md = dnnl::memory::desc({ batchnorm_src_tz }, type, supposed_to_be_any_format); *user_diff_src_md = dnnl::memory::desc({ batchnorm_src_tz }, type, format); user_diff_src_md->data.format_kind = dnnl_blocked; // overrides format @@ -687,7 +687,7 @@ void getMKLDNNMemoryDescBatchNorm(const NDArray* src, const NDArray* diff_src, c user_diff_src_md->data.format_desc.blocking.strides[3] = rank > 3 ? diff_src->stridesOf()[dim3] : 1; } - if (dst != nullptr && dst->getBuffer() != nullptr && batchnorm_dst_md != nullptr) { + if (dst != nullptr && dst->buffer() != nullptr && batchnorm_dst_md != nullptr) { *batchnorm_dst_md = dnnl::memory::desc({ batchnorm_src_tz }, type, supposed_to_be_any_format); *user_dst_md = dnnl::memory::desc({ batchnorm_src_tz }, type, format); user_dst_md->data.format_kind = dnnl_blocked; // overrides format diff --git a/libnd4j/include/ops/declarable/platform/mkldnn/softmax.cpp b/libnd4j/include/ops/declarable/platform/mkldnn/softmax.cpp index a178e84c2..932affbd3 100644 --- a/libnd4j/include/ops/declarable/platform/mkldnn/softmax.cpp +++ b/libnd4j/include/ops/declarable/platform/mkldnn/softmax.cpp @@ -83,7 +83,7 @@ namespace sd { mkldnnUtils::loadDataToMklStream(x, engine, stream, x_user_md, op_prim_desc.src_desc(), args[DNNL_ARG_SRC]); // z - auto z_user_mem = dnnl::memory(z_user_md, engine, z->getBuffer()); + auto z_user_mem = dnnl::memory(z_user_md, engine, z->buffer()); const bool zReorder = op_prim_desc.dst_desc() != z_user_mem.get_desc(); auto z_mkl_mem = zReorder ? 
dnnl::memory(op_prim_desc.dst_desc(), engine) : z_user_mem; args[DNNL_ARG_DST] = z_mkl_mem; @@ -191,7 +191,7 @@ namespace sd { mkldnnUtils::loadDataToMklStream(x, engine, stream, x_user_md, op_ff_prim_desc.src_desc(), argsff[DNNL_ARG_SRC]); // dLdx - auto dLdx_user_mem = dnnl::memory(dLdx_user_md, engine, dLdx->getBuffer()); + auto dLdx_user_mem = dnnl::memory(dLdx_user_md, engine, dLdx->buffer()); const bool dLdxReorder = op_ff_prim_desc.dst_desc() != dLdx_user_mem.get_desc(); auto dLdx_mkl_mem = dLdxReorder ? dnnl::memory(op_ff_prim_desc.dst_desc(), engine) : dLdx_user_mem; argsff[DNNL_ARG_DST] = dLdx_mkl_mem; diff --git a/libnd4j/include/ops/declarable/platform/mkldnn/tanh.cpp b/libnd4j/include/ops/declarable/platform/mkldnn/tanh.cpp index fab32f280..53d75d0a9 100644 --- a/libnd4j/include/ops/declarable/platform/mkldnn/tanh.cpp +++ b/libnd4j/include/ops/declarable/platform/mkldnn/tanh.cpp @@ -71,7 +71,7 @@ namespace sd { mkldnnUtils::loadDataToMklStream(x, engine, stream, x_user_md, op_prim_desc.src_desc(), args[DNNL_ARG_SRC]); // z - auto z_user_mem = dnnl::memory(z_user_md, engine, z->getBuffer()); + auto z_user_mem = dnnl::memory(z_user_md, engine, z->buffer()); const bool zReorder = op_prim_desc.dst_desc() != z_user_mem.get_desc(); auto z_mkl_mem = zReorder ? dnnl::memory(op_prim_desc.dst_desc(), engine) : z_user_mem; args[DNNL_ARG_DST] = z_mkl_mem; @@ -168,7 +168,7 @@ namespace sd { mkldnnUtils::loadDataToMklStream(dLdz, engine, stream, dLdz_user_md, op_prim_desc.diff_dst_desc(), args[DNNL_ARG_DIFF_DST]); // dLdx - auto dLdx_user_mem = dnnl::memory(dLdx_user_md, engine, dLdx->getBuffer()); + auto dLdx_user_mem = dnnl::memory(dLdx_user_md, engine, dLdx->buffer()); const bool dLdxReorder = op_prim_desc.diff_src_desc() != dLdx_user_mem.get_desc(); auto dLdx_mkl_mem = dLdxReorder ? 
dnnl::memory(op_prim_desc.diff_src_desc(), engine) : dLdx_user_mem; args[DNNL_ARG_DIFF_SRC] = dLdx_mkl_mem; diff --git a/libnd4j/include/ops/declarable/platform/mkldnn/xw_plus_b.cpp b/libnd4j/include/ops/declarable/platform/mkldnn/xw_plus_b.cpp index 01a003c2c..ab7f340ed 100644 --- a/libnd4j/include/ops/declarable/platform/mkldnn/xw_plus_b.cpp +++ b/libnd4j/include/ops/declarable/platform/mkldnn/xw_plus_b.cpp @@ -131,11 +131,11 @@ namespace sd { mkldnnUtils::loadDataToMklStream(weights, engine, stream, weights_user_md, op_prim_desc.weights_desc(), args[DNNL_ARG_WEIGHTS]); // bias - auto bias_mkl_mem = dnnl::memory(bias_mkl_md, engine, bias->getBuffer()); + auto bias_mkl_mem = dnnl::memory(bias_mkl_md, engine, const_cast(bias->buffer())); args[DNNL_ARG_BIAS] = bias_mkl_mem; // z - auto z_user_mem = dnnl::memory(z_user_md, engine, z->getBuffer()); + auto z_user_mem = dnnl::memory(z_user_md, engine, z->buffer()); const bool zReorder = op_prim_desc.dst_desc() != z_user_mem.get_desc(); auto z_mkl_mem = zReorder ? dnnl::memory(op_prim_desc.dst_desc(), engine) : z_user_mem; args[DNNL_ARG_DST] = z_mkl_mem; @@ -266,19 +266,19 @@ namespace sd { mkldnnUtils::loadDataToMklStream(weights, engine, stream, weights_user_md, op_bpdx_prim_desc.weights_desc(), argsDx[DNNL_ARG_WEIGHTS]); // dLdw - auto dLdw_user_mem = dnnl::memory(dLdw_user_md, engine, dLdw->getBuffer()); + auto dLdw_user_mem = dnnl::memory(dLdw_user_md, engine, dLdw->buffer()); const bool dLdwReorder = op_bpdw_prim_desc.diff_weights_desc() != dLdw_user_mem.get_desc(); auto dLdw_mkl_mem = dLdwReorder ? dnnl::memory(op_bpdw_prim_desc.diff_weights_desc(), engine) : dLdw_user_mem; argsDw[DNNL_ARG_DIFF_WEIGHTS] = dLdw_mkl_mem; // dLdx - auto dLdx_user_mem = dnnl::memory(dLdx_user_md, engine, dLdx->getBuffer()); + auto dLdx_user_mem = dnnl::memory(dLdx_user_md, engine, dLdx->buffer()); const bool dLdxReorder = op_bpdx_prim_desc.diff_src_desc() != dLdx_user_mem.get_desc(); auto dLdx_mkl_mem = dLdxReorder ? 
dnnl::memory(op_bpdx_prim_desc.diff_src_desc(), engine) : dLdx_user_mem; argsDx[DNNL_ARG_DIFF_SRC] = dLdx_mkl_mem; // dLdb - auto dLdb_user_mem = dnnl::memory(dLdb_user_md, engine, dLdb->getBuffer()); + auto dLdb_user_mem = dnnl::memory(dLdb_user_md, engine, dLdb->buffer()); const bool dLdbReorder = op_bpdw_prim_desc.diff_bias_desc() != dLdb_user_mem.get_desc(); auto dLdb_mkl_mem = dLdbReorder ? dnnl::memory(op_bpdw_prim_desc.diff_bias_desc(), engine) : dLdb_user_mem; argsDw[DNNL_ARG_DIFF_BIAS] = dLdb_mkl_mem; diff --git a/libnd4j/include/ops/impl/specials_double.hpp b/libnd4j/include/ops/impl/specials_double.hpp index 96f7d2db2..1eaf3fbc0 100644 --- a/libnd4j/include/ops/impl/specials_double.hpp +++ b/libnd4j/include/ops/impl/specials_double.hpp @@ -50,7 +50,7 @@ namespace sd { template - void quickSort_parallel_internal_key(X* key, Nd4jLong *xShapeInfo, Y* values, Nd4jLong *yShapeInfo, int left, int right, int cutoff, bool descending) { + void quickSort_parallel_internal_key(X* key, Nd4jLong const* xShapeInfo, Y* values, Nd4jLong const* yShapeInfo, int left, int right, int cutoff, bool descending) { int i = left, j = right; X ktmp; X pivot = key[shape::getIndexOffset((left + right) / 2, xShapeInfo)]; @@ -115,7 +115,7 @@ PRAGMA_OMP_TASK template - void quickSort_parallel_internal_value(X* key, Nd4jLong *xShapeInfo, Y* value, Nd4jLong *yShapeInfo, int left, int right, int cutoff, bool descending) { + void quickSort_parallel_internal_value(X* key, Nd4jLong const* xShapeInfo, Y* value, Nd4jLong const* yShapeInfo, int left, int right, int cutoff, bool descending) { int i = left, j = right; X ktmp; Y pivot = value[shape::getIndexOffset((left + right) / 2, yShapeInfo)]; @@ -180,7 +180,7 @@ PRAGMA_OMP_TASK template - static void quickSort_parallel_key(void *varray, Nd4jLong *xShapeInfo, void *yarray, Nd4jLong *yShapeInfo, Nd4jLong lenArray, int numThreads, bool descending){ + static void quickSort_parallel_key(void *varray, Nd4jLong const* xShapeInfo, void *yarray, 
Nd4jLong const* yShapeInfo, Nd4jLong lenArray, int numThreads, bool descending){ auto array = reinterpret_cast(varray); auto values = reinterpret_cast(yarray); int cutoff = 1000; @@ -195,7 +195,7 @@ PRAGMA_OMP_SINGLE_ARGS(nowait) } template - static void quickSort_parallel_value(void *varray, Nd4jLong *xShapeInfo, void *yarray, Nd4jLong *yShapeInfo, Nd4jLong lenArray, int numThreads, bool descending){ + static void quickSort_parallel_value(void *varray, Nd4jLong const* xShapeInfo, void *yarray, Nd4jLong const* yShapeInfo, Nd4jLong lenArray, int numThreads, bool descending){ auto array = reinterpret_cast(varray); auto values = reinterpret_cast(yarray); int cutoff = 1000; @@ -210,17 +210,17 @@ PRAGMA_OMP_SINGLE_ARGS(nowait) } template - void DoubleMethods::sortByKey(void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, bool descending) { + void DoubleMethods::sortByKey(void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, bool descending) { quickSort_parallel_key(vx, xShapeInfo, vy, yShapeInfo, shape::length(xShapeInfo), omp_get_max_threads(), descending); } template - void DoubleMethods::sortByValue(void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, bool descending) { + void DoubleMethods::sortByValue(void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, bool descending) { quickSort_parallel_value(vx, xShapeInfo, vy, yShapeInfo, shape::length(xShapeInfo), omp_get_max_threads(), descending); } template - void DoubleMethods::sortTadByKey(void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, int *dimension, int dimensionLength, bool descending) { + void DoubleMethods::sortTadByKey(void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, int *dimension, int dimensionLength, bool descending) { auto x = reinterpret_cast(vx); auto y = reinterpret_cast(vy); @@ -244,7 +244,7 @@ PRAGMA_OMP_SINGLE_ARGS(nowait) } template - void DoubleMethods::sortTadByValue(void *vx, Nd4jLong 
*xShapeInfo, void *vy, Nd4jLong *yShapeInfo, int *dimension, int dimensionLength, bool descending) { + void DoubleMethods::sortTadByValue(void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, int *dimension, int dimensionLength, bool descending) { auto x = reinterpret_cast(vx); auto y = reinterpret_cast(vy); diff --git a/libnd4j/include/ops/impl/specials_single.hpp b/libnd4j/include/ops/impl/specials_single.hpp index ed86315f7..9a700251c 100644 --- a/libnd4j/include/ops/impl/specials_single.hpp +++ b/libnd4j/include/ops/impl/specials_single.hpp @@ -100,7 +100,7 @@ namespace sd { // auto func = PRAGMA_THREADS_FOR { // for (auto i = start; i < stop; i += increment) { // auto temp = output(indices[i], true); -// sd::TransformLoops::template loopTransform>( inArrs[i]->bufferAsT(), inArrs[i]->getShapeInfo(), temp.bufferAsT(), temp.getShapeInfo(), nullptr, 0, 1); +// sd::TransformLoops::template loopTransform>( inArrs[i]->bufferAsT(), inArrs[i]->shapeInfo(), temp.bufferAsT(), temp.shapeInfo(), nullptr, 0, 1); // } // }; @@ -150,7 +150,7 @@ void SpecialMethods::concatCpuGeneric(const std::vector& inAr // if(!areInputsContin || !allSameOrder) // break; - // strideOfContigStride[i] = shape::strideOverContigAxis(axis, inArrs[i]->getShapeInfo()); + // strideOfContigStride[i] = shape::strideOverContigAxis(axis, inArrs[i]->shapeInfo()); // } // } @@ -158,7 +158,7 @@ void SpecialMethods::concatCpuGeneric(const std::vector& inAr // if(luckCase2) { // for example {2,1,3} + {2,5,3} + {2,10,3} = {2,16,3}, here axis 1 shoud have stride = 1 for all inputs arrays and output array - // const auto zStep = shape::strideOverContigAxis(axis, output.getShapeInfo()); + // const auto zStep = shape::strideOverContigAxis(axis, output.shapeInfo()); // for (uint i = 0; i < output.lengthOf() / output.sizeAt(axis); ++i) { @@ -182,9 +182,9 @@ void SpecialMethods::concatCpuGeneric(const std::vector& inAr for (auto i = start; i < stop; i += increment) { - 
shape::index2coordsCPU(start, i, output.getShapeInfo(), coords); + shape::index2coordsCPU(start, i, output.shapeInfo(), coords); - const auto zOffset = shape::getOffset(output.getShapeInfo(), coords); + const auto zOffset = shape::getOffset(output.shapeInfo(), coords); uint inArrIdx = 0; uint xDim = inArrs[inArrIdx]->sizeAt(axis); @@ -196,7 +196,7 @@ void SpecialMethods::concatCpuGeneric(const std::vector& inAr } const T* x = inArrs[inArrIdx]->bufferAsT(); - const auto xOffset = shape::getOffset(inArrs[inArrIdx]->getShapeInfo(), coords); + const auto xOffset = shape::getOffset(inArrs[inArrIdx]->shapeInfo(), coords); zBuff[zOffset] = x[xOffset]; @@ -212,11 +212,11 @@ void SpecialMethods::concatCpuGeneric(const std::vector& inAr * along a particular dimension */ template -void SpecialMethods::concatCpuGeneric(int dimension, int numArrays, Nd4jPointer *data, Nd4jPointer *inputShapeInfo, void *vresult, Nd4jLong *resultShapeInfo) { +void SpecialMethods::concatCpuGeneric(int dimension, int numArrays, Nd4jPointer *data, Nd4jPointer *inputShapeInfo, void *vresult, Nd4jLong const* resultShapeInfo) { auto result = reinterpret_cast(vresult); std::vector inputs(numArrays); - NDArray output(static_cast(result), static_cast(resultShapeInfo)); + NDArray output(static_cast(result), resultShapeInfo); for(int i = 0; i < numArrays; ++i) inputs[i] = new NDArray(static_cast(data[i]), static_cast(inputShapeInfo[i])); @@ -235,7 +235,7 @@ void SpecialMethods::splitCpuGeneric(const NDArray& input, const std::vector< const auto sizeofT = input.sizeOfT(); - T* xBuff = input.bufferAsT(); + auto xBuff = input.bufferAsT(); bool luckCase1 = ((axis == 0 && input.ordering() == 'c') || (axis == input.rankOf() - 1 && input.ordering() == 'f')) && input.ews() == 1; @@ -272,7 +272,7 @@ void SpecialMethods::splitCpuGeneric(const NDArray& input, const std::vector< // if (!areOutsContin || !allSameOrder) // break; - // strideOfContigStride[i] = shape::strideOverContigAxis(axis, 
outArrs[i]->getShapeInfo()); + // strideOfContigStride[i] = shape::strideOverContigAxis(axis, outArrs[i]->shapeInfo()); // } // } @@ -280,7 +280,7 @@ void SpecialMethods::splitCpuGeneric(const NDArray& input, const std::vector< // if (luckCase2) { - // const auto xStep = shape::strideOverContigAxis(axis, input.getShapeInfo()); + // const auto xStep = shape::strideOverContigAxis(axis, input.shapeInfo()); // for (uint i = 0; i < input.lengthOf() / input.sizeAt(axis); ++i) { @@ -306,8 +306,8 @@ void SpecialMethods::splitCpuGeneric(const NDArray& input, const std::vector< for (auto i = start; i < stop; i += increment) { - shape::index2coordsCPU(start, i, input.getShapeInfo(), coords); - const auto xOffset = shape::getOffset(input.getShapeInfo(), coords); + shape::index2coordsCPU(start, i, input.shapeInfo(), coords); + const auto xOffset = shape::getOffset(input.shapeInfo(), coords); uint outArrIdx = 0; temp = coords[axis]; @@ -318,7 +318,7 @@ void SpecialMethods::splitCpuGeneric(const NDArray& input, const std::vector< } T* z = outArrs[outArrIdx]->bufferAsT(); - const auto zOffset = shape::getOffset(outArrs[outArrIdx]->getShapeInfo(), coords); + const auto zOffset = shape::getOffset(outArrs[outArrIdx]->shapeInfo(), coords); z[zOffset] = xBuff[xOffset]; coords[axis] = temp; @@ -339,7 +339,7 @@ void SpecialMethods::splitCpuGeneric(const NDArray& input, const std::vector< * @param length */ template - void SpecialMethods::accumulateGeneric(void **vx, void *vz, Nd4jLong *zShapeInfo, int n, const Nd4jLong length) { + void SpecialMethods::accumulateGeneric(void **vx, void *vz, Nd4jLong const* zShapeInfo, int n, const Nd4jLong length) { auto z = reinterpret_cast(vz); auto x = reinterpret_cast(vx); @@ -366,7 +366,7 @@ void SpecialMethods::splitCpuGeneric(const NDArray& input, const std::vector< * @param propagate */ template - void SpecialMethods::averageGeneric(void **vx, void *vz, Nd4jLong *zShapeInfo, int n, const Nd4jLong length, bool propagate) { + void 
SpecialMethods::averageGeneric(void **vx, void *vz, Nd4jLong const* zShapeInfo, int n, const Nd4jLong length, bool propagate) { auto z = reinterpret_cast(vz); auto x = reinterpret_cast(vx); @@ -416,7 +416,7 @@ void SpecialMethods::splitCpuGeneric(const NDArray& input, const std::vector< } template - Nd4jLong SpecialMethods::getPosition(Nd4jLong *xShapeInfo, Nd4jLong index) { + Nd4jLong SpecialMethods::getPosition(Nd4jLong const* xShapeInfo, Nd4jLong index) { auto xEWS = shape::elementWiseStride(xShapeInfo); if (xEWS == 1) @@ -428,7 +428,7 @@ void SpecialMethods::splitCpuGeneric(const NDArray& input, const std::vector< } template - void SpecialMethods::quickSort_parallel_internal(T* array, Nd4jLong *xShapeInfo, int left, int right, int cutoff, bool descending) { + void SpecialMethods::quickSort_parallel_internal(T* array, Nd4jLong const* xShapeInfo, int left, int right, int cutoff, bool descending) { int i = left, j = right; T tmp; @@ -482,7 +482,7 @@ PRAGMA_OMP_TASK } template - void SpecialMethods::quickSort_parallel(void *varray, Nd4jLong *xShapeInfo, Nd4jLong lenArray, int numThreads, bool descending){ + void SpecialMethods::quickSort_parallel(void *varray, Nd4jLong const* xShapeInfo, Nd4jLong lenArray, int numThreads, bool descending){ auto array = reinterpret_cast(varray); int cutoff = 1000; @@ -521,14 +521,14 @@ PRAGMA_OMP_SINGLE_ARGS(nowait) template - void SpecialMethods::sortGeneric(void *vx, Nd4jLong *xShapeInfo, bool descending) { + void SpecialMethods::sortGeneric(void *vx, Nd4jLong const* xShapeInfo, bool descending) { auto x = reinterpret_cast(vx); quickSort_parallel(x, xShapeInfo, shape::length(xShapeInfo), omp_get_max_threads(), descending); } template - void SpecialMethods::sortTadGeneric(void *vx, Nd4jLong *xShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool descending) { + void SpecialMethods::sortTadGeneric(void *vx, Nd4jLong const* xShapeInfo, int *dimension, int dimensionLength, Nd4jLong const* 
tadShapeInfo, Nd4jLong const* tadOffsets, bool descending) { auto x = reinterpret_cast(vx); //quickSort_parallel(x, xShapeInfo, shape::length(xShapeInfo), omp_get_max_threads(), descending); @@ -548,30 +548,35 @@ PRAGMA_OMP_SINGLE_ARGS(nowait) template - void SpecialMethods::decodeBitmapGeneric(void *dx, Nd4jLong N, void *vz, Nd4jLong *zShapeInfo) { + void SpecialMethods::decodeBitmapGeneric(const void *dx, Nd4jLong N, void *vz, Nd4jLong const* zShapeInfo) { auto dz = reinterpret_cast(vz); - auto x = reinterpret_cast(dx); + auto x = reinterpret_cast(dx); Nd4jLong lim = N / 16 + 5; FloatBits2 fb; fb.i_ = x[2]; float threshold = fb.f_; + auto pPos = -1; auto func = PRAGMA_THREADS_FOR { for (auto e = start; e < stop; e++) { + const auto v = x[e]; for (int bitId = 0; bitId < 16; bitId++) { - bool hasBit = (x[e] & 1 << (bitId)) != 0; - bool hasSign = (x[e] & 1 << (bitId + 16)) != 0; + bool hasBit = (v & 1 << (bitId)) != 0; + bool hasSign = (v & 1 << (bitId + 16)) != 0; + auto cPos = (e - 4) * 16 + bitId; if (hasBit) { if (hasSign) - dz[(e - 4) * 16 + bitId] -= static_cast(threshold); + dz[cPos] -= static_cast(threshold); else - dz[(e - 4) * 16 + bitId] += static_cast(threshold); + dz[cPos] += static_cast(threshold); } else if (hasSign) { - dz[(e - 4) * 16 + bitId] -= static_cast(threshold / 2); + dz[cPos] -= static_cast(threshold / 2); } + + pPos = cPos; } } }; @@ -580,19 +585,23 @@ PRAGMA_OMP_SINGLE_ARGS(nowait) } template - Nd4jLong SpecialMethods::encodeBitmapGeneric(void *vx, Nd4jLong *xShapeInfo, Nd4jLong N, int *dz, float threshold) { + Nd4jLong SpecialMethods::encodeBitmapGeneric(void *vx, Nd4jLong const* xShapeInfo, Nd4jLong N, int *dz, float threshold) { auto dx = reinterpret_cast(vx); + const T two(2.0f); + const T zero(0.0f); + const T t(threshold); + const T thalf = t / two; -//PRAGMA_OMP_PARALLEL_FOR_ARGS(schedule(guided) proc_bind(close) reduction(+:retVal)) - auto func = PRAGMA_REDUCE_LONG { + //auto func = PRAGMA_REDUCE_LONG { Nd4jLong retVal = 0L; - for 
(auto x = start; x < stop; x += increment) { + PRAGMA_OMP_PARALLEL_FOR_REDUCTION(+:retVal) + for (auto x = 0; x < N; x += 16) { int byte = 0; int byteId = x / 16 + 4; for (int f = 0; f < 16; f++) { - Nd4jLong e = x + f; + auto e = x + f; if (e >= N) continue; @@ -602,19 +611,19 @@ PRAGMA_OMP_SINGLE_ARGS(nowait) int bitId = e % 16; - if (abs >= (T) threshold) { + if (abs >= t) { byte |= 1 << (bitId); retVal++; - if (val < (T) 0.0f) { + if (val < zero) { byte |= 1 << (bitId + 16); - dx[e] += static_cast(threshold); + dx[e] += t; } else { - dx[e] -= static_cast(threshold); + dx[e] -= t; } - } else if (abs >= (T) threshold / (T) 2.0f && val < (T) 0.0f) { + } else if (abs >= thalf && val < zero) { byte |= 1 << (bitId + 16); - dx[e] += static_cast(threshold / 2); + dx[e] += thalf; retVal++; } @@ -624,8 +633,9 @@ PRAGMA_OMP_SINGLE_ARGS(nowait) } return retVal; - }; - return samediff::Threads::parallel_long(func, LAMBDA_SUML, 0, N, 16); + //}; + + //return samediff::Threads::parallel_long(func, LAMBDA_SUML, 0, N, 16); } } diff --git a/libnd4j/include/ops/ops.h b/libnd4j/include/ops/ops.h index 2f02af11b..21cd07c40 100644 --- a/libnd4j/include/ops/ops.h +++ b/libnd4j/include/ops/ops.h @@ -37,21 +37,21 @@ #define DOUBLE_PI_T T(2.0 * 3.14159265358979323846) #define DOUBLE_PI_X X(2.0 * 3.14159265358979323846) -#define no_op_exec_special_any static const bool requiresSpecial = false; static void execSpecial(X *dx, Nd4jLong *xShapeBuffer, Z *result, Nd4jLong *resultShapeBuffer, X *extraParams, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) {} -#define no_op_exec_special_bool static const bool requiresSpecial = false; static void execSpecial(X *dx, Nd4jLong *xShapeBuffer, Z *result, Nd4jLong *resultShapeBuffer, X *extraParams, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) {} -#define no_op_exec_special_same static const bool requiresSpecial = false; static void execSpecial(X *dx, Nd4jLong *xShapeBuffer, X *result, Nd4jLong *resultShapeBuffer, X *extraParams, Nd4jLong 
*tadShapeInfo, Nd4jLong *tadOffsets) {} -#define no_op_exec_special static const bool requiresSpecial = false; static void execSpecial(X *dx, Nd4jLong *xShapeBuffer, Z *result, Nd4jLong *resultShapeBuffer, Z *extraParams, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) {} -#define no_op_exec_special_accumulation static const bool requiresSpecialAccumulation = false; static void execSpecial(X *x, Nd4jLong *xShapeInfo, Z *extraParams, Z *result, Nd4jLong *resultShapeInfoBuffer, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffset){} -#define no_op_exec_special_accumulation_long static const bool requiresSpecialAccumulation = false; static void execSpecial(X *x, Nd4jLong *xShapeInfo, X *extraParams, Z *result, Nd4jLong *resultShapeInfoBuffer, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffset){} -#define no_op_exec_special_accumulation_same static const bool requiresSpecialAccumulation = false; static void execSpecial(X *x, Nd4jLong *xShapeInfo, X *extraParams, X *result, Nd4jLong *resultShapeInfoBuffer, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffset){} +#define no_op_exec_special_any static const bool requiresSpecial = false; static void execSpecial(const X *dx, const Nd4jLong *xShapeBuffer, Z *result, const Nd4jLong *resultShapeBuffer, X *extraParams, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) {} +#define no_op_exec_special_bool static const bool requiresSpecial = false; static void execSpecial(const X *dx, const Nd4jLong *xShapeBuffer, Z *result, const Nd4jLong *resultShapeBuffer, X *extraParams, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) {} +#define no_op_exec_special_same static const bool requiresSpecial = false; static void execSpecial(const X *dx, const Nd4jLong *xShapeBuffer, X *result, const Nd4jLong *resultShapeBuffer, X *extraParams, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) {} +#define no_op_exec_special static const bool 
requiresSpecial = false; static void execSpecial(const X *dx, const Nd4jLong *xShapeBuffer, Z *result, const Nd4jLong *resultShapeBuffer, Z *extraParams, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) {} +#define no_op_exec_special_accumulation static const bool requiresSpecialAccumulation = false; static void execSpecial(const X *x, const Nd4jLong *xShapeInfo, Z *extraParams, Z *result, const Nd4jLong *resultShapeInfoBuffer, int *dimension, int dimensionLength, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset){} +#define no_op_exec_special_accumulation_long static const bool requiresSpecialAccumulation = false; static void execSpecial(const X *x, const Nd4jLong *xShapeInfo, X *extraParams, Z *result, const Nd4jLong *resultShapeInfoBuffer, int *dimension, int dimensionLength, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset){} +#define no_op_exec_special_accumulation_same static const bool requiresSpecialAccumulation = false; static void execSpecial(const X *x, const Nd4jLong *xShapeInfo, X *extraParams, X *result, const Nd4jLong *resultShapeInfoBuffer, int *dimension, int dimensionLength, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffset){} #ifdef __CUDACC__ -#define no_op_exec_special_any_cuda static __device__ void execSpecialCuda(X *dx, Nd4jLong *xShapeBuffer, Z *result, Nd4jLong *resultShapeBuffer, X *extraParams, int *allocationPointer, Z *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) {} -#define no_op_exec_special_bool_cuda static __device__ void execSpecialCuda(X *dx, Nd4jLong *xShapeBuffer, Z *result, Nd4jLong *resultShapeBuffer, X *extraParams, int *allocationPointer, Z *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) {} -#define no_op_exec_special_same_cuda static __device__ void execSpecialCuda(X *dx, Nd4jLong *xShapeBuffer, X *result, Nd4jLong *resultShapeBuffer, X *extraParams, int *allocationPointer, X *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) {} -#define 
no_op_exec_special_cuda static __device__ void execSpecialCuda(X *dx, Nd4jLong *xShapeBuffer,Z *result, Nd4jLong *resultShapeBuffer,Z *extraParams, int *allocationPointer, Z *reductionPointer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) {} -#define no_op_exec_special_accumulation_same_cuda static inline __device__ void execSpecialCuda(X *dx, Nd4jLong *xShapeInfo, X *extraParams, X *result, Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, X *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) {} -#define no_op_exec_special_accumulation_long_cuda static inline __device__ void execSpecialCuda(X *dx, Nd4jLong *xShapeInfo, X *extraParams, Z *result, Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, Z *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) {} -#define no_op_exec_special_accumulation_cuda static inline __device__ void execSpecialCuda(X *dx, Nd4jLong *xShapeInfo, Z *extraParams, Z *result, Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, Z *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) {} +#define no_op_exec_special_any_cuda static __device__ void execSpecialCuda(const X *dx, const Nd4jLong *xShapeBuffer, Z *result, const Nd4jLong *resultShapeBuffer, X *extraParams, int *allocationPointer, Z *reductionPointer, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) {} +#define no_op_exec_special_bool_cuda static __device__ void execSpecialCuda(const X *dx, const Nd4jLong *xShapeBuffer, Z *result, const Nd4jLong *resultShapeBuffer, X *extraParams, int *allocationPointer, Z *reductionPointer, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) {} +#define no_op_exec_special_same_cuda static __device__ void execSpecialCuda(const X *dx, const Nd4jLong *xShapeBuffer, X *result, const Nd4jLong *resultShapeBuffer, X *extraParams, int *allocationPointer, X *reductionPointer, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) {} +#define 
no_op_exec_special_cuda static __device__ void execSpecialCuda(const X *dx, const Nd4jLong *xShapeBuffer,Z *result, const Nd4jLong *resultShapeBuffer,Z *extraParams, int *allocationPointer, Z *reductionPointer, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) {} +#define no_op_exec_special_accumulation_same_cuda static inline __device__ void execSpecialCuda(const X *dx, const Nd4jLong *xShapeInfo, X *extraParams, X *result, const Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, X *reductionBuffer, const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets) {} +#define no_op_exec_special_accumulation_long_cuda static inline __device__ void execSpecialCuda(const X *dx, const Nd4jLong *xShapeInfo, X *extraParams, Z *result, const Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, Z *reductionBuffer, const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets) {} +#define no_op_exec_special_accumulation_cuda static inline __device__ void execSpecialCuda(const X *dx, const Nd4jLong *xShapeInfo, Z *extraParams, Z *result, const Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, Z *reductionBuffer, const Nd4jLong *tadOnlyShapeInfo, const Nd4jLong *tadOffsets) {} #else // hacky fix for isnan/being being out of scope @@ -4017,7 +4017,7 @@ namespace simdOps { return 0; } - static _CUDA_HD inline functions::indexreduce::IndexValue startingIndexValue(X *input) { + static _CUDA_HD inline functions::indexreduce::IndexValue startingIndexValue(const X *input) { functions::indexreduce::IndexValue local; local.value = startingValue(input); local.index = 0; @@ -4064,7 +4064,7 @@ namespace simdOps { return -sd::DataTypeUtils::infOrMax(); } - static _CUDA_HD inline functions::indexreduce::IndexValue startingIndexValue(X *input) { + static _CUDA_HD inline functions::indexreduce::IndexValue startingIndexValue(const X *input) { functions::indexreduce::IndexValue local; local.value = startingValue(input); local.index = -1; @@ -4123,7 
+4123,7 @@ namespace simdOps { return -sd::DataTypeUtils::infOrMax(); } - static _CUDA_HD inline functions::indexreduce::IndexValue startingIndexValue(X *input) { + static _CUDA_HD inline functions::indexreduce::IndexValue startingIndexValue(const X *input) { functions::indexreduce::IndexValue local; local.value = startingValue(input); local.index = -1; @@ -4191,7 +4191,7 @@ namespace simdOps { return -sd::DataTypeUtils::infOrMax(); } - static _CUDA_HD inline functions::indexreduce::IndexValue startingIndexValue(X *input) { + static _CUDA_HD inline functions::indexreduce::IndexValue startingIndexValue(const X *input) { functions::indexreduce::IndexValue local; local.value = startingValue(input); local.index = 0; @@ -4217,7 +4217,7 @@ namespace simdOps { return sd::DataTypeUtils::infOrMax(); } - static _CUDA_HD inline functions::indexreduce::IndexValue startingIndexValue(X *input) { + static _CUDA_HD inline functions::indexreduce::IndexValue startingIndexValue(const X *input) { functions::indexreduce::IndexValue local; local.value = startingValue(input); local.index = 0; @@ -4273,7 +4273,7 @@ namespace simdOps { return sd::DataTypeUtils::infOrMax(); } - static _CUDA_HD inline functions::indexreduce::IndexValue startingIndexValue(X *input) { + static _CUDA_HD inline functions::indexreduce::IndexValue startingIndexValue(const X *input) { functions::indexreduce::IndexValue local; local.value = startingValue(input); local.index = 0; diff --git a/libnd4j/include/ops/random_ops.h b/libnd4j/include/ops/random_ops.h index 939ffa975..d738589a7 100644 --- a/libnd4j/include/ops/random_ops.h +++ b/libnd4j/include/ops/random_ops.h @@ -32,10 +32,10 @@ #define method_X random_def T op(T valueX, Nd4jLong idx, Nd4jLong length, sd::graph::RandomGenerator* rng, T *extraParams) { return -2.0f; } #define method_XY random_def T op(T valueX, T valueY, Nd4jLong idx, Nd4jLong length, sd::graph::RandomGenerator* rng, T *extraParams) { return -3.0f; } -#define no_exec_special static const 
bool requiresSpecial = false; static inline void specialOp(Nd4jPointer state, T *x, Nd4jLong *xShapeBuffer, T *y, Nd4jLong *yShapeBuffer, T *z, Nd4jLong *zShapeBuffer, T *extraArguments) { } +#define no_exec_special static const bool requiresSpecial = false; static inline void specialOp(Nd4jPointer state, const T *x, const Nd4jLong *xShapeBuffer, const T *y, const Nd4jLong *yShapeBuffer, T *z, const Nd4jLong *zShapeBuffer, T *extraArguments) { } #ifdef __CUDACC__ -#define no_exec_special_cuda __device__ static inline void specialOpCuda(Nd4jPointer state, T *x, Nd4jLong *xShapeBuffer, T *y, Nd4jLong *yShapeBuffer, T *z, Nd4jLong *zShapeBuffer, T *extraArguments) { } +#define no_exec_special_cuda __device__ static inline void specialOpCuda(Nd4jPointer state, T const* x, Nd4jLong const* xShapeBuffer, T const* y, Nd4jLong const* yShapeBuffer, T *z, Nd4jLong const* zShapeBuffer, T *extraArguments) { } #else #define no_exec_special_cuda #endif diff --git a/libnd4j/include/ops/special_random_ops.h b/libnd4j/include/ops/special_random_ops.h index 50a50752e..08808e67c 100644 --- a/libnd4j/include/ops/special_random_ops.h +++ b/libnd4j/include/ops/special_random_ops.h @@ -42,7 +42,7 @@ namespace randomOps { #ifdef __CUDACC__ - __device__ static inline void specialOpCuda(Nd4jPointer state, T *x, Nd4jLong *xShapeBuffer, T *y, Nd4jLong *yShapeBuffer, T *z, Nd4jLong *zShapeBuffer, T *extraArguments) { + __device__ static inline void specialOpCuda(Nd4jPointer state, T const* x, Nd4jLong const* xShapeBuffer, T const* y, Nd4jLong const* yShapeBuffer, T *z, Nd4jLong const* zShapeBuffer, T *extraArguments) { /** * X holds data, * Y holds probabilities @@ -141,7 +141,7 @@ namespace randomOps { } #endif - static inline void specialOp(Nd4jPointer state, T *x, Nd4jLong *xShapeBuffer, T *y, Nd4jLong *yShapeBuffer, T *z, Nd4jLong *zShapeBuffer, T *extraArguments) { + static inline void specialOp(Nd4jPointer state, const T *x, const Nd4jLong *xShapeBuffer, const T *y, const Nd4jLong 
*yShapeBuffer, T *z, const Nd4jLong *zShapeBuffer, T *extraArguments) { /** * X holds data, * Y holds probabilities @@ -230,7 +230,7 @@ namespace randomOps { static const bool requiresSpecial = true; #ifdef __CUDACC__ - __device__ static inline void specialOpCuda(Nd4jPointer state, T *x, Nd4jLong *xShapeBuffer, T *y, Nd4jLong *yShapeBuffer, T *z, Nd4jLong *zShapeBuffer, T *extraArguments) { + __device__ static inline void specialOpCuda(Nd4jPointer state, T const* x, Nd4jLong const* xShapeBuffer, T const* y, Nd4jLong const *yShapeBuffer, T *z, Nd4jLong const* zShapeBuffer, T *extraArguments) { __shared__ T epsilon; __shared__ T two_pi; @@ -304,7 +304,7 @@ namespace randomOps { static inline void - specialOp(Nd4jPointer state, T *x, Nd4jLong *xShapeBuffer, T *y, Nd4jLong *yShapeBuffer, T *z, Nd4jLong *zShapeBuffer, T *extraArguments) { + specialOp(Nd4jPointer state, const T *x, const Nd4jLong *xShapeBuffer, const T *y, const Nd4jLong *yShapeBuffer, T *z, const Nd4jLong *zShapeBuffer, T *extraArguments) { const T two_pi = static_cast(2.0f) * static_cast(3.14159265358979323846); auto zLength = shape::length(zShapeBuffer); @@ -373,7 +373,7 @@ namespace randomOps { static const bool requiresSpecial = true; #ifdef __CUDACC__ - __device__ static inline void specialOpCuda(Nd4jPointer state, T *x, Nd4jLong *xShapeBuffer, T *y, Nd4jLong *yShapeBuffer, T *z, Nd4jLong *zShapeBuffer, T *extraArguments) { + __device__ static inline void specialOpCuda(Nd4jPointer state, T const* x, Nd4jLong const* xShapeBuffer, T const* y, Nd4jLong const* yShapeBuffer, T *z, Nd4jLong const* zShapeBuffer, T *extraArguments) { int trials = (int) extraArguments[0]; T prob = extraArguments[1]; @@ -424,7 +424,7 @@ namespace randomOps { } #endif - static inline void specialOp(Nd4jPointer state, T *x, Nd4jLong *xShapeBuffer, T *y, Nd4jLong *yShapeBuffer, T *z, Nd4jLong *zShapeBuffer, T *extraArguments) { + static inline void specialOp(Nd4jPointer state, const T *x, const Nd4jLong *xShapeBuffer, const T 
*y, const Nd4jLong *yShapeBuffer, T *z, const Nd4jLong *zShapeBuffer, T *extraArguments) { int trials = (int) extraArguments[0]; Nd4jLong zLength = shape::length(zShapeBuffer); @@ -480,7 +480,7 @@ namespace randomOps { static const bool requiresSpecial = true; #ifdef __CUDACC__ - __device__ static inline void specialOpCuda(Nd4jPointer state, T *x, Nd4jLong *xShapeBuffer, T *y, Nd4jLong *yShapeBuffer, T *z, Nd4jLong *zShapeBuffer, T *extraArguments) { + __device__ static inline void specialOpCuda(Nd4jPointer state, T const* x, Nd4jLong const* xShapeBuffer, T const* y, Nd4jLong const* yShapeBuffer, T *z, Nd4jLong const* zShapeBuffer, T *extraArguments) { int trials = (int) extraArguments[0]; T prob = extraArguments[1]; @@ -532,7 +532,7 @@ namespace randomOps { } #endif - static inline void specialOp(Nd4jPointer state, T *x, Nd4jLong *xShapeBuffer, T *y, Nd4jLong *yShapeBuffer, T *z, Nd4jLong *zShapeBuffer, T *extraArguments) { + static inline void specialOp(Nd4jPointer state, const T *x, const Nd4jLong *xShapeBuffer, const T *y, const Nd4jLong *yShapeBuffer, T *z, const Nd4jLong *zShapeBuffer, T *extraArguments) { int trials = (int) extraArguments[0]; Nd4jLong zLength = shape::length(zShapeBuffer); @@ -546,8 +546,7 @@ namespace randomOps { T prob = extraArguments[1]; - //sd::random::RandomBuffer *buffer = reinterpret_cast (state); - sd::graph::RandomGenerator* rng = reinterpret_cast(state); + auto rng = reinterpret_cast(state); auto func = PRAGMA_THREADS_FOR { for (auto e = start; e < stop; e++) { @@ -606,7 +605,7 @@ namespace randomOps { static const bool requiresSpecial = true; #ifdef __CUDACC__ - __device__ static inline void specialOpCuda(Nd4jPointer state, T *x, Nd4jLong *xShapeBuffer, T *y, Nd4jLong *yShapeBuffer, T *z, Nd4jLong *zShapeBuffer, T *extraArguments) { + __device__ static inline void specialOpCuda(Nd4jPointer state, T const* x, Nd4jLong const* xShapeBuffer, T const* y, Nd4jLong const* yShapeBuffer, T *z, Nd4jLong const* zShapeBuffer, T 
*extraArguments) { __shared__ T epsilon; __shared__ T two_pi; @@ -673,12 +672,12 @@ namespace randomOps { #endif static inline void - specialOp(Nd4jPointer state, T *x, Nd4jLong *xShapeBuffer, T *y, Nd4jLong *yShapeBuffer, T *z, Nd4jLong *zShapeBuffer, T *extraArguments) { + specialOp(Nd4jPointer state, const T *x, const Nd4jLong *xShapeBuffer, const T *y, const Nd4jLong *yShapeBuffer, T *z, const Nd4jLong *zShapeBuffer, T *extraArguments) { GaussianDistribution::specialOp(state, x, xShapeBuffer, y, yShapeBuffer, z, zShapeBuffer, extraArguments); Nd4jLong zLength = shape::length(zShapeBuffer); //auto yEWS = shape::elementWiseStride(yShapeBuffer); //auto zEWS = shape::elementWiseStride(zShapeBuffer); - sd::graph::RandomGenerator* rng = reinterpret_cast(state); + auto rng = reinterpret_cast(state); T mean = extraArguments[0]; T stddev = extraArguments[1]; T ds = sd::math::nd4j_abs(stddev) * (T) 2.0f; @@ -718,7 +717,7 @@ namespace randomOps { #ifdef __CUDACC__ - __device__ static inline void specialOpCuda(Nd4jPointer state, T *x, Nd4jLong *xShapeBuffer, T *y, Nd4jLong *yShapeBuffer, T *z, Nd4jLong *zShapeBuffer, T *extraArguments) { + __device__ static inline void specialOpCuda(Nd4jPointer state, T const* x, Nd4jLong const* xShapeBuffer, T const* y, Nd4jLong const* yShapeBuffer, T *z, Nd4jLong const* zShapeBuffer, T *extraArguments) { __shared__ T epsilon; __shared__ T two_pi; @@ -791,7 +790,7 @@ namespace randomOps { #endif static inline void - specialOp(Nd4jPointer state, T *x, Nd4jLong *xShapeBuffer, T *y, Nd4jLong *yShapeBuffer, T *z, Nd4jLong *zShapeBuffer, T *extraArguments) { + specialOp(Nd4jPointer state, const T *x, const Nd4jLong *xShapeBuffer, const T *y, const Nd4jLong *yShapeBuffer, T *z, const Nd4jLong *zShapeBuffer, T *extraArguments) { const T two_pi = static_cast(2.0f) * static_cast(3.14159265358979323846); Nd4jLong zLength = shape::length(zShapeBuffer); @@ -809,8 +808,7 @@ namespace randomOps { // we're enforcing even chunks, since it's mandatory for 
this algorithm span -= span % 2; -// auto buffer = reinterpret_cast (state); - sd::graph::RandomGenerator* rng = reinterpret_cast(state); + auto rng = reinterpret_cast(state); const T mean = extraArguments[0]; const T stddev = extraArguments[1]; diff --git a/libnd4j/include/ops/specials.h b/libnd4j/include/ops/specials.h index c250d72f6..ed5f8fb8c 100644 --- a/libnd4j/include/ops/specials.h +++ b/libnd4j/include/ops/specials.h @@ -50,36 +50,36 @@ namespace sd { template class ND4J_EXPORT SpecialMethods { public: - static void concatCpuGeneric(const std::vector& inArrs, NDArray& output, const int axis); - static void concatCpuGeneric(int dimension, int numArrays, Nd4jPointer *data, Nd4jPointer *inputShapeInfo, void *result, Nd4jLong *resultShapeInfo); - static void splitCpuGeneric(const NDArray& input, const std::vector& outArrs, const int axis); - static void accumulateGeneric(void **x, void *z, Nd4jLong *zShapeInfo, int n, const Nd4jLong length); - static void averageGeneric(void **x, void *z, Nd4jLong *zShapeInfo, int n, const Nd4jLong length, bool propagate); + static void concatCpuGeneric(const std::vector& inArrs, NDArray& output, int axis); + static void concatCpuGeneric(int dimension, int numArrays, Nd4jPointer *data, Nd4jPointer *inputShapeInfo, void *result, Nd4jLong const* resultShapeInfo); + static void splitCpuGeneric(const NDArray& input, const std::vector& outArrs, int axis); + static void accumulateGeneric(void **x, void *z, const Nd4jLong *zShapeInfo, int n, Nd4jLong length); + static void averageGeneric(void **x, void *z, const Nd4jLong *zShapeInfo, int n, Nd4jLong length, bool propagate); - static Nd4jLong getPosition(Nd4jLong *xShapeInfo, Nd4jLong index); - static void quickSort_parallel_internal(T* array, Nd4jLong *xShapeInfo, int left, int right, int cutoff, bool descending); - static void quickSort_parallel(void* array, Nd4jLong *xShapeInfo, Nd4jLong lenArray, int numThreads, bool descending); + static Nd4jLong getPosition(const Nd4jLong 
*xShapeInfo, Nd4jLong index); + static void quickSort_parallel_internal(T* array, const Nd4jLong *xShapeInfo, int left, int right, int cutoff, bool descending); + static void quickSort_parallel(void* array, const Nd4jLong *xShapeInfo, Nd4jLong lenArray, int numThreads, bool descending); static int nextPowerOf2(int number); static int lastPowerOf2(int number); - static void sortGeneric(void *x, Nd4jLong *xShapeInfo, bool descending); - static void sortTadGeneric(void *x, Nd4jLong *xShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool descending); + static void sortGeneric(void *x, const Nd4jLong *xShapeInfo, bool descending); + static void sortTadGeneric(void *x, const Nd4jLong *xShapeInfo, int *dimension, int dimensionLength, const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets, bool descending); - static void decodeBitmapGeneric(void *dx, Nd4jLong N, void *dz, Nd4jLong *zShapeInfo); - static Nd4jLong encodeBitmapGeneric(void *dx, Nd4jLong *zShapeInfo, Nd4jLong N, int *dz, float threshold); + static void decodeBitmapGeneric(const void *dx, Nd4jLong N, void *dz, const Nd4jLong *zShapeInfo); + static Nd4jLong encodeBitmapGeneric(void *dx, const Nd4jLong *zShapeInfo, Nd4jLong N, int *dz, float threshold); }; template class ND4J_EXPORT DoubleMethods{ public: - static void sortByKey(void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, bool descending); - static void sortByValue(void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, bool descending); + static void sortByKey(void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, bool descending); + static void sortByValue(void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, bool descending); - static void sortTadByKey(void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, int *dimension, int dimensionLength, bool descending); - static void sortTadByValue(void *vx, Nd4jLong *xShapeInfo, void *vy, 
Nd4jLong *yShapeInfo, int *dimension, int dimensionLength, bool descending); + static void sortTadByKey(void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, int *dimension, int dimensionLength, bool descending); + static void sortTadByValue(void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, int *dimension, int dimensionLength, bool descending); }; } diff --git a/libnd4j/include/ops/specials_cuda.h b/libnd4j/include/ops/specials_cuda.h index bdff91dd0..a12fd302f 100644 --- a/libnd4j/include/ops/specials_cuda.h +++ b/libnd4j/include/ops/specials_cuda.h @@ -28,39 +28,39 @@ //////////////////////////////////////////////////////////////////////// template -__host__ void bitonicSortStepGeneric(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, int j, int k, int length, bool descending); +__host__ void bitonicSortStepGeneric(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, int j, int k, int length, bool descending); //////////////////////////////////////////////////////////////////////// template -__host__ void bitonicArbitraryStepGeneric(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, int window, int length, int reverse, bool descending); +__host__ void bitonicArbitraryStepGeneric(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, int window, int length, int reverse, bool descending); //////////////////////////////////////////////////////////////////////// template -__host__ void bitonicSortStepGenericKey(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, int j, int k, int length, bool descending); +__host__ void bitonicSortStepGenericKey(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, int j, int k, int length, bool descending); //////////////////////////////////////////////////////////////////////// 
template -__host__ void bitonicArbitraryStepGenericKey(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, int window, int length, int reverse, bool descending); +__host__ void bitonicArbitraryStepGenericKey(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, int window, int length, int reverse, bool descending); //////////////////////////////////////////////////////////////////////// template -__host__ void bitonicSortStepGenericValue(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, int j, int k, int length, bool descending); +__host__ void bitonicSortStepGenericValue(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, int j, int k, int length, bool descending); //////////////////////////////////////////////////////////////////////// template -__host__ void bitonicArbitraryStepGenericValue(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, int window, int length, int reverse, bool descending); +__host__ void bitonicArbitraryStepGenericValue(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, int window, int length, int reverse, bool descending); //////////////////////////////////////////////////////////////////////// template -__host__ void oesTadGeneric(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool descending); +__host__ void oesTadGeneric(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, int *dimension, int dimensionLength, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, bool descending); template -__host__ void oesTadGenericKey(dim3 &launchDims, cudaStream_t *stream, 
void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool descending); +__host__ void oesTadGenericKey(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, int *dimension, int dimensionLength, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, bool descending); template -__host__ void oesTadGenericValue(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong *xShapeInfo, void *vy, Nd4jLong *yShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool descending); +__host__ void oesTadGenericValue(dim3 &launchDims, cudaStream_t *stream, void *vx, Nd4jLong const* xShapeInfo, void *vy, Nd4jLong const* yShapeInfo, int *dimension, int dimensionLength, Nd4jLong const* tadShapeInfo, Nd4jLong const* tadOffsets, bool descending); //////////////////////////////////////////////////////////////////////// template diff --git a/libnd4j/tests_cpu/layers_tests/AtomicTests.cu b/libnd4j/tests_cpu/layers_tests/AtomicTests.cu index bd024ef3b..f8248e7ea 100644 --- a/libnd4j/tests_cpu/layers_tests/AtomicTests.cu +++ b/libnd4j/tests_cpu/layers_tests/AtomicTests.cu @@ -57,7 +57,7 @@ static void multiplyLauncher(void *vbuffer, uint64_t length, void *vresult) { multiplyKernel<<<256, 256, 1024, *sd::LaunchContext::defaultContext()->getCudaStream()>>>(vbuffer, length, vresult); auto err = cudaStreamSynchronize(*sd::LaunchContext::defaultContext()->getCudaStream()); if (err != 0) - sd::cuda_exception::build("multiply failed", err); + throw sd::cuda_exception::build("multiply failed", err); } template @@ -80,7 +80,7 @@ static void sumLauncher(void *vbuffer, uint64_t length, void *vresult) { sumKernel<<<256, 256, 1024, *sd::LaunchContext::defaultContext()->getCudaStream()>>>(vbuffer, length, vresult); auto err = cudaStreamSynchronize(*sd::LaunchContext::defaultContext()->getCudaStream()); if 
(err != 0) - sd::cuda_exception::build("sum failed", err); + throw sd::cuda_exception::build("sum failed", err); } template @@ -103,7 +103,7 @@ static void subLauncher(void *vbuffer, uint64_t length, void *vresult) { subKernel<<<256, 256, 1024, *sd::LaunchContext::defaultContext()->getCudaStream()>>>(vbuffer, length, vresult); auto err = cudaStreamSynchronize(*sd::LaunchContext::defaultContext()->getCudaStream()); if (err != 0) - sd::cuda_exception::build("sub failed", err); + throw sd::cuda_exception::build("sub failed", err); } template @@ -126,7 +126,7 @@ static void divLauncher(void *vbuffer, uint64_t length, void *vresult) { divKernel<<<256, 256, 1024, *sd::LaunchContext::defaultContext()->getCudaStream()>>>(vbuffer, length, vresult); auto err = cudaStreamSynchronize(*sd::LaunchContext::defaultContext()->getCudaStream()); if (err != 0) - sd::cuda_exception::build("div failed", err); + throw sd::cuda_exception::build("div failed", err); } static void multiplyHost(NDArray &input, NDArray &output) { diff --git a/libnd4j/tests_cpu/layers_tests/BroadcastableOpsTests.cpp b/libnd4j/tests_cpu/layers_tests/BroadcastableOpsTests.cpp index 51c6e2375..5a4db9fb8 100644 --- a/libnd4j/tests_cpu/layers_tests/BroadcastableOpsTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/BroadcastableOpsTests.cpp @@ -195,8 +195,8 @@ TEST_F(BroadcastableOpsTests, Test_Shape_1) { TEST_F(BroadcastableOpsTests, Test_Shape_2) { sd::ops::minimum op; - Nd4jLong shapeX[] = {2, 1, 1, 1, 1, 8192, 1, 99}; - Nd4jLong shapeY[] = {2, 2, 5, 5, 1, 8192, 1, 99}; + const Nd4jLong shapeX[] = {2, 1, 1, 1, 1, 8192, 1, 99}; + const Nd4jLong shapeY[] = {2, 2, 5, 5, 1, 8192, 1, 99}; ShapeList inputShape({shapeX, shapeY}); VariableSpace vs; Context ctx(1, &vs, false); @@ -213,8 +213,8 @@ TEST_F(BroadcastableOpsTests, Test_Shape_2) { TEST_F(BroadcastableOpsTests, Test_Shape_3) { sd::ops::minimum op; - Nd4jLong shapeX[] = {2, 5, 3, 1, 1, 8192, 1, 99}; - Nd4jLong shapeY[] = {2, 1, 3, 3, 1, 8192, 1, 99}; + const Nd4jLong 
shapeX[] = {2, 5, 3, 1, 1, 8192, 1, 99}; + const Nd4jLong shapeY[] = {2, 1, 3, 3, 1, 8192, 1, 99}; ShapeList inputShape({shapeX, shapeY}); VariableSpace vs; Context ctx(1, &vs, false); @@ -231,8 +231,8 @@ TEST_F(BroadcastableOpsTests, Test_Shape_3) { TEST_F(BroadcastableOpsTests, Test_Shape_4) { sd::ops::minimum op; - Nd4jLong shapeX[] = {2, 5, 3, 1, 1, 8192, 1, 99}; - Nd4jLong shapeY[] = {2, 5, 1, 1, 1, 8192, 1, 99}; + const Nd4jLong shapeX[] = {2, 5, 3, 1, 1, 8192, 1, 99}; + const Nd4jLong shapeY[] = {2, 5, 1, 1, 1, 8192, 1, 99}; ShapeList inputShape({shapeX, shapeY}); VariableSpace vs; Context ctx(1, &vs, false); @@ -250,9 +250,9 @@ TEST_F(BroadcastableOpsTests, Test_Shape_4) { TEST_F(BroadcastableOpsTests, Test_Shape_5) { sd::ops::minimum op; - Nd4jLong shapeX[] = {3, 2, 1, 3, 3, 3, 1, 8192, 1, 99}; - Nd4jLong shapeY[] = {2, 4, 3, 3, 1, 8192, 1, 99}; - Nd4jLong shapeE[] = {3, 2, 4, 3, 12, 3, 1, 8192, 1, 99}; + const Nd4jLong shapeX[] = {3, 2, 1, 3, 3, 3, 1, 8192, 1, 99}; + const Nd4jLong shapeY[] = {2, 4, 3, 3, 1, 8192, 1, 99}; + const Nd4jLong shapeE[] = {3, 2, 4, 3, 12, 3, 1, 8192, 1, 99}; ShapeList inputShape({shapeX, shapeY}); VariableSpace vs; Context ctx(1, &vs, false); diff --git a/libnd4j/tests_cpu/layers_tests/BrodcastTests.cpp b/libnd4j/tests_cpu/layers_tests/BrodcastTests.cpp index ed97c3137..34d0132bb 100644 --- a/libnd4j/tests_cpu/layers_tests/BrodcastTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/BrodcastTests.cpp @@ -37,7 +37,7 @@ public: #ifndef __CUDABLAS__ TEST_F(BroadcastMultiDimTest,MultimDimTest) { - shape::TAD *tad = new shape::TAD(); + auto tad = new shape::TAD(); tad->init(inputShapeBuffer,dimensions,dimensionLength); tad->createTadOnlyShapeInfo(); tad-> createOffsets(); @@ -55,6 +55,7 @@ TEST_F(BroadcastMultiDimTest,MultimDimTest) { tad->tadOffsets, //tadOffset tad->tadOnlyShapeInfo, //tadShapeInfoZ tad->tadOffsets, sd::LoopKind::COMMON, 0, tad->numTads); //tadOffsetZ + for(int i = 0; i < 30; i++) { 
ASSERT_EQ(dataAssertion[i],result[i]); } diff --git a/libnd4j/tests_cpu/layers_tests/CMakeLists.txt b/libnd4j/tests_cpu/layers_tests/CMakeLists.txt index 07f473dbc..870178152 100644 --- a/libnd4j/tests_cpu/layers_tests/CMakeLists.txt +++ b/libnd4j/tests_cpu/layers_tests/CMakeLists.txt @@ -42,18 +42,18 @@ endif() # -fsanitize=address # -fsanitize=leak if (APPLE) - set(CMAKE_CXX_FLAGS " -fPIC -fmax-errors=2 -D__APPLE_OS__=true") + set(CMAKE_CXX_FLAGS " -fPIC -D__APPLE_OS__=true") elseif(WIN32) if (SD_CPU) set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fPIC -march=native -mtune=native -O3") endif() if (SD_CPU AND LINUX) - set(CMAKE_CXX_FLAGS " -fPIC -fmax-errors=2") + set(CMAKE_CXX_FLAGS " -fPIC") endif() else() set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3") - set(CMAKE_CXX_FLAGS " -fPIC -fmax-errors=2") + set(CMAKE_CXX_FLAGS " -fPIC") if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64*") set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -mcpu=native") else() @@ -82,12 +82,12 @@ elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") # using GCC - SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") -endif() + SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fmax-errors=2") -if (CMAKE_BUILD_TYPE STREQUAL "Debug" AND ${CMAKE_SYSTEM_NAME} MATCHES "Linux" AND NOT(MINGW)) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -rdynamic -Wl,-export-dynamic") - SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -export-dynamic") + if (CMAKE_BUILD_TYPE STREQUAL "Debug" AND ${CMAKE_SYSTEM_NAME} MATCHES "Linux" AND NOT(MINGW)) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -rdynamic -Wl,-export-dynamic") + SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -export-dynamic") + endif() endif() IF(${CMAKE_SYSTEM_NAME} MATCHES "Linux") diff --git a/libnd4j/tests_cpu/layers_tests/ConvolutionTests1.cpp b/libnd4j/tests_cpu/layers_tests/ConvolutionTests1.cpp index 149ab3c5f..4438e5fe6 100644 --- a/libnd4j/tests_cpu/layers_tests/ConvolutionTests1.cpp +++ 
b/libnd4j/tests_cpu/layers_tests/ConvolutionTests1.cpp @@ -574,7 +574,7 @@ TYPED_TEST(TypedConvolutionTests1, conv2D_BP_Bias_1) { TypeParam _expEpsB[] = {952.0, 1540.0, 1636.0, 1180.0, 1791.0, 2886.0, 3057.0, 2193.0, 2223.0, 3570.0, 3741.0, 2673.0, 1900.0, 3028.0, 3160.0, 2240.0, 2872.0, 4612.0, 4708.0, 3356.0, 5247.0, 8358.0, 8529.0, 6033.0, 5679.0, 9042.0, 9213.0, 6513.0, 4588.0, 7252.0, 7384.0, 5184.0}; - NDArray expEps(_expEpsB, input.getShapeInfo()); + NDArray expEps(_expEpsB, input.shapeInfo()); input.linspace(1); weights.linspace(1); @@ -624,7 +624,7 @@ TYPED_TEST(TypedConvolutionTests1, conv2D_BP_NoBias_1) { TypeParam _expEpsB[] = {952.0, 1540.0, 1636.0, 1180.0, 1791.0, 2886.0, 3057.0, 2193.0, 2223.0, 3570.0, 3741.0, 2673.0, 1900.0, 3028.0, 3160.0, 2240.0, 2872.0, 4612.0, 4708.0, 3356.0, 5247.0, 8358.0, 8529.0, 6033.0, 5679.0, 9042.0, 9213.0, 6513.0, 4588.0, 7252.0, 7384.0, 5184.0}; - NDArray expEps(_expEpsB, input.getShapeInfo()); + NDArray expEps(_expEpsB, input.shapeInfo()); input.linspace(1); weights.linspace(1); @@ -2413,7 +2413,7 @@ TYPED_TEST(TypedConvolutionTests1, conv2D_input_BP_test1) { TypeParam _expEpsB[] = {952.0, 1540.0, 1636.0, 1180.0, 1791.0, 2886.0, 3057.0, 2193.0, 2223.0, 3570.0, 3741.0, 2673.0, 1900.0, 3028.0, 3160.0, 2240.0, 2872.0, 4612.0, 4708.0, 3356.0, 5247.0, 8358.0, 8529.0, 6033.0, 5679.0, 9042.0, 9213.0, 6513.0, 4588.0, 7252.0, 7384.0, 5184.0}; - NDArray expEps(_expEpsB, shapeArr.getShapeInfo()); + NDArray expEps(_expEpsB, shapeArr.shapeInfo()); weights.linspace(1); epsilonNext.linspace(1); diff --git a/libnd4j/tests_cpu/layers_tests/CudaBasicsTests1.cu b/libnd4j/tests_cpu/layers_tests/CudaBasicsTests1.cu index a0104e637..972435523 100644 --- a/libnd4j/tests_cpu/layers_tests/CudaBasicsTests1.cu +++ b/libnd4j/tests_cpu/layers_tests/CudaBasicsTests1.cu @@ -147,10 +147,10 @@ TEST_F(CudaBasicsTests1, execIndexReduceScalar_1) { cudaResult = cudaMalloc(reinterpret_cast(&dX2), x2.lengthOf() * x2.sizeOfT()); ASSERT_EQ(0, cudaResult); 
cudaResult = cudaMalloc(reinterpret_cast(&dX3), x3.lengthOf() * x3.sizeOfT()); ASSERT_EQ(0, cudaResult); cudaResult = cudaMalloc(reinterpret_cast(&dZ), scalar.lengthOf() * scalar.sizeOfT()); ASSERT_EQ(0, cudaResult); - cudaResult = cudaMalloc(reinterpret_cast(&dX1ShapeInfo), shape::shapeInfoByteLength(x1.getShapeInfo())); ASSERT_EQ(0, cudaResult); - cudaResult = cudaMalloc(reinterpret_cast(&dX2ShapeInfo), shape::shapeInfoByteLength(x2.getShapeInfo())); ASSERT_EQ(0, cudaResult); - cudaResult = cudaMalloc(reinterpret_cast(&dX3ShapeInfo), shape::shapeInfoByteLength(x3.getShapeInfo())); ASSERT_EQ(0, cudaResult); - cudaResult = cudaMalloc(reinterpret_cast(&dZShapeInfo), shape::shapeInfoByteLength(scalar.getShapeInfo())); ASSERT_EQ(0, cudaResult); + cudaResult = cudaMalloc(reinterpret_cast(&dX1ShapeInfo), shape::shapeInfoByteLength(x1.shapeInfo())); ASSERT_EQ(0, cudaResult); + cudaResult = cudaMalloc(reinterpret_cast(&dX2ShapeInfo), shape::shapeInfoByteLength(x2.shapeInfo())); ASSERT_EQ(0, cudaResult); + cudaResult = cudaMalloc(reinterpret_cast(&dX3ShapeInfo), shape::shapeInfoByteLength(x3.shapeInfo())); ASSERT_EQ(0, cudaResult); + cudaResult = cudaMalloc(reinterpret_cast(&dZShapeInfo), shape::shapeInfoByteLength(scalar.shapeInfo())); ASSERT_EQ(0, cudaResult); cudaStream_t stream; cudaResult = cudaStreamCreate(&stream); @@ -164,10 +164,10 @@ TEST_F(CudaBasicsTests1, execIndexReduceScalar_1) { cudaMemcpyAsync(dX1, x1.buffer(), x1.lengthOf() * x1.sizeOfT(), cudaMemcpyHostToDevice, stream); cudaMemcpyAsync(dX2, x2.buffer(), x2.lengthOf() * x2.sizeOfT(), cudaMemcpyHostToDevice, stream); cudaMemcpyAsync(dX3, x3.buffer(), x3.lengthOf() * x3.sizeOfT(), cudaMemcpyHostToDevice, stream); - cudaMemcpyAsync(dX1ShapeInfo, x1.getShapeInfo(), shape::shapeInfoByteLength(x1.getShapeInfo()), cudaMemcpyHostToDevice, stream); - cudaMemcpyAsync(dX2ShapeInfo, x2.getShapeInfo(), shape::shapeInfoByteLength(x2.getShapeInfo()), cudaMemcpyHostToDevice, stream); - cudaMemcpyAsync(dX3ShapeInfo, 
x3.getShapeInfo(), shape::shapeInfoByteLength(x3.getShapeInfo()), cudaMemcpyHostToDevice, stream); - cudaMemcpyAsync(dZShapeInfo, scalar.getShapeInfo(), shape::shapeInfoByteLength(scalar.getShapeInfo()), cudaMemcpyHostToDevice, stream); + cudaMemcpyAsync(dX1ShapeInfo, x1.shapeInfo(), shape::shapeInfoByteLength(x1.shapeInfo()), cudaMemcpyHostToDevice, stream); + cudaMemcpyAsync(dX2ShapeInfo, x2.shapeInfo(), shape::shapeInfoByteLength(x2.shapeInfo()), cudaMemcpyHostToDevice, stream); + cudaMemcpyAsync(dX3ShapeInfo, x3.shapeInfo(), shape::shapeInfoByteLength(x3.shapeInfo()), cudaMemcpyHostToDevice, stream); + cudaMemcpyAsync(dZShapeInfo, scalar.shapeInfo(), shape::shapeInfoByteLength(scalar.shapeInfo()), cudaMemcpyHostToDevice, stream); void* reductionPointer = nullptr; cudaResult = cudaMalloc(reinterpret_cast(&reductionPointer), 1024*1024); @@ -181,10 +181,10 @@ TEST_F(CudaBasicsTests1, execIndexReduceScalar_1) { NativeOpExecutioner::execIndexReduceScalar(&lc, sd::indexreduce::IndexAbsoluteMax, - x1.buffer(), x1.getShapeInfo(), + x1.buffer(), x1.shapeInfo(), dX1, dX1ShapeInfo, nullptr, - scalar.buffer(), scalar.getShapeInfo(), + scalar.buffer(), scalar.shapeInfo(), dZ, dZShapeInfo); cudaResult = cudaStreamSynchronize(stream); @@ -203,10 +203,10 @@ TEST_F(CudaBasicsTests1, execIndexReduceScalar_1) { NativeOpExecutioner::execIndexReduceScalar(&lc, sd::indexreduce::IndexAbsoluteMax, - nullptr, x2.getShapeInfo(), + nullptr, x2.shapeInfo(), dX2, dX2ShapeInfo, nullptr, - nullptr, scalar.getShapeInfo(), + nullptr, scalar.shapeInfo(), dZ, dZShapeInfo); cudaResult = cudaStreamSynchronize(stream); @@ -223,10 +223,10 @@ TEST_F(CudaBasicsTests1, execIndexReduceScalar_1) { NativeOpExecutioner::execIndexReduceScalar(&lc, sd::indexreduce::IndexAbsoluteMax, - nullptr, x3.getShapeInfo(), + nullptr, x3.shapeInfo(), dX3, dX3ShapeInfo, nullptr, - nullptr, scalar.getShapeInfo(), + nullptr, scalar.shapeInfo(), dZ, dZShapeInfo); cudaResult = cudaStreamSynchronize(stream); @@ -279,10 
+279,10 @@ TEST_F(CudaBasicsTests1, execReduce3Scalar_1) { cudaResult = cudaMalloc(reinterpret_cast(&dX4), x4.lengthOf() * x4.sizeOfT()); ASSERT_EQ(0, cudaResult); cudaResult = cudaMalloc(reinterpret_cast(&dZ1), scalar1.lengthOf() * scalar1.sizeOfT()); ASSERT_EQ(0, cudaResult); cudaResult = cudaMalloc(reinterpret_cast(&dZ2), scalar2.lengthOf() * scalar2.sizeOfT()); ASSERT_EQ(0, cudaResult); - cudaResult = cudaMalloc(reinterpret_cast(&dX1ShapeInfo), shape::shapeInfoByteLength(x1.getShapeInfo())); ASSERT_EQ(0, cudaResult); - cudaResult = cudaMalloc(reinterpret_cast(&dX3ShapeInfo), shape::shapeInfoByteLength(x3.getShapeInfo())); ASSERT_EQ(0, cudaResult); - cudaResult = cudaMalloc(reinterpret_cast(&dZ1ShapeInfo), shape::shapeInfoByteLength(scalar1.getShapeInfo())); ASSERT_EQ(0, cudaResult); - cudaResult = cudaMalloc(reinterpret_cast(&dZ2ShapeInfo), shape::shapeInfoByteLength(scalar2.getShapeInfo())); ASSERT_EQ(0, cudaResult); + cudaResult = cudaMalloc(reinterpret_cast(&dX1ShapeInfo), shape::shapeInfoByteLength(x1.shapeInfo())); ASSERT_EQ(0, cudaResult); + cudaResult = cudaMalloc(reinterpret_cast(&dX3ShapeInfo), shape::shapeInfoByteLength(x3.shapeInfo())); ASSERT_EQ(0, cudaResult); + cudaResult = cudaMalloc(reinterpret_cast(&dZ1ShapeInfo), shape::shapeInfoByteLength(scalar1.shapeInfo())); ASSERT_EQ(0, cudaResult); + cudaResult = cudaMalloc(reinterpret_cast(&dZ2ShapeInfo), shape::shapeInfoByteLength(scalar2.shapeInfo())); ASSERT_EQ(0, cudaResult); cudaStream_t stream; cudaResult = cudaStreamCreate(&stream); @@ -299,10 +299,10 @@ TEST_F(CudaBasicsTests1, execReduce3Scalar_1) { cudaMemcpyAsync(dX2, x2.buffer(), x2.lengthOf() * x2.sizeOfT(), cudaMemcpyHostToDevice, stream); cudaMemcpyAsync(dX3, x3.buffer(), x3.lengthOf() * x3.sizeOfT(), cudaMemcpyHostToDevice, stream); cudaMemcpyAsync(dX4, x4.buffer(), x4.lengthOf() * x4.sizeOfT(), cudaMemcpyHostToDevice, stream); - cudaMemcpyAsync(dX1ShapeInfo, x1.getShapeInfo(), shape::shapeInfoByteLength(x1.getShapeInfo()), 
cudaMemcpyHostToDevice, stream); - cudaMemcpyAsync(dX3ShapeInfo, x3.getShapeInfo(), shape::shapeInfoByteLength(x3.getShapeInfo()), cudaMemcpyHostToDevice, stream); - cudaMemcpyAsync(dZ1ShapeInfo, scalar1.getShapeInfo(), shape::shapeInfoByteLength(scalar1.getShapeInfo()), cudaMemcpyHostToDevice, stream); - cudaMemcpyAsync(dZ2ShapeInfo, scalar2.getShapeInfo(), shape::shapeInfoByteLength(scalar2.getShapeInfo()), cudaMemcpyHostToDevice, stream); + cudaMemcpyAsync(dX1ShapeInfo, x1.shapeInfo(), shape::shapeInfoByteLength(x1.shapeInfo()), cudaMemcpyHostToDevice, stream); + cudaMemcpyAsync(dX3ShapeInfo, x3.shapeInfo(), shape::shapeInfoByteLength(x3.shapeInfo()), cudaMemcpyHostToDevice, stream); + cudaMemcpyAsync(dZ1ShapeInfo, scalar1.shapeInfo(), shape::shapeInfoByteLength(scalar1.shapeInfo()), cudaMemcpyHostToDevice, stream); + cudaMemcpyAsync(dZ2ShapeInfo, scalar2.shapeInfo(), shape::shapeInfoByteLength(scalar2.shapeInfo()), cudaMemcpyHostToDevice, stream); /***************************************/ @@ -316,7 +316,7 @@ TEST_F(CudaBasicsTests1, execReduce3Scalar_1) { /***************************************/ - NativeOpExecutioner::execReduce3Scalar(&lc, sd::reduce3::Dot,nullptr, x1.getShapeInfo(),dX1, dX1ShapeInfo, nullptr, nullptr, x2.getShapeInfo(),dX2, dX1ShapeInfo,nullptr, scalar1.getShapeInfo(),dZ1, dZ1ShapeInfo); + NativeOpExecutioner::execReduce3Scalar(&lc, sd::reduce3::Dot,nullptr, x1.shapeInfo(),dX1, dX1ShapeInfo, nullptr, nullptr, x2.shapeInfo(),dX2, dX1ShapeInfo,nullptr, scalar1.shapeInfo(),dZ1, dZ1ShapeInfo); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -333,7 +333,7 @@ TEST_F(CudaBasicsTests1, execReduce3Scalar_1) { /***************************************/ - NativeOpExecutioner::execReduce3Scalar(&lc, sd::reduce3::Dot,nullptr, x3.getShapeInfo(),dX3, dX3ShapeInfo, nullptr, nullptr, x4.getShapeInfo(),dX4, dX3ShapeInfo,nullptr, scalar2.getShapeInfo(),dZ2, dZ2ShapeInfo); + NativeOpExecutioner::execReduce3Scalar(&lc, 
sd::reduce3::Dot,nullptr, x3.shapeInfo(),dX3, dX3ShapeInfo, nullptr, nullptr, x4.shapeInfo(),dX4, dX3ShapeInfo,nullptr, scalar2.shapeInfo(),dZ2, dZ2ShapeInfo); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -387,10 +387,10 @@ TEST_F(CudaBasicsTests1, execReduce3_1) { // call cuda kernel which calculates result NativeOpExecutioner::execReduce3(&lc, sd::reduce3::Dot, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), nullptr, nullptr, nullptr, nullptr); @@ -436,10 +436,10 @@ TEST_F(CudaBasicsTests1, execReduce3_2) { // call cuda kernel which calculates result NativeOpExecutioner::execReduce3(&lc, sd::reduce3::Dot, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), nullptr, nullptr, nullptr, nullptr); @@ -471,13 +471,13 @@ TEST_F(CudaBasicsTests1, execReduce3_3) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); // evaluate yTad data shape::TAD yTad; - yTad.init(y.getShapeInfo(), dimensions.data(), dimensions.size()); + yTad.init(y.shapeInfo(), dimensions.data(), 
dimensions.size()); yTad.createTadOnlyShapeInfo(); yTad.createOffsets(); @@ -502,10 +502,10 @@ TEST_F(CudaBasicsTests1, execReduce3_3) { // call cuda kernel which calculates result NativeOpExecutioner::execReduce3(&lc, sd::reduce3::Dot, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], (Nd4jLong*)devicePtrs[3], (Nd4jLong*)devicePtrs[4]); @@ -537,13 +537,13 @@ TEST_F(CudaBasicsTests1, execReduce3_4) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); // evaluate yTad data shape::TAD yTad; - yTad.init(y.getShapeInfo(), dimensions.data(), dimensions.size()); + yTad.init(y.shapeInfo(), dimensions.data(), dimensions.size()); yTad.createTadOnlyShapeInfo(); yTad.createOffsets(); @@ -568,10 +568,10 @@ TEST_F(CudaBasicsTests1, execReduce3_4) { // call cuda kernel which calculates result NativeOpExecutioner::execReduce3(&lc, sd::reduce3::Dot, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], 
(Nd4jLong*)devicePtrs[3], (Nd4jLong*)devicePtrs[4]); @@ -603,13 +603,13 @@ TEST_F(CudaBasicsTests1, execReduce3_5) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); // evaluate yTad data shape::TAD yTad; - yTad.init(y.getShapeInfo(), dimensions.data(), dimensions.size()); + yTad.init(y.shapeInfo(), dimensions.data(), dimensions.size()); yTad.createTadOnlyShapeInfo(); yTad.createOffsets(); @@ -634,10 +634,10 @@ TEST_F(CudaBasicsTests1, execReduce3_5) { // call cuda kernel which calculates result NativeOpExecutioner::execReduce3(&lc, sd::reduce3::Dot, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], (Nd4jLong*)devicePtrs[3], (Nd4jLong*)devicePtrs[4]); @@ -669,13 +669,13 @@ TEST_F(CudaBasicsTests1, execReduce3All_1) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); // evaluate yTad data shape::TAD yTad; - yTad.init(y.getShapeInfo(), dimensions.data(), dimensions.size()); + yTad.init(y.shapeInfo(), dimensions.data(), dimensions.size()); yTad.createTadOnlyShapeInfo(); yTad.createOffsets(); @@ -700,10 +700,10 @@ TEST_F(CudaBasicsTests1, execReduce3All_1) { // call cuda kernel which calculates result NativeOpExecutioner::execReduce3All(&lc, sd::reduce3::Dot, - 
nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], (Nd4jLong*)devicePtrs[3], (Nd4jLong*)devicePtrs[4]); @@ -735,13 +735,13 @@ TEST_F(CudaBasicsTests1, execReduce3All_2) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); // evaluate yTad data shape::TAD yTad; - yTad.init(y.getShapeInfo(), dimensions.data(), dimensions.size()); + yTad.init(y.shapeInfo(), dimensions.data(), dimensions.size()); yTad.createTadOnlyShapeInfo(); yTad.createOffsets(); @@ -766,10 +766,10 @@ TEST_F(CudaBasicsTests1, execReduce3All_2) { // call cuda kernel which calculates result NativeOpExecutioner::execReduce3All(&lc, sd::reduce3::Dot, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], (Nd4jLong*)devicePtrs[3], (Nd4jLong*)devicePtrs[4]); @@ -800,7 +800,7 @@ TEST_F(CudaBasicsTests1, execIndexReduce_1) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + 
xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -823,9 +823,9 @@ TEST_F(CudaBasicsTests1, execIndexReduce_1) { // call cuda kernel which calculates result NativeOpExecutioner::execIndexReduce(&lc, sd::indexreduce::IndexMax, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2]); @@ -861,7 +861,7 @@ TEST_F(CudaBasicsTests1, execIndexReduce_2) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -885,9 +885,9 @@ TEST_F(CudaBasicsTests1, execIndexReduce_2) { // call cuda kernel which calculates result NativeOpExecutioner::execIndexReduce(&lc, sd::indexreduce::IndexMax, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2]); @@ -923,7 +923,7 @@ TEST_F(CudaBasicsTests1, execIndexReduce_3) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -946,9 +946,9 @@ TEST_F(CudaBasicsTests1, execIndexReduce_3) { // call cuda kernel which calculates result NativeOpExecutioner::execIndexReduce(&lc, sd::indexreduce::IndexMax, - 
nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2]); @@ -986,9 +986,9 @@ TEST_F(CudaBasicsTests1, execScalar_1) { // call cuda kernel which calculates result NativeOpExecutioner::execScalar(&lc, sd::scalar::Divide, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), - nullptr, scalar.getShapeInfo(), scalar.specialBuffer(), scalar.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, scalar.shapeInfo(), scalar.specialBuffer(), scalar.specialShapeInfo(), nullptr); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -1021,9 +1021,9 @@ TEST_F(CudaBasicsTests1, execScalar_2) { // call cuda kernel which calculates result NativeOpExecutioner::execScalar(&lc, sd::scalar::CopyPws, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), - nullptr, scalar.getShapeInfo(), scalar.specialBuffer(), scalar.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, scalar.shapeInfo(), scalar.specialBuffer(), scalar.specialShapeInfo(), nullptr); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -1053,7 +1053,7 @@ TEST_F(CudaBasicsTests1, execScalar_3) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); 
xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -1076,10 +1076,10 @@ TEST_F(CudaBasicsTests1, execScalar_3) { // call cuda kernel which calculates result NativeOpExecutioner::execScalar(&lc, sd::scalar::Divide, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), - nullptr, scalars.getShapeInfo(), scalars.specialBuffer(), scalars.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, scalars.shapeInfo(), scalars.specialBuffer(), scalars.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], nullptr, nullptr); @@ -1116,9 +1116,9 @@ TEST_F(CudaBasicsTests1, execScalarBool_1) { // call cuda kernel which calculates result // call cuda kernel which calculates result NativeOpExecutioner::execScalarBool(&lc, sd::scalar::GreaterThan, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), - nullptr, scalar.getShapeInfo(), scalar.specialBuffer(), scalar.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, scalar.shapeInfo(), scalar.specialBuffer(), scalar.specialShapeInfo(), nullptr); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -1144,7 +1144,7 @@ TEST_F(CudaBasicsTests1, execScalarBool_2) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -1166,10 +1166,10 @@ TEST_F(CudaBasicsTests1, execScalarBool_2) { // call cuda kernel which calculates result NativeOpExecutioner::execScalarBool(&lc, 
sd::scalar::GreaterThan, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), - nullptr, scalars.getShapeInfo(), scalars.specialBuffer(), scalars.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, scalars.shapeInfo(), scalars.specialBuffer(), scalars.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], nullptr, nullptr); @@ -1205,7 +1205,7 @@ TEST_F(CudaBasicsTests1, execBroadcast_1) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -1227,9 +1227,9 @@ TEST_F(CudaBasicsTests1, execBroadcast_1) { // call cuda kernel which calculates result NativeOpExecutioner::execBroadcast(&lc, sd::broadcast::Add, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], nullptr, nullptr); @@ -1265,7 +1265,7 @@ TEST_F(CudaBasicsTests1, execBroadcast_2) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -1287,9 +1287,9 @@ TEST_F(CudaBasicsTests1, execBroadcast_2) { // call cuda kernel which calculates result 
NativeOpExecutioner::execBroadcast(&lc, sd::broadcast::Add, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], nullptr, nullptr); @@ -1322,7 +1322,7 @@ TEST_F(CudaBasicsTests1, execBroadcastBool_1) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -1344,9 +1344,9 @@ TEST_F(CudaBasicsTests1, execBroadcastBool_1) { // call cuda kernel which calculates result NativeOpExecutioner::execBroadcastBool(&lc, sd::broadcast::EqualTo, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], @@ -1380,7 +1380,7 @@ TEST_F(CudaBasicsTests1, execBroadcastBool_2) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -1403,9 +1403,9 @@ TEST_F(CudaBasicsTests1, execBroadcastBool_2) { // call cuda kernel which calculates result 
NativeOpExecutioner::execBroadcastBool(&lc, sd::broadcast::EqualTo, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], @@ -1447,9 +1447,9 @@ TEST_F(CudaBasicsTests1, execPairwiseTransform_1) { // call cuda kernel which calculates result NativeOpExecutioner::execPairwiseTransform(&lc, sd::pairwise::Subtract, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -1481,9 +1481,9 @@ TEST_F(CudaBasicsTests1, execPairwiseBoolTransform_1) { // call cuda kernel which calculates result NativeOpExecutioner::execPairwiseBoolTransform(&lc, sd::pairwise::EqualTo, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -1515,8 +1515,8 @@ TEST_F(CudaBasicsTests1, 
execTransformFloat_1) { // call cuda kernel which calculates result NativeOpExecutioner::execTransformFloat(&lc, sd::transform::Sqrt, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr, nullptr); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -1545,8 +1545,8 @@ TEST_F(CudaBasicsTests1, execTransformFloat_2) { // call cuda kernel which calculates result NativeOpExecutioner::execTransformFloat(&lc, sd::transform::Sqrt, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr, nullptr); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -1576,8 +1576,8 @@ TEST_F(CudaBasicsTests1, execTransformAny_1) { // call cuda kernel which calculates result NativeOpExecutioner::execTransformAny(&lc, sd::transform::Assign, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr, nullptr); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -1606,8 +1606,8 @@ TEST_F(CudaBasicsTests1, execTransformAny_2) { // call cuda kernel which calculates result NativeOpExecutioner::execTransformAny(&lc, sd::transform::Assign, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + 
nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr, nullptr); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -1637,8 +1637,8 @@ TEST_F(CudaBasicsTests1, execTransformStrict_1) { // call cuda kernel which calculates result NativeOpExecutioner::execTransformStrict(&lc, sd::transform::CubeDerivative, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr, nullptr); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -1667,8 +1667,8 @@ TEST_F(CudaBasicsTests1, execTransformStrict_2) { // call cuda kernel which calculates result NativeOpExecutioner::execTransformStrict(&lc, sd::transform::CubeDerivative, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr, nullptr); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -1698,8 +1698,8 @@ TEST_F(CudaBasicsTests1, execTransformSame_1) { // call cuda kernel which calculates result NativeOpExecutioner::execTransformSame(&lc, sd::transform::Square, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr, nullptr); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -1728,8 +1728,8 @@ TEST_F(CudaBasicsTests1, execTransformSame_2) { // call cuda kernel which calculates result 
NativeOpExecutioner::execTransformSame(&lc, sd::transform::Square, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr, nullptr); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -1759,8 +1759,8 @@ TEST_F(CudaBasicsTests1, execTransformBool_1) { // call cuda kernel which calculates result NativeOpExecutioner::execTransformBool(&lc, sd::transform::IsPositive, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr, nullptr); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -1789,8 +1789,8 @@ TEST_F(CudaBasicsTests1, execTransformBool_2) { // call cuda kernel which calculates result NativeOpExecutioner::execTransformBool(&lc, sd::transform::IsPositive, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr, nullptr); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -1816,7 +1816,7 @@ TEST_F(CudaBasicsTests1, execReduceFloat_1) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -1838,9 +1838,9 @@ TEST_F(CudaBasicsTests1, execReduceFloat_1) { // call cuda kernel which calculates result 
NativeOpExecutioner::execReduceFloat(&lc, sd::reduce::Mean, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2]); @@ -1870,7 +1870,7 @@ TEST_F(CudaBasicsTests1, execReduceFloat_2) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -1892,9 +1892,9 @@ TEST_F(CudaBasicsTests1, execReduceFloat_2) { // call cuda kernel which calculates result NativeOpExecutioner::execReduceFloat(&lc, sd::reduce::Mean, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2]); @@ -1925,7 +1925,7 @@ TEST_F(CudaBasicsTests1, execReduceSame_1) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -1947,9 +1947,9 @@ TEST_F(CudaBasicsTests1, execReduceSame_1) { // call cuda kernel which calculates result NativeOpExecutioner::execReduceSame(&lc, sd::reduce::Sum, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, z.shapeInfo(), 
z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2]); @@ -1979,7 +1979,7 @@ TEST_F(CudaBasicsTests1, execReduceSame_2) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -2001,9 +2001,9 @@ TEST_F(CudaBasicsTests1, execReduceSame_2) { // call cuda kernel which calculates result NativeOpExecutioner::execReduceSame(&lc, sd::reduce::Sum, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2]); @@ -2035,7 +2035,7 @@ TEST_F(CudaBasicsTests1, execReduceBool_1) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -2057,9 +2057,9 @@ TEST_F(CudaBasicsTests1, execReduceBool_1) { // call cuda kernel which calculates result NativeOpExecutioner::execReduceBool(&lc, sd::reduce::IsPositive, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2]); @@ -2089,7 +2089,7 @@ TEST_F(CudaBasicsTests1, execReduceBool_2) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + 
xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -2111,9 +2111,9 @@ TEST_F(CudaBasicsTests1, execReduceBool_2) { // call cuda kernel which calculates result NativeOpExecutioner::execReduceBool(&lc, sd::reduce::IsPositive, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2]); @@ -2144,7 +2144,7 @@ TEST_F(CudaBasicsTests1, execReduceLong_1) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -2166,9 +2166,9 @@ TEST_F(CudaBasicsTests1, execReduceLong_1) { // call cuda kernel which calculates result NativeOpExecutioner::execReduceLong(&lc, sd::reduce::CountNonZero, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2]); @@ -2198,7 +2198,7 @@ TEST_F(CudaBasicsTests1, execReduceLong_2) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -2220,9 +2220,9 @@ TEST_F(CudaBasicsTests1, execReduceLong_2) { // call cuda kernel which calculates result NativeOpExecutioner::execReduceLong(&lc, sd::reduce::CountNonZero, - nullptr, 
x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2]); @@ -2263,9 +2263,9 @@ TEST_F(CudaBasicsTests1, execReduceFloatScalar_1) { // call cuda kernel which calculates result NativeOpExecutioner::execReduceFloatScalar(&lc, sd::reduce::Mean, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo()); + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo()); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); z.tickWriteDevice(); @@ -2299,9 +2299,9 @@ TEST_F(CudaBasicsTests1, execReduceFloatScalar_2) { // call cuda kernel which calculates result NativeOpExecutioner::execReduceFloatScalar(&lc, sd::reduce::Mean, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo()); + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo()); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); z.tickWriteDevice(); @@ -2336,9 +2336,9 @@ TEST_F(CudaBasicsTests1, execReduceSameScalar_1) { // call cuda kernel which calculates result NativeOpExecutioner::execReduceSameScalar(&lc, sd::reduce::Sum, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo()); + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo()); cudaResult = cudaStreamSynchronize(stream); 
ASSERT_EQ(0, cudaResult); z.tickWriteDevice(); @@ -2372,9 +2372,9 @@ TEST_F(CudaBasicsTests1, execReduceSameScalar_2) { // call cuda kernel which calculates result NativeOpExecutioner::execReduceSameScalar(&lc, sd::reduce::Sum, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo()); + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo()); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); z.tickWriteDevice(); @@ -2410,9 +2410,9 @@ TEST_F(CudaBasicsTests1, execReduceBoolScalar_1) { // call cuda kernel which calculates result NativeOpExecutioner::execReduceBoolScalar(&lc, sd::reduce::IsPositive, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo()); + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo()); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); z.tickWriteDevice(); @@ -2446,9 +2446,9 @@ TEST_F(CudaBasicsTests1, execReduceBoolScalar_2) { // call cuda kernel which calculates result NativeOpExecutioner::execReduceBoolScalar(&lc, sd::reduce::IsPositive, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo()); + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo()); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); z.tickWriteDevice(); @@ -2484,9 +2484,9 @@ TEST_F(CudaBasicsTests1, execReduceLongScalar_1) { // call cuda kernel which calculates result NativeOpExecutioner::execReduceLongScalar(&lc, sd::reduce::CountNonZero, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + 
nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo()); + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo()); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); z.tickWriteDevice(); @@ -2520,9 +2520,9 @@ TEST_F(CudaBasicsTests1, execReduceLongScalar_2) { // call cuda kernel which calculates result NativeOpExecutioner::execReduceLongScalar(&lc, sd::reduce::CountNonZero, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo()); + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo()); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); z.tickWriteDevice(); @@ -2552,10 +2552,10 @@ TEST_F(CudaBasicsTests1, execReduce3TAD_1) { PointersManager pm(context, "execReduce3TAD_1"); // call cuda kernel which calculates result NativeOpExecutioner::execReduce3TAD(context, sd::reduce3::Dot, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, dimensions.size(), packX.specialShapeInfo(), packX.specialOffsets(), nullptr, nullptr); pm.synchronize(); @@ -2580,7 +2580,7 @@ TEST_F(CudaBasicsTests1, execReduce3TAD_2) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -2603,10 +2603,10 @@ TEST_F(CudaBasicsTests1, execReduce3TAD_2) { // 
call cuda kernel which calculates result NativeOpExecutioner::execReduce3TAD(&lc, sd::reduce3::Dot, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], nullptr, nullptr); @@ -2636,7 +2636,7 @@ TEST_F(CudaBasicsTests1, execReduce3TAD_3) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -2659,10 +2659,10 @@ TEST_F(CudaBasicsTests1, execReduce3TAD_3) { // call cuda kernel which calculates result NativeOpExecutioner::execReduce3TAD(&lc, sd::reduce3::Dot, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2]); @@ -2692,7 +2692,7 @@ TEST_F(CudaBasicsTests1, execReduce3TAD_4) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -2714,10 +2714,10 @@ 
TEST_F(CudaBasicsTests1, execReduce3TAD_4) { // call cuda kernel which calculates result NativeOpExecutioner::execReduce3TAD(&lc, sd::reduce3::Dot, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2]); @@ -2753,9 +2753,9 @@ TEST_F(CudaBasicsTests1, execSummaryStats_1) { // call cuda kernel which calculates result NativeOpExecutioner::execSummaryStats(&lc, sd::variance::SummaryStatsStandardDeviation, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), true); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -2780,7 +2780,7 @@ TEST_F(CudaBasicsTests1, execSummaryStats_2) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -2802,9 +2802,9 @@ TEST_F(CudaBasicsTests1, execSummaryStats_2) { // call cuda kernel which calculates result NativeOpExecutioner::execSummaryStats(&lc, sd::variance::SummaryStatsStandardDeviation, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, 
z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], true); @@ -2834,7 +2834,7 @@ TEST_F(CudaBasicsTests1, execSummaryStats_3) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -2856,9 +2856,9 @@ TEST_F(CudaBasicsTests1, execSummaryStats_3) { // call cuda kernel which calculates result NativeOpExecutioner::execSummaryStats(&lc, sd::variance::SummaryStatsStandardDeviation, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], true); @@ -2895,9 +2895,9 @@ TEST_F(CudaBasicsTests1, execSummaryStatsScalar_1) { // call cuda kernel which calculates result NativeOpExecutioner::execSummaryStatsScalar(&lc, sd::variance::SummaryStatsStandardDeviation, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), true); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -2944,9 +2944,9 @@ TEST_F(CudaBasicsTests1, execRandom_1) { // // call cuda kernel which calculates result // NativeOpExecutioner::execRandom(&lc, sd::random::GaussianDistribution, // &gen, -// nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), -// nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), -// nullptr, z.getShapeInfo(), z.specialBuffer(), 
z.specialShapeInfo(), +// nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), +// nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), +// nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), // extraArguments.argumentsAsT(z.dataType())); // // cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -2992,8 +2992,8 @@ TEST_F(CudaBasicsTests1, execRandom_2) { // call cuda kernel which calculates result NativeOpExecutioner::execRandom(lc, sd::random::DropOut, &gen, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), extraArguments.argumentsAsT(z.dataType())); cudaResult = cudaStreamSynchronize(*lc->getCudaStream()); ASSERT_EQ(0, cudaResult); @@ -3036,7 +3036,7 @@ TEST_F(CudaBasicsTests1, execRandom_3) { // call cuda kernel which calculates result NativeOpExecutioner::execRandom(&lc, sd::random::UniformDistribution, &gen, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), devicePtrs[0]); cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); @@ -3081,7 +3081,7 @@ TEST_F(CudaBasicsTests1, execRandom_4) { // call cuda kernel which calculates result NativeOpExecutioner::execRandom(context, sd::random::UniformDistribution, &gen, - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), extraArguments.argumentsAsT(z.dataType())); // cudaResult = cudaStreamSynchronize(stream); ASSERT_EQ(0, cudaResult); diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests1.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests1.cpp index 8a03d4abc..959362c4d 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests1.cpp +++ 
b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests1.cpp @@ -1718,17 +1718,17 @@ TEST_F(DeclarableOpsTests1, TestRegistrator1) { // auto inputBuffers = new Nd4jPointer[2]; // auto inputShapes = new Nd4jPointer[2]; -// inputBuffers[0] = (Nd4jPointer) x->getBuffer(); -// inputBuffers[1] = (Nd4jPointer) y->getBuffer(); +// inputBuffers[0] = (Nd4jPointer) x->buffer(); +// inputBuffers[1] = (Nd4jPointer) y->buffer(); -// inputShapes[0] = (Nd4jPointer) x->getShapeInfo(); -// inputShapes[1] = (Nd4jPointer) y->getShapeInfo(); +// inputShapes[0] = (Nd4jPointer) x->shapeInfo(); +// inputShapes[1] = (Nd4jPointer) y->shapeInfo(); // auto outputBuffers = new Nd4jPointer[1]; // auto outputShapes = new Nd4jPointer[1]; -// outputBuffers[0] = (Nd4jPointer) z->getBuffer(); -// outputShapes[0] = (Nd4jPointer) z->getShapeInfo(); +// outputBuffers[0] = (Nd4jPointer) z->buffer(); +// outputShapes[0] = (Nd4jPointer) z->shapeInfo(); // //auto status = execCustomOp(nullptr, hash, inputBuffers, inputShapes, 2, outputBuffers, outputShapes, 1, nullptr, 0, nullptr, 0, false); @@ -1768,11 +1768,11 @@ TEST_F(DeclarableOpsTests1, TestRegistrator1) { // auto inputBuffers = new Nd4jPointer[2]; // auto inputShapes = new Nd4jPointer[2]; -// inputBuffers[0] = (Nd4jPointer) x->getBuffer(); -// inputBuffers[1] = (Nd4jPointer) y->getBuffer(); +// inputBuffers[0] = (Nd4jPointer) x->buffer(); +// inputBuffers[1] = (Nd4jPointer) y->buffer(); -// inputShapes[0] = (Nd4jPointer) x->getShapeInfo(); -// inputShapes[1] = (Nd4jPointer) y->getShapeInfo(); +// inputShapes[0] = (Nd4jPointer) x->shapeInfo(); +// inputShapes[1] = (Nd4jPointer) y->shapeInfo(); // auto outputBuffers = new Nd4jPointer[1]; // auto outputShapes = new Nd4jPointer[1]; @@ -1811,9 +1811,9 @@ TEST_F(DeclarableOpsTests1, TestGemv1) { auto z = NDArrayFactory::create_('f', {5, 1}); auto expBuffer = new float[5]{28.00f,64.00f,100.00f,136.00f,172.00f}; - auto exp = new NDArray(expBuffer, z->getShapeInfo()); + auto exp = new NDArray(expBuffer, 
z->shapeInfo()); - sd::blas::GEMV::op('f', x->rows(), x->columns(), 1.0f, x->getBuffer(), y->rows(), y->getBuffer(), 1, 0.0, z->getBuffer(), 1); + sd::blas::GEMV::op('f', x->rows(), x->columns(), 1.0f, x->buffer(), y->rows(), y->buffer(), 1, 0.0, z->buffer(), 1); ASSERT_TRUE(z->equalsTo(exp)); @@ -1930,8 +1930,8 @@ TEST_F(DeclarableOpsTests1, TestReductionShape1) { sd::ops::testreduction testop; - auto inP = new Nd4jLong[shape::shapeInfoLength(input->getShapeInfo())]; - memcpy(inP, input->getShapeInfo(), shape::shapeInfoByteLength(input->rankOf())); + auto inP = new Nd4jLong[shape::shapeInfoLength(input->shapeInfo())]; + memcpy(inP, input->shapeInfo(), shape::shapeInfoByteLength(input->rankOf())); auto inshape = new ShapeList(inP); @@ -1969,7 +1969,7 @@ TEST_F(DeclarableOpsTests1, TestReductionShape2) { sd::ops::testreduction testop; - auto inshapes = new ShapeList(input->getShapeInfo()); + auto inshapes = new ShapeList(input->shapeInfo()); auto shapes = testop.calculateOutputShape(inshapes, *block); ASSERT_EQ(1, shapes->size()); ASSERT_EQ(1, shapes->at(0)[0]); @@ -1994,14 +1994,14 @@ TEST_F(DeclarableOpsTests1, TestCustomShape1) { sd::ops::testcustom test; - auto inshapes = new ShapeList(input->getShapeInfo()); + auto inshapes = new ShapeList(input->shapeInfo()); auto shapes = test.calculateOutputShape(inshapes, *block); - ASSERT_EQ(input->getShapeInfo()[0], shapes->at(0)[0]); - ASSERT_EQ(input->getShapeInfo()[1] * 2, shapes->at(0)[1]); - ASSERT_EQ(input->getShapeInfo()[2] * 2, shapes->at(0)[2]); - ASSERT_EQ(input->getShapeInfo()[3] * 2, shapes->at(0)[3]); + ASSERT_EQ(input->shapeInfo()[0], shapes->at(0)[0]); + ASSERT_EQ(input->shapeInfo()[1] * 2, shapes->at(0)[1]); + ASSERT_EQ(input->shapeInfo()[2] * 2, shapes->at(0)[2]); + ASSERT_EQ(input->shapeInfo()[3] * 2, shapes->at(0)[3]); delete variableSpace; delete block; diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp index 
b8c89322c..963884c06 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests11.cpp @@ -3677,7 +3677,7 @@ TEST_F(DeclarableOpsTests11, SafeDivideMixed_Test1) { NDArray labels('c', {2, 3}, {1.0, 2.0, 3.0, -1.0, 2.0, 1.0}); auto sumDiff = labels.reduceAlongDimension(reduce::Sum, {1}, true); - NDArray numOfNonZero(sumDiff.getShapeInfo(), sd::DataType::INT64, false); + NDArray numOfNonZero(sumDiff.shapeInfo(), sd::DataType::INT64, false); numOfNonZero.assign(1); sumDiff.applyPairwiseTransform(pairwise::SafeDivide, numOfNonZero, sumDiff); } diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests12.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests12.cpp index 0684f7887..2bca43ae9 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests12.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests12.cpp @@ -779,8 +779,8 @@ TEST_F(DeclarableOpsTests12, pullRows_1) { std::vector dims = {1}; - auto xTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.getShapeInfo(), dims); - auto zTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(z.getShapeInfo(), dims); + auto xTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dims); + auto zTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dims); Nd4jPointer nativeStart[2]; @@ -789,8 +789,8 @@ TEST_F(DeclarableOpsTests12, pullRows_1) { #endif OpaqueDataBuffer xBuf(x.dataBuffer()); OpaqueDataBuffer zBuf(z.dataBuffer()); - pullRows(nativeStart, &xBuf, x.getShapeInfo(), x.getSpecialShapeInfo(), - &zBuf, z.getShapeInfo(), z.specialShapeInfo(), + pullRows(nativeStart, &xBuf, x.shapeInfo(), x.specialShapeInfo(), + &zBuf, z.shapeInfo(), z.specialShapeInfo(), 4, pidx, xTadPack.platformShapeInfo(), xTadPack.platformOffsets(), zTadPack.platformShapeInfo(), zTadPack.platformOffsets()); @@ -815,8 +815,8 @@ TEST_F(DeclarableOpsTests12, pullRows_2) { std::vector dims = {1}; - 
auto xTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.getShapeInfo(), dims); - auto zTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(z.getShapeInfo(), dims); + auto xTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dims); + auto zTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dims); Nd4jPointer nativeStart[2]; #ifdef __CUDABLAS__ @@ -824,8 +824,8 @@ TEST_F(DeclarableOpsTests12, pullRows_2) { #endif OpaqueDataBuffer xBuf(x.dataBuffer()); OpaqueDataBuffer zBuf(z.dataBuffer()); - pullRows(nativeStart, &xBuf, x.getShapeInfo(), x.specialShapeInfo(), - &zBuf, z.getShapeInfo(), z.specialShapeInfo(), + pullRows(nativeStart, &xBuf, x.shapeInfo(), x.specialShapeInfo(), + &zBuf, z.shapeInfo(), z.specialShapeInfo(), 4, pidx, xTadPack.platformShapeInfo(), xTadPack.platformOffsets(), zTadPack.platformShapeInfo(), zTadPack.platformOffsets()); diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp index 4052e260d..c37f3fe4a 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp @@ -3211,8 +3211,8 @@ TEST_F(DeclarableOpsTests13, batchnorm_bp_test9) { int* dims = reinterpret_cast(manager.replicatePointer(dimensions.data(), dimensions.size() * sizeof(int))); input.reduceAlongDimension(sd::reduce::Mean, mean, dimensions); NDArray::prepareSpecialUse({&variance}, {&input}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), dimensions); - NativeOpExecutioner::execSummaryStats(input.getContext(), 0,input.getBuffer(), input.getShapeInfo(),input.getSpecialBuffer(), input.getSpecialShapeInfo(),nullptr,variance.getBuffer(), variance.getShapeInfo(),variance.getSpecialBuffer(), variance.getSpecialShapeInfo(), dims, dimensions.size(),packX.platformShapeInfo(), packX.platformOffsets(),false); + auto packX = 
sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimensions); + NativeOpExecutioner::execSummaryStats(input.getContext(), 0,input.buffer(), input.shapeInfo(),input.specialBuffer(), input.specialShapeInfo(),nullptr,variance.buffer(), variance.shapeInfo(),variance.specialBuffer(), variance.specialShapeInfo(), dims, dimensions.size(),packX.platformShapeInfo(), packX.platformOffsets(),false); manager.synchronize(); NDArray::registerSpecialUse({&variance}, {&input}); @@ -3262,8 +3262,8 @@ TEST_F(DeclarableOpsTests13, batchnorm_bp_test10) { int* dims = reinterpret_cast(manager.replicatePointer(dimensions.data(), dimensions.size() * sizeof(int))); input.reduceAlongDimension(sd::reduce::Mean, mean, dimensions); NDArray::prepareSpecialUse({&variance}, {&input}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), dimensions); - NativeOpExecutioner::execSummaryStats(input.getContext(), 0,input.getBuffer(), input.getShapeInfo(),input.getSpecialBuffer(), input.getSpecialShapeInfo(),nullptr,variance.getBuffer(), variance.getShapeInfo(),variance.getSpecialBuffer(), variance.getSpecialShapeInfo(), dims, dimensions.size(),packX.platformShapeInfo(), packX.platformOffsets(),false); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimensions); + NativeOpExecutioner::execSummaryStats(input.getContext(), 0,input.buffer(), input.shapeInfo(),input.specialBuffer(), input.specialShapeInfo(),nullptr,variance.buffer(), variance.shapeInfo(),variance.specialBuffer(), variance.specialShapeInfo(), dims, dimensions.size(),packX.platformShapeInfo(), packX.platformOffsets(),false); manager.synchronize(); NDArray::registerSpecialUse({&variance}, {&input}); @@ -3325,8 +3325,8 @@ TEST_F(DeclarableOpsTests13, batchnorm_bp_test11) { int* dims = reinterpret_cast(manager.replicatePointer(dimensions.data(), dimensions.size() * sizeof(int))); input.reduceAlongDimension(sd::reduce::Mean, mean, dimensions, 
true); NDArray::prepareSpecialUse({&variance}, {&input}); - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), dimensions); - NativeOpExecutioner::execSummaryStats(input.getContext(), 0,input.getBuffer(), input.getShapeInfo(),input.getSpecialBuffer(), input.getSpecialShapeInfo(),nullptr,variance.getBuffer(), variance.getShapeInfo(),variance.getSpecialBuffer(), variance.getSpecialShapeInfo(), dims, dimensions.size(),packX.platformShapeInfo(), packX.platformOffsets(),false); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(input.shapeInfo(), dimensions); + NativeOpExecutioner::execSummaryStats(input.getContext(), 0,input.buffer(), input.shapeInfo(),input.specialBuffer(), input.specialShapeInfo(),nullptr,variance.buffer(), variance.shapeInfo(),variance.specialBuffer(), variance.specialShapeInfo(), dims, dimensions.size(),packX.platformShapeInfo(), packX.platformOffsets(),false); manager.synchronize(); NDArray::registerSpecialUse({&variance}, {&input}); diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests19.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests19.cpp index f48e3d946..641728ad3 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests19.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests19.cpp @@ -25,6 +25,7 @@ #include #include #include +#include using namespace sd; @@ -39,6 +40,195 @@ public: } }; +TEST_F(DeclarableOpsTests19, test_threshold_encode_1) { + auto x = NDArrayFactory::create('c', {3}, {1.5, 2.5, -3.5}); + auto exp_encoded = NDArrayFactory::create('c', {7}, {3, 3, 1056964608, 0, 1, 2, -3}); + auto exp_gradients = NDArrayFactory::create('c', {3}, {1.0, 2.0, -3.0}); + + sd::ops::encode_threshold op; + auto result = op.evaluate({&x}, {0.5}); + + auto gradients = result.at(0); + auto encoded = result.at(1); + + //encoded->printIndexedBuffer("ENC"); + + ASSERT_EQ(exp_encoded, *encoded); + ASSERT_EQ(exp_gradients, x); + + // FIXME: we need to add a way to declare 
individual inplace outputs + //ASSERT_EQ(exp_gradients, *gradients); +} + +TEST_F(DeclarableOpsTests19, test_threshold_encode_2) { + for (int length = 5; length < 35; length++) { + auto x = NDArrayFactory::create('c', {10000}); + auto exp_gradients = NDArrayFactory::create('c', {10000}); + + for (int e = 0; e < length; e++) { + x.p(e, 2e-3); + exp_gradients.p(e, 1e-3); + } + + sd::ops::encode_threshold op; + auto result = op.evaluate({&x}, {1e-3}); + + auto encoded = result.at(1); + + ASSERT_EQ(length + 4, encoded->lengthOf()); + ASSERT_EQ(exp_gradients, x); + } +} + +TEST_F(DeclarableOpsTests19, test_threshold_encode_boundary_1) { + auto x = NDArrayFactory::create('c', {6}); + x = 1.0f; + + sd::ops::encode_threshold op; + auto result = op.evaluate({&x}, {1.0}, {3}); + + auto gradients = result.at(0); + auto encoded = result.at(1); + + ASSERT_EQ(7, encoded->lengthOf()); + ASSERT_EQ(3, x.sumNumber().e(0)); +} + +TEST_F(DeclarableOpsTests19, test_threshold_encode_boundary_2) { + auto x = NDArrayFactory::create('c', {1000}); + x = 1.0f; + + sd::ops::encode_threshold op; + auto result = op.evaluate({&x}, {1.0}, {100}); + + auto gradients = result.at(0); + auto encoded = result.at(1); + + ASSERT_EQ(104, encoded->lengthOf()); + + ASSERT_EQ(900, x.sumNumber().e(0)); +} + +TEST_F(DeclarableOpsTests19, test_threshold_decode_1) { + auto x = NDArrayFactory::create('c', {3}, {1.0, 2.0, -3.0}); + auto y = NDArrayFactory::create('c', {7}, {3, 3, 1056964608, 0, 1, 2, -3}); + auto exp_gradients = NDArrayFactory::create('c', {3}, {1.5, 2.5, -3.5}); + + sd::ops::decode_threshold op; + auto status = op.execute({&x, &y}, {&x}); + ASSERT_EQ(Status::OK(), status); + ASSERT_EQ(exp_gradients, x); +} + +TEST_F(DeclarableOpsTests19, test_bitmap_encode_1) { + auto initial = NDArrayFactory::create('c', {6}, {0.0f, 0.0f, 1e-3f, -1e-3f, 0.0f, 0.0f}); + auto exp_0 = initial.like(); + auto exp_1 = initial.dup(); + auto exp_c = NDArrayFactory::create(2L); + + sd::ops::encode_bitmap enc; + auto 
enc_result = enc.evaluate({&initial}, {1e-3f}); + ASSERT_EQ(Status::OK(), enc_result.status()); + + //initial.printIndexedBuffer("initial"); + ASSERT_EQ(exp_0, initial); + + auto encoded = enc_result.at(1); + auto counter = enc_result.at(2); + + //encoded->printIndexedBuffer("encoded"); + + ASSERT_EQ(exp_c, *counter); + + sd::ops::decode_bitmap dec; + auto status = dec.execute({&initial, encoded}, {&initial}); + ASSERT_EQ(Status::OK(), status); + + + //initial.printIndexedBuffer(); + + ASSERT_EQ(exp_1, initial); +} + +TEST_F(DeclarableOpsTests19, test_bitmap_encode_decode) { + auto initial = NDArrayFactory::create('c', {256000}); + initial = 1.0f; + auto exp = initial.dup(); + auto neg = initial.like(); + neg = 0.5f; + + sd::ops::encode_bitmap enc; + auto enc_result = enc.evaluate({&initial}, {0.5f}); + auto encoded = enc_result.at(1); + + // checking equality of all encoded bits + for (int e = 5; e < encoded->lengthOf() - 1; e++) { + if (encoded->e(e) != encoded->e(e - 1)) + nd4j_printf("Non equal encoded values at E[%i]: %i;\n", e, encoded->e(e)); + } + + ASSERT_NE(exp, initial); + ASSERT_EQ(neg, initial); + + sd::ops::decode_bitmap dec; + auto status = dec.execute({&initial, encoded}, {&initial}); + ASSERT_EQ(Status::OK(), status); + + // checking equality of all dedoded bits + for (int e = 0; e < initial.lengthOf(); e++) { + auto f = initial.e(e); + if (f != 1.0f) + nd4j_printf("initial[%i] = %f\n", e, f); + } + + + ASSERT_EQ(exp, initial); +} + +TEST_F(DeclarableOpsTests19, test_threshold_encode_decode) { + auto initial = NDArrayFactory::create('c', {256000}); + initial = 1.0f; + auto exp = initial.dup(); + auto neg = initial.like(); + neg = 0.5f; + + sd::ops::encode_threshold enc; + auto enc_result = enc.evaluate({&initial}, {0.5f}); + auto encoded = enc_result.at(1); + + ASSERT_EQ(256000 + 4, encoded->lengthOf()); + ASSERT_NE(exp, initial); + + for (int e = 0; e < initial.lengthOf(); e++) { + auto f = initial.e(e); + if (f != 0.5f) { + 
nd4j_printf("initial[%i] = %f\n", e, f); + throw std::runtime_error(""); + } + } + ASSERT_EQ(neg, initial); + + // checking equality of all encoded bits + //for (int e = 5; e < encoded->lengthOf() - 1; e++) { + //if (encoded->e(e) != encoded->e(e - 1) + 1) + //nd4j_printf("Non equal encoded values at E[%i]: %i;\n", e, encoded->e(e)); + //} + + sd::ops::decode_threshold dec; + auto status = dec.execute({&initial, encoded}, {&initial}); + ASSERT_EQ(Status::OK(), status); + + // checking equality of all dedoded bits + for (int e = 0; e < initial.lengthOf(); e++) { + auto f = initial.e(e); + if (f != 1.0f) + nd4j_printf("initial[%i] = %f\n", e, f); + } + + ASSERT_EQ(exp, initial); +} + + TEST_F(DeclarableOpsTests19, test_matmul_ccc) { auto x = NDArrayFactory::create('c', {10, 10}); auto y = NDArrayFactory::create('c', {10, 10}); diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests6.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests6.cpp index 002e3376f..450b32bcc 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests6.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests6.cpp @@ -156,7 +156,7 @@ TEST_F(DeclarableOpsTests6, Test_StridedSlice_Once_Again_04) { block->getIArguments()->push_back(1); block->getIArguments()->push_back(0); block->getIArguments()->push_back(0); - auto inputShapes = new ShapeList({ones->getShapeInfo(), b->getShapeInfo(), e->getShapeInfo(), s->getShapeInfo()}); + auto inputShapes = new ShapeList({ones->shapeInfo(), b->shapeInfo(), e->shapeInfo(), s->shapeInfo()}); sd::ops::strided_slice op; auto result = op.calculateOutputShape(inputShapes, *block); //execute({ones, &b, &e, &s}, {}, {0, 1, 0, 0, 0}); ASSERT_EQ(result->size(), 1); diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests9.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests9.cpp index c7e704a21..556ce3bb6 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests9.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests9.cpp @@ -114,7 
+114,7 @@ TEST_F(DeclarableOpsTests9, exponentialDistributionInv_test1) { if (rng == nullptr) throw std::runtime_error("DeclarableOpsTests9.exponentialDistributionInv_test1: RNG initialization failed !"); - functions::random::RandomFunction::template execTransform>(rng, x.getBuffer(), x.getShapeInfo(), extraParams); + functions::random::RandomFunction::template execTransform>(rng, x.getBuffer(), x.shapeInfo(), extraParams); const double actualMean = x.meanNumber().e(0); const double actualStd = x.varianceNumber(variance::SummaryStatsStandardDeviation, true).e(0); @@ -145,7 +145,7 @@ TEST_F(DeclarableOpsTests9, exponentialDistributionInv_test2) { if (rng == nullptr) throw std::runtime_error("DeclarableOpsTests9.exponentialDistributionInv_test2: RNG initialization failed !"); - functions::random::RandomFunction::template execTransform>(rng, y.getBuffer(), y.getShapeInfo(), x.getBuffer(), x.getShapeInfo(), extraParams); + functions::random::RandomFunction::template execTransform>(rng, y.getBuffer(), y.shapeInfo(), x.getBuffer(), x.shapeInfo(), extraParams); const double actualMean = x.meanNumber().e(0); const double actualStd = x.varianceNumber(variance::SummaryStatsStandardDeviation, true).e(0); @@ -174,7 +174,7 @@ TEST_F(DeclarableOpsTests9, exponentialDistribution_test1) { if (rng == nullptr) throw std::runtime_error("DeclarableOpsTests9.exponentialDistribution_test1: RNG initialization failed !"); - functions::random::RandomFunction::template execTransform>(rng, x.getBuffer(), x.getShapeInfo(), extraParams); + functions::random::RandomFunction::template execTransform>(rng, x.getBuffer(), x.shapeInfo(), extraParams); const double actualMean = x.meanNumber().e(0); const double actualStd = x.varianceNumber(variance::SummaryStatsStandardDeviation, true).e(0); @@ -207,7 +207,7 @@ TEST_F(DeclarableOpsTests9, exponentialDistribution_test2) { if (rng == nullptr) throw std::runtime_error("DeclarableOpsTests9.exponentialDistribution_test2: RNG initialization failed !"); - 
functions::random::RandomFunction::template execTransform>(rng, y.getBuffer(), y.getShapeInfo(), x.getBuffer(), x.getShapeInfo(), extraParams); + functions::random::RandomFunction::template execTransform>(rng, y.getBuffer(), y.shapeInfo(), x.getBuffer(), x.shapeInfo(), extraParams); destroyRandom((Nd4jPointer) rng); #endif @@ -539,7 +539,7 @@ TEST_F(DeclarableOpsTests9, concat_test14) { auto z = result.at(0); - Nd4jLong numOfTads= ShapeUtils::getNumOfSubArrs(z->getShapeInfo(), {0}); + Nd4jLong numOfTads= ShapeUtils::getNumOfSubArrs(z->shapeInfo(), {0}); ASSERT_TRUE(2 == numOfTads); for (int e = 0; e < numOfTads; ++e) { @@ -601,7 +601,7 @@ TEST_F(DeclarableOpsTests9, concat_test17) { // z->printShapeInfo(); // z->printIndexedBuffer(); - Nd4jLong numOfTads= ShapeUtils::getNumOfSubArrs(z->getShapeInfo(), {0}); + Nd4jLong numOfTads= ShapeUtils::getNumOfSubArrs(z->shapeInfo(), {0}); ASSERT_TRUE(2 == numOfTads); for (int e = 0; e < numOfTads; ++e) { @@ -680,7 +680,7 @@ TEST_F(DeclarableOpsTests9, concat_test20) { auto z = result.at(0); - Nd4jLong numOfTads= ShapeUtils::getNumOfSubArrs(z->getShapeInfo(), {0}); + Nd4jLong numOfTads= ShapeUtils::getNumOfSubArrs(z->shapeInfo(), {0}); ASSERT_TRUE(4 == numOfTads); for (int e = 0; e < numOfTads; e++) { diff --git a/libnd4j/tests_cpu/layers_tests/EmptyTests.cpp b/libnd4j/tests_cpu/layers_tests/EmptyTests.cpp index e6aeb43d4..81040185d 100644 --- a/libnd4j/tests_cpu/layers_tests/EmptyTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/EmptyTests.cpp @@ -216,7 +216,7 @@ TEST_F(EmptyTests, test_shaped_empty_3) { } TEST_F(EmptyTests, test_shaped_empty_4) { - auto shape = ConstantShapeHelper::getInstance()->vectorShapeInfo(0, sd::DataType::FLOAT32); + const auto shape = ConstantShapeHelper::getInstance()->vectorShapeInfo(0, sd::DataType::FLOAT32); NDArray array(shape, true, sd::LaunchContext::defaultContext()); std::vector shapeOf({0}); diff --git a/libnd4j/tests_cpu/layers_tests/HelpersTests1.cpp 
b/libnd4j/tests_cpu/layers_tests/HelpersTests1.cpp index 679f0c5eb..e25bd0144 100644 --- a/libnd4j/tests_cpu/layers_tests/HelpersTests1.cpp +++ b/libnd4j/tests_cpu/layers_tests/HelpersTests1.cpp @@ -1440,7 +1440,7 @@ TEST_F(HelpersTests1, SVD_test17) { // auto outArr = NDArrayFactory::create('c', {2,5}); // // -// ops::helpers::reverseArray(sd::LaunchContext ::defaultContext(), inArr.getBuffer(), inArr.getShapeInfo(), outArr.getBuffer(), outArr.getShapeInfo()); +// ops::helpers::reverseArray(sd::LaunchContext ::defaultContext(), inArr.getBuffer(), inArr.shapeInfo(), outArr.getBuffer(), outArr.shapeInfo()); // // ASSERT_TRUE(outArr.equalsTo(&exp)); // ASSERT_TRUE(outArr.isSameShapeStrict(exp)); @@ -1454,7 +1454,7 @@ TEST_F(HelpersTests1, SVD_test17) { // auto exp = NDArrayFactory::create('c', {2,5}, {10,9,8,7,6,5,4,3,2,1}); // // -// ops::helpers::reverseArray(sd::LaunchContext ::defaultContext(), inArr.getBuffer(), inArr.getShapeInfo(), inArr.getBuffer(), inArr.getShapeInfo()); +// ops::helpers::reverseArray(sd::LaunchContext ::defaultContext(), inArr.getBuffer(), inArr.shapeInfo(), inArr.getBuffer(), inArr.shapeInfo()); // // ASSERT_TRUE(inArr.equalsTo(&exp)); // ASSERT_TRUE(inArr.isSameShapeStrict(exp)); @@ -1468,7 +1468,7 @@ TEST_F(HelpersTests1, SVD_test17) { // auto exp = NDArrayFactory::create('c', {2,5}, {5,4,3,2,1,6,7,8,9,10}); // auto outArr = NDArrayFactory::create('c', {2,5}); // -// ops::helpers::reverseArray(sd::LaunchContext ::defaultContext(), inArr.getBuffer(), inArr.getShapeInfo(), outArr.getBuffer(), outArr.getShapeInfo(), 5); +// ops::helpers::reverseArray(sd::LaunchContext ::defaultContext(), inArr.getBuffer(), inArr.shapeInfo(), outArr.getBuffer(), outArr.shapeInfo(), 5); // // ASSERT_TRUE(outArr.equalsTo(&exp)); // ASSERT_TRUE(outArr.isSameShapeStrict(exp)); diff --git a/libnd4j/tests_cpu/layers_tests/JavaInteropTests.cpp b/libnd4j/tests_cpu/layers_tests/JavaInteropTests.cpp index 29c681544..e6992d7a2 100644 --- 
a/libnd4j/tests_cpu/layers_tests/JavaInteropTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/JavaInteropTests.cpp @@ -46,7 +46,7 @@ TEST_F(JavaInteropTests, TestShapeExposure1) { std::vector tArgs({}); std::vector iArgs({2, 2, 1, 1, 0, 0, 1, 1, 1}); - Nd4jPointer ptrs[] = {(Nd4jPointer) input.getShapeInfo(), (Nd4jPointer) weights.getShapeInfo()}; + Nd4jPointer ptrs[] = {(Nd4jPointer) input.shapeInfo(), (Nd4jPointer) weights.shapeInfo()}; auto shapeList = calculateOutputShapes(nullptr, op.getOpHash(), ptrs, 2, tArgs.data(), tArgs.size(), iArgs.data(), iArgs.size()); @@ -76,7 +76,7 @@ TEST_F(JavaInteropTests, TestShapeExposure2) { std::vector iArgs({}); - Nd4jPointer ptrs[] = {(Nd4jPointer) input.getShapeInfo()}; + Nd4jPointer ptrs[] = {(Nd4jPointer) input.shapeInfo()}; auto shapeList = calculateOutputShapes(nullptr, op.getOpHash(), ptrs, 1, tArgs.data(), tArgs.size(), iArgs.data(), iArgs.size()); @@ -104,8 +104,8 @@ TEST_F(JavaInteropTests, TestShapeExposure3) { sub1.assign(1.0f); sub2.assign(2.0f); - Nd4jPointer inputBuffers[] = {x.buffer(), sizes.buffer(), x.getSpecialBuffer(), sizes.getSpecialBuffer()}; - Nd4jPointer inputShapes[] = {x.shapeInfo(), sizes.shapeInfo(), x.getSpecialShapeInfo(), sizes.getSpecialShapeInfo()}; + Nd4jPointer inputBuffers[] = {x.buffer(), sizes.buffer(), x.specialBuffer(), sizes.specialBuffer()}; + Nd4jPointer inputShapes[] = {(Nd4jPointer)x.shapeInfo(), (Nd4jPointer)sizes.shapeInfo(), (Nd4jPointer)x.specialShapeInfo(), (Nd4jPointer)sizes.specialShapeInfo()}; sd::ops::split_v op; @@ -130,11 +130,11 @@ TEST_F(JavaInteropTests, Test_Squeeze_1) { sd::ops::squeeze op; - Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.getBuffer(), x.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.getShapeInfo(), x.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.buffer(), x.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.shapeInfo(), (Nd4jPointer)x.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = 
{(Nd4jPointer) z.getBuffer(), z.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.getShapeInfo(), z.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.buffer(), z.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.shapeInfo(), (Nd4jPointer)z.specialShapeInfo()}; auto status = execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); ASSERT_EQ(Status::OK(), status); @@ -151,12 +151,12 @@ TEST_F(JavaInteropTests, Test_RDiv_1) { sd::ops::reversedivide op; - Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.getBuffer(), (Nd4jPointer) y.getBuffer(), x.getSpecialBuffer(), y.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.getShapeInfo(), (Nd4jPointer) y.getShapeInfo(), x.getSpecialShapeInfo(), y.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.buffer(), (Nd4jPointer) y.buffer(), x.specialBuffer(), y.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.shapeInfo(), (Nd4jPointer) y.shapeInfo(), (Nd4jPointer)x.specialShapeInfo(), (Nd4jPointer)y.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.getBuffer(), z.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.getShapeInfo(), z.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.buffer(), (Nd4jPointer)z.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.shapeInfo(), (Nd4jPointer)z.specialShapeInfo()}; auto status = execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 2, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); NDArray::registerSpecialUse({&z}, {&x, &y}); @@ -186,12 +186,12 @@ TEST_F(JavaInteropTests, TestSconv2d_1) { NDArray::prepareSpecialUse({&output}, {&input, &weightsD, &weightsP, &bias}); - Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) input.getBuffer(), (Nd4jPointer) weightsD.getBuffer(), 
(Nd4jPointer) weightsP.getBuffer(), (Nd4jPointer) bias.getBuffer(), (Nd4jPointer) input.getSpecialBuffer(), (Nd4jPointer) weightsD.getSpecialBuffer(), (Nd4jPointer) weightsP.getSpecialBuffer(), (Nd4jPointer) bias.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) input.getShapeInfo(), (Nd4jPointer) weightsD.getShapeInfo(), (Nd4jPointer) weightsP.getShapeInfo(), (Nd4jPointer) bias.getShapeInfo(), (Nd4jPointer) input.getSpecialShapeInfo(), (Nd4jPointer) weightsD.getSpecialShapeInfo(), (Nd4jPointer) weightsP.getSpecialShapeInfo(), (Nd4jPointer) bias.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) input.buffer(), (Nd4jPointer) weightsD.buffer(), (Nd4jPointer) weightsP.buffer(), (Nd4jPointer) bias.buffer(), (Nd4jPointer) input.specialBuffer(), (Nd4jPointer) weightsD.specialBuffer(), (Nd4jPointer) weightsP.specialBuffer(), (Nd4jPointer) bias.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) input.shapeInfo(), (Nd4jPointer) weightsD.shapeInfo(), (Nd4jPointer) weightsP.shapeInfo(), (Nd4jPointer) bias.shapeInfo(), (Nd4jPointer) input.specialShapeInfo(), (Nd4jPointer) weightsD.specialShapeInfo(), (Nd4jPointer) weightsP.specialShapeInfo(), (Nd4jPointer) bias.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) output.getBuffer(), (Nd4jPointer) output.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) output.getShapeInfo(), (Nd4jPointer) output.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) output.buffer(), (Nd4jPointer) output.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) output.shapeInfo(), (Nd4jPointer) output.specialShapeInfo()}; Nd4jLong exp[] = {1, 1, 1, 1, 0, 0, 1, 1, 0, 0}; @@ -221,12 +221,12 @@ TEST_F(JavaInteropTests, TestSconv2d_2) { NDArray::prepareSpecialUse({&output}, {&input, &weightsD}); - Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) input.getBuffer(), (Nd4jPointer) weightsD.getBuffer(), input.getSpecialBuffer(), 
weightsD.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) input.getShapeInfo(), (Nd4jPointer) weightsD.getShapeInfo(), input.getSpecialShapeInfo(), weightsD.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) input.buffer(), (Nd4jPointer) weightsD.buffer(), input.specialBuffer(), weightsD.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) input.shapeInfo(), (Nd4jPointer) weightsD.shapeInfo(), (Nd4jPointer)input.specialShapeInfo(), (Nd4jPointer)weightsD.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) output.getBuffer(), output.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) output.getShapeInfo(), output.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) output.buffer(), output.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) output.shapeInfo(), (Nd4jPointer)output.specialShapeInfo()}; Nd4jLong exp[] = {1, 1, 1, 1, 0, 0, 1, 1, 0}; @@ -245,11 +245,11 @@ TEST_F(JavaInteropTests, TestMaxPooling2d_1) { NDArray::prepareSpecialUse({&output}, {&input}); - Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) input.getBuffer(), input.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) input.getShapeInfo(), input.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) input.buffer(), input.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) input.shapeInfo(), (Nd4jPointer)input.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) output.getBuffer(), output.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) output.getShapeInfo(), output.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) output.buffer(), output.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) output.shapeInfo(), (Nd4jPointer)output.specialShapeInfo()}; std::vector iArgs({2, 2, 1, 1, 0, 0, 1, 1, 1}); @@ -276,11 +276,11 @@ TEST_F(JavaInteropTests, TestCol2Im_1) { 
NDArray::prepareSpecialUse({&output}, {&input}); - Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) input.getBuffer(), input.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) input.getShapeInfo(), input.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) input.buffer(), input.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) input.shapeInfo(), (Nd4jPointer)input.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) output.getBuffer(), output.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) output.getShapeInfo(), output.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) output.buffer(), output.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) output.shapeInfo(), (Nd4jPointer)output.specialShapeInfo()}; sd::ops::col2im op; @@ -316,11 +316,11 @@ TEST_F(JavaInteropTests, TestPNorm_1) { Nd4jLong exp[] = {2, 2, 1, 1, 0, 0, 1, 1, 0, 2, 0, 0}; - Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) input.getBuffer(), input.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) input.getShapeInfo(), input.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) input.buffer(), input.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) input.shapeInfo(), (Nd4jPointer)input.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) output.getBuffer(), output.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) output.getShapeInfo(), output.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) output.buffer(), output.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) output.shapeInfo(), (Nd4jPointer)output.specialShapeInfo()}; execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, exp, 11, nullptr, 0, false); @@ -342,8 +342,8 @@ TEST_F(JavaInteropTests, TestInplace_1) { double extras[] = {-1.0f, 1.0f}; - Nd4jPointer 
ptrsInBuffer[] = {(Nd4jPointer) input.getBuffer(), input.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) input.getShapeInfo(), input.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) input.buffer(), input.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) input.shapeInfo(), (Nd4jPointer)input.specialShapeInfo()}; Nd4jStatus result = execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 1, nullptr, nullptr, 0, extras, 2, nullptr, 0, nullptr, 0, true); @@ -482,11 +482,11 @@ TEST_F(JavaInteropTests, test_avgpooling_edge_1) { Nd4jLong exp[] = {3,3, 1,1, 0,0, 1,1, 1, 0, 1}; - Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.getBuffer(), x.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.getShapeInfo(), x.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.buffer(), x.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.shapeInfo(), x.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.getBuffer(), z.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.getShapeInfo(), z.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.buffer(), z.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.shapeInfo(), z.specialShapeInfo()}; auto result = execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, exp, 11, nullptr, 0, false); @@ -669,11 +669,11 @@ TEST_F(JavaInteropTests, Test_Greater_1) { sd::ops::greater op; - Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.getBuffer(), (Nd4jPointer) y.getBuffer(), x.getSpecialBuffer(), y.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.getShapeInfo(), (Nd4jPointer) y.getShapeInfo(), x.getSpecialShapeInfo(), y.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.buffer(), (Nd4jPointer) y.buffer(), x.specialBuffer(), y.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = 
{(Nd4jPointer) x.shapeInfo(), (Nd4jPointer) y.shapeInfo(), (Nd4jPointer)x.specialShapeInfo(), (Nd4jPointer)y.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) o.getBuffer(), o.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) o.getShapeInfo(), o.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) o.buffer(), o.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) o.shapeInfo(), (Nd4jPointer)o.specialShapeInfo()}; execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 2, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); @@ -693,11 +693,11 @@ TEST_F(JavaInteropTests, Test_Greater_2) { NDArray::prepareSpecialUse({&o}, {&x, &y}); - Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.getBuffer(), (Nd4jPointer) y.getBuffer(), x.getSpecialBuffer(), y.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.getShapeInfo(), (Nd4jPointer) y.getShapeInfo(), x.getSpecialShapeInfo(), y.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.buffer(), (Nd4jPointer) y.buffer(), x.specialBuffer(), y.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.shapeInfo(), (Nd4jPointer) y.shapeInfo(), (Nd4jPointer)x.specialShapeInfo(), (Nd4jPointer)y.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) o.getBuffer(), o.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) o.getShapeInfo(), o.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) o.buffer(), o.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) o.shapeInfo(), (Nd4jPointer)o.specialShapeInfo()}; execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 2, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); @@ -716,11 +716,11 @@ TEST_F(JavaInteropTests, Test_Boolean_Op_1) { NDArray::prepareSpecialUse({&o}, {&x}); - Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.getBuffer(), 
x.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.getShapeInfo(), x.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.buffer(), x.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.shapeInfo(), (Nd4jPointer)x.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) o.getBuffer(), o.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) o.getShapeInfo(), o.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) o.buffer(), o.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) o.shapeInfo(), (Nd4jPointer)o.specialShapeInfo()}; auto hash = op.getOpHash(); auto status = execCustomOp(nullptr, hash, ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); @@ -741,11 +741,11 @@ TEST_F(JavaInteropTests, Test_Inplace_Outputs_1) { NDArray::prepareSpecialUse({&z}, {&x}); - Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.getBuffer(), x.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.getShapeInfo(), x.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.buffer(), x.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.shapeInfo(), (Nd4jPointer)x.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.getBuffer(), z.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.getShapeInfo(), z.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.buffer(), z.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.shapeInfo(), (Nd4jPointer)z.specialShapeInfo()}; auto hash = op.getOpHash(); auto status = execCustomOp(nullptr, hash, ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); @@ -769,11 +769,11 @@ TEST_F(JavaInteropTests, Test_Inplace_Outputs_2) { NDArray::prepareSpecialUse({&z}, {&x, &y}); - Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.getBuffer(), 
(Nd4jPointer) y.getBuffer(), x.getSpecialBuffer(), y.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.getShapeInfo(), (Nd4jPointer) y.getShapeInfo(), x.getSpecialShapeInfo(), y.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.buffer(), (Nd4jPointer) y.buffer(), x.specialBuffer(), y.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.shapeInfo(), (Nd4jPointer) y.shapeInfo(), (Nd4jPointer)x.specialShapeInfo(), (Nd4jPointer)y.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.getBuffer(), z.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.getShapeInfo(), z.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.buffer(), z.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.shapeInfo(), (Nd4jPointer)z.specialShapeInfo()}; auto hash = op.getOpHash(); auto status = execCustomOp(nullptr, hash, ptrsInBuffer, ptrsInShapes, 2, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); @@ -796,11 +796,11 @@ TEST_F(JavaInteropTests, Test_Inplace_Outputs_3) { NDArray::prepareSpecialUse({&output}, {&input, &indices}); - Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) input.getBuffer(), (Nd4jPointer) indices.getBuffer(), input.getSpecialBuffer(), indices.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) input.getShapeInfo(), (Nd4jPointer) indices.getShapeInfo(), input.getSpecialShapeInfo(), input.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) input.buffer(), (Nd4jPointer) indices.buffer(), input.specialBuffer(), indices.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) input.shapeInfo(), (Nd4jPointer) indices.shapeInfo(), (Nd4jPointer)input.specialShapeInfo(), (Nd4jPointer)input.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) output.getBuffer(), output.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) output.getShapeInfo(), 
output.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) output.buffer(), output.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) output.shapeInfo(), (Nd4jPointer)output.specialShapeInfo()}; Nd4jLong iArgs[] = {1}; @@ -830,8 +830,8 @@ TEST_F(JavaInteropTests, Test_Reduce3_EdgeCase) { extraPointers = new Nd4jPointer[6] {nullptr, context->getCudaStream(), context->getScalarPointer(), nullptr, context->getCudaSpecialStream(), context->getReductionPointer()}; #endif - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.getShapeInfo(), {0,1}); - auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.getShapeInfo(), {0,1}); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), {0,1}); + auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.shapeInfo(), {0,1}); NDArray::prepareSpecialUse({&z}, {&x, &y, &dims}); OpaqueDataBuffer xBuf(x.dataBuffer()); @@ -877,11 +877,11 @@ TEST_F(JavaInteropTests, Test_AveragePooling_FF_TF_double) { NDArray::prepareSpecialUse({&z}, {&input}); - Nd4jPointer ptrsInBuffer[] = {reinterpret_cast(input.buffer()), input.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {reinterpret_cast(input.shapeInfo()), input.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {reinterpret_cast(input.buffer()), input.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer)input.shapeInfo(), (Nd4jPointer)input.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {reinterpret_cast(z.buffer()), z.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {reinterpret_cast(z.shapeInfo()), z.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {reinterpret_cast(z.buffer()), z.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer)z.shapeInfo(), (Nd4jPointer)z.specialShapeInfo()}; Nd4jLong iArgs[] = {3,3, 3,3, 0,0, 1,1,1, 0,1}; @@ -903,11 +903,11 @@ TEST_F(JavaInteropTests, Test_MaxPool2D_float_1) { NDArray::prepareSpecialUse({&z}, 
{&input}); - Nd4jPointer ptrsInBuffer[] = {reinterpret_cast(input.buffer()), input.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {reinterpret_cast(input.shapeInfo()), input.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {reinterpret_cast(input.buffer()), input.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer)input.shapeInfo(), (Nd4jPointer)input.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {reinterpret_cast(z.buffer()), z.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {reinterpret_cast(z.shapeInfo()), z.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {reinterpret_cast(z.buffer()), z.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer)z.shapeInfo(), (Nd4jPointer)z.specialShapeInfo()}; Nd4jLong iArgs[] = {2,2, 1,1, 1,1, 2,2,1, 0,0}; @@ -931,11 +931,14 @@ TEST_F(JavaInteropTests, Test_Unstack_1) { NDArray::prepareSpecialUse({&z0, &z1, &z2, &z3, &z4}, {&x}); - Nd4jPointer ptrsInBuffer[] = {reinterpret_cast(x.buffer()), x.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {reinterpret_cast(x.shapeInfo()), x.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {reinterpret_cast(x.buffer()), x.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer)x.shapeInfo(), (Nd4jPointer)x.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {z0.buffer(), z1.buffer(), z2.buffer(), z3.buffer(), z4.buffer(), z0.getSpecialBuffer(), z1.getSpecialBuffer(), z2.getSpecialBuffer(), z3.getSpecialBuffer(), z4.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {z0.shapeInfo(), z1.shapeInfo(), z2.shapeInfo(), z3.shapeInfo(), z4.shapeInfo(), z0.getSpecialShapeInfo(), z1.getSpecialShapeInfo(), z2.getSpecialShapeInfo(), z3.getSpecialShapeInfo(), z4.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {z0.buffer(), z1.buffer(), z2.buffer(), z3.buffer(), z4.buffer(), z0.specialBuffer(), z1.specialBuffer(), z2.specialBuffer(), z3.specialBuffer(), z4.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = 
{(Nd4jPointer)z0.shapeInfo(), (Nd4jPointer)z1.shapeInfo(), (Nd4jPointer)z2.shapeInfo(), + (Nd4jPointer)z3.shapeInfo(), (Nd4jPointer)z4.shapeInfo(), (Nd4jPointer)z0.specialShapeInfo(), + (Nd4jPointer)z1.specialShapeInfo(), (Nd4jPointer)z2.specialShapeInfo(), + (Nd4jPointer)z3.specialShapeInfo(), (Nd4jPointer)z4.specialShapeInfo()}; Nd4jLong iArgs[] = {0}; @@ -958,11 +961,11 @@ TEST_F(JavaInteropTests, Test_AveragePooling_FF_TF_float) { NDArray::prepareSpecialUse({&z}, {&input}); - Nd4jPointer ptrsInBuffer[] = {reinterpret_cast(input.buffer()), input.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {reinterpret_cast(input.shapeInfo()), input.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {reinterpret_cast(input.buffer()), input.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer)input.shapeInfo(), (Nd4jPointer)input.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {reinterpret_cast(z.buffer()), z.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {reinterpret_cast(z.shapeInfo()), z.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {reinterpret_cast(z.buffer()), z.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer)z.shapeInfo(), (Nd4jPointer)z.specialShapeInfo()}; Nd4jLong iArgs[] = {3,3, 3,3, 0,0, 1,1,1, 0,1}; auto hash = op.getOpHash(); @@ -991,9 +994,9 @@ TEST_F(JavaInteropTests, Test_Mixed_Add_1) { OpaqueDataBuffer zBuf(arrayZ.dataBuffer()); execPairwiseTransform(nullptr, pairwise::Add, - &xBuf, arrayX.shapeInfo(), arrayX.getSpecialShapeInfo(), - &yBuf, arrayY.shapeInfo(), arrayY.getSpecialShapeInfo(), - &zBuf, arrayZ.shapeInfo(), arrayZ.getSpecialShapeInfo(), + &xBuf, arrayX.shapeInfo(), arrayX.specialShapeInfo(), + &yBuf, arrayY.shapeInfo(), arrayY.specialShapeInfo(), + &zBuf, arrayZ.shapeInfo(), arrayZ.specialShapeInfo(), nullptr); NDArray::registerSpecialUse({&arrayZ}, {&arrayX, &arrayY}); @@ -1010,11 +1013,11 @@ TEST_F(JavaInteropTests, Test_Add_1) { sd::ops::add op; - Nd4jPointer ptrsInBuffer[] = 
{(Nd4jPointer) x.getBuffer(), y.getBuffer(), x.getSpecialBuffer(), y.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.getShapeInfo(), y.getShapeInfo(), x.getSpecialShapeInfo(), y.getSpecialShapeInfo(),}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.buffer(), y.buffer(), x.specialBuffer(), y.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.shapeInfo(), (Nd4jPointer)y.shapeInfo(), (Nd4jPointer)x.specialShapeInfo(), (Nd4jPointer)y.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) x.getBuffer(), x.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) x.getShapeInfo(), x.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) x.buffer(), x.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) x.shapeInfo(), (Nd4jPointer)x.specialShapeInfo()}; execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 2, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); @@ -1035,11 +1038,11 @@ TEST_F(JavaInteropTests, zeta_test10) { NDArray::prepareSpecialUse({&z}, {&x, &q}); - Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.getBuffer(), q.getBuffer(), x.getSpecialBuffer(), q.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.getShapeInfo(), q.getShapeInfo(), x.specialShapeInfo(), q.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.buffer(), q.buffer(), x.specialBuffer(), q.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.shapeInfo(), (Nd4jPointer)q.shapeInfo(), (Nd4jPointer)x.specialShapeInfo(), (Nd4jPointer)q.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.getBuffer(), z.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.getShapeInfo(), z.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.buffer(), z.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.shapeInfo(), (Nd4jPointer)z.specialShapeInfo()}; execCustomOp(nullptr, 
op.getOpHash(), ptrsInBuffer, ptrsInShapes, 2, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); @@ -1060,8 +1063,8 @@ TEST_F(JavaInteropTests, Test_Boolean_Broadcastables_1) { auto arrayX = NDArrayFactory::create('c', {10, 10}); auto arrayY = NDArrayFactory::create('c', {10, 10}); - Nd4jPointer ptrsInBuffer[] = {reinterpret_cast(arrayX.buffer()), reinterpret_cast(arrayY.buffer()), arrayX.getSpecialBuffer(), arrayY.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {reinterpret_cast(arrayX.shapeInfo()), reinterpret_cast(arrayY.shapeInfo()), arrayX.getSpecialShapeInfo(), arrayY.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {reinterpret_cast(arrayX.buffer()), reinterpret_cast(arrayY.buffer()), arrayX.specialBuffer(), arrayY.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer)arrayX.shapeInfo(), (Nd4jPointer)arrayY.shapeInfo(), (Nd4jPointer)arrayX.specialShapeInfo(), (Nd4jPointer)arrayY.specialShapeInfo()}; NDArray::prepareSpecialUse({}, {&arrayX, &arrayY}); sd::ops::greater_equal op; @@ -1077,11 +1080,11 @@ TEST_F(JavaInteropTests, Test_L2_Loss_3) { NDArray::prepareSpecialUse({&z}, {&x}); - Nd4jPointer ptrsInBuffer[] = {reinterpret_cast(x.buffer()), x.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {reinterpret_cast(x.shapeInfo()), x.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {reinterpret_cast(x.buffer()), x.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer)x.shapeInfo(), (Nd4jPointer)x.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffer[] = {reinterpret_cast(z.buffer()), z.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {reinterpret_cast(z.shapeInfo()), z.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffer[] = {reinterpret_cast(z.buffer()), (Nd4jPointer)z.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer)z.shapeInfo(), (Nd4jPointer)z.specialShapeInfo()}; sd::ops::l2_loss op; auto status = execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffer, 
ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); @@ -1102,9 +1105,9 @@ TEST_F(JavaInteropTests, Test_Fastpath_3) { NDArray::prepareSpecialUse({&z}, {&array0, &array1}); - ctx.setInputArray(0, array0.buffer(), array0.shapeInfo(), array0.getSpecialBuffer(), array0.getSpecialShapeInfo()); - ctx.setInputArray(1, array1.buffer(), array1.shapeInfo(), array1.getSpecialBuffer(), array1.getSpecialShapeInfo()); - ctx.setOutputArray(0, z.buffer(), z.shapeInfo(), z.getSpecialBuffer(), z.getSpecialShapeInfo()); + ctx.setInputArray(0, array0.buffer(), array0.shapeInfo(), array0.specialBuffer(), array0.specialShapeInfo()); + ctx.setInputArray(1, array1.buffer(), array1.shapeInfo(), array1.specialBuffer(), array1.specialShapeInfo()); + ctx.setOutputArray(0, z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo()); ASSERT_EQ(2, ctx.width()); diff --git a/libnd4j/tests_cpu/layers_tests/LegacyOpsTests.cpp b/libnd4j/tests_cpu/layers_tests/LegacyOpsTests.cpp index dae5ba5b9..7c7734b38 100644 --- a/libnd4j/tests_cpu/layers_tests/LegacyOpsTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/LegacyOpsTests.cpp @@ -466,8 +466,8 @@ TEST_F(LegacyOpsTests, Reduce3_2) { extraPointers = new Nd4jPointer[7] {nullptr, context->getCudaStream(), context->getScalarPointer(), nullptr, context->getCudaSpecialStream(), context->getReductionPointer(), context->getAllocationPointer()}; #endif - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.getShapeInfo(), {1}); - auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.getShapeInfo(), {1}); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), {1}); + auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.shapeInfo(), {1}); NDArray::prepareSpecialUse({&z}, {&x, &y, &dim}); OpaqueDataBuffer xBuf(x.dataBuffer()); @@ -506,8 +506,8 @@ TEST_F(LegacyOpsTests, Reduce3_3) { extraPointers = new Nd4jPointer[7] {nullptr, context->getCudaStream(), 
context->getScalarPointer(), nullptr, context->getCudaSpecialStream(), context->getReductionPointer(), context->getAllocationPointer()}; #endif - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.getShapeInfo(), {1}); - auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.getShapeInfo(), {1}); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), {1}); + auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.shapeInfo(), {1}); NDArray::prepareSpecialUse({&z}, {&x, &y, &dim}); OpaqueDataBuffer xBuf(x.dataBuffer()); @@ -546,8 +546,8 @@ TEST_F(LegacyOpsTests, Reduce3_4) { extraPointers = new Nd4jPointer[7] {nullptr, context->getCudaStream(), context->getScalarPointer(), nullptr, context->getCudaSpecialStream(), context->getReductionPointer(), context->getAllocationPointer()}; #endif - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.getShapeInfo(), {1}); - auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.getShapeInfo(), {1}); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), {1}); + auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.shapeInfo(), {1}); NDArray::prepareSpecialUse({&z}, {&x, &y, &dim}); OpaqueDataBuffer xBuf(x.dataBuffer()); @@ -588,8 +588,8 @@ TEST_F(LegacyOpsTests, Reduce3_5) { extraPointers = new Nd4jPointer[7] {nullptr, context->getCudaStream(), context->getScalarPointer(), nullptr, context->getCudaSpecialStream(), context->getReductionPointer(), context->getAllocationPointer()}; #endif - auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.getShapeInfo(), {1}); - auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.getShapeInfo(), {1}); + auto packX = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), {1}); + auto packY = sd::ConstantTadHelper::getInstance()->tadForDimensions(y.shapeInfo(), {1}); 
NDArray::prepareSpecialUse({&z}, {&x, &y, &dim}); @@ -707,7 +707,7 @@ TEST_F(LegacyOpsTests, test_legacy_reduce_empty_1) { x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), - &dim, 1, x.getPlatformShapeInfo(), nullptr); + &dim, 1, x.platformShapeInfo(), nullptr); ASSERT_EQ(e, z); } @@ -720,7 +720,7 @@ TEST_F(LegacyOpsTests, test_legacy_reduce_empty_2) { int dim = 1; - NativeOpExecutioner::execReduceSame(LaunchContext::defaultContext(), reduce::SameOps::Min, x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), &dim, 1, x.getPlatformShapeInfo(), nullptr); + NativeOpExecutioner::execReduceSame(LaunchContext::defaultContext(), reduce::SameOps::Min, x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), &dim, 1, x.platformShapeInfo(), nullptr); ASSERT_EQ(e, z); } @@ -733,7 +733,7 @@ TEST_F(LegacyOpsTests, test_legacy_reduce_empty_3) { int dim = 1; - NativeOpExecutioner::execReduceSame(LaunchContext::defaultContext(), reduce::SameOps::Max, x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), &dim, 1, x.getPlatformShapeInfo(), nullptr); + NativeOpExecutioner::execReduceSame(LaunchContext::defaultContext(), reduce::SameOps::Max, x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), &dim, 1, x.platformShapeInfo(), nullptr); ASSERT_EQ(e, z); } diff --git a/libnd4j/tests_cpu/layers_tests/MultiDataTypeTests.cpp b/libnd4j/tests_cpu/layers_tests/MultiDataTypeTests.cpp index db342771e..803029216 100644 --- a/libnd4j/tests_cpu/layers_tests/MultiDataTypeTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/MultiDataTypeTests.cpp @@ 
-1962,7 +1962,7 @@ TEST_F(MultiDataTypeTests, aaa) { NativeOpExecutioner::execRandom(LaunchContext::defaultContext(), sd::random::UniformDistribution, &gen, - z.buffer(), z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), extras.argumentsAsT()); // z.printIndexedBuffer(); diff --git a/libnd4j/tests_cpu/layers_tests/NDArrayConstructorsTests.cu b/libnd4j/tests_cpu/layers_tests/NDArrayConstructorsTests.cu index 48208d2ff..24ac087d1 100644 --- a/libnd4j/tests_cpu/layers_tests/NDArrayConstructorsTests.cu +++ b/libnd4j/tests_cpu/layers_tests/NDArrayConstructorsTests.cu @@ -198,9 +198,9 @@ TEST_F(NDArrayConstructorsTests, test_constructor_10) { ASSERT_TRUE(scalar2.isActualOnDeviceSide()); ASSERT_TRUE(scalar2.isActualOnHostSide()); - ASSERT_TRUE(scalar1.getBuffer() == nullptr); - ASSERT_TRUE(scalar1.getSpecialBuffer() != nullptr); - ASSERT_TRUE(scalar1.getShapeInfo() != nullptr); - ASSERT_TRUE(scalar1.getSpecialShapeInfo() != nullptr); + ASSERT_TRUE(scalar1.buffer() == nullptr); + ASSERT_TRUE(scalar1.specialBuffer() != nullptr); + ASSERT_TRUE(scalar1.shapeInfo() != nullptr); + ASSERT_TRUE(scalar1.specialShapeInfo() != nullptr); ASSERT_TRUE(scalar1.lengthOf() == 1); } \ No newline at end of file diff --git a/libnd4j/tests_cpu/layers_tests/NDArrayCudaBasicsTests.cu b/libnd4j/tests_cpu/layers_tests/NDArrayCudaBasicsTests.cu index 6c37e3145..f95705f08 100644 --- a/libnd4j/tests_cpu/layers_tests/NDArrayCudaBasicsTests.cu +++ b/libnd4j/tests_cpu/layers_tests/NDArrayCudaBasicsTests.cu @@ -700,7 +700,7 @@ TEST_F(NDArrayCudaBasicsTests, TestRawBroadcast_2) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -722,9 +722,9 @@ TEST_F(NDArrayCudaBasicsTests, TestRawBroadcast_2) { // call cuda kernel which calculates result 
NativeOpExecutioner::execBroadcast(&lc, sd::broadcast::Multiply, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], nullptr, nullptr); @@ -760,7 +760,7 @@ TEST_F(NDArrayCudaBasicsTests, TestRawBroadcast_3) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -788,9 +788,9 @@ TEST_F(NDArrayCudaBasicsTests, TestRawBroadcast_3) { NDArray::registerSpecialUse({&z}, {&x, &y}); // call cuda kernel which calculates result NativeOpExecutioner::execBroadcast(pLc, sd::broadcast::Multiply, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], nullptr, nullptr); @@ -958,7 +958,7 @@ TEST_F(NDArrayCudaBasicsTests, TestBroadcastRaw_1) { // evaluate xTad data shape::TAD xTad; - xTad.init(x.getShapeInfo(), dimensions.data(), dimensions.size()); + xTad.init(x.shapeInfo(), dimensions.data(), dimensions.size()); xTad.createTadOnlyShapeInfo(); xTad.createOffsets(); @@ -984,9 +984,9 @@ TEST_F(NDArrayCudaBasicsTests, 
TestBroadcastRaw_1) { // call cuda kernel which calculates result NativeOpExecutioner::execBroadcast(pLc, sd::broadcast::Add, - nullptr, x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), - nullptr, y.getShapeInfo(), y.specialBuffer(), y.specialShapeInfo(), - nullptr, z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), + nullptr, x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + nullptr, y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), + nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), (int*)devicePtrs[0], dimensions.size(), (Nd4jLong*)devicePtrs[1], (Nd4jLong*)devicePtrs[2], nullptr, nullptr); @@ -1949,7 +1949,7 @@ TEST_F(NDArrayCudaBasicsTests, subarray_1) ASSERT_TRUE(xExp.isSameShape(x0)); ASSERT_TRUE(xExp.equalsTo(x0)); // for(int i = 0; i < shape::shapeInfoLength(x0.rankOf()); ++i) -// ASSERT_TRUE(x0.getShapeInfo()[i] == shapeExpX0[i]); +// ASSERT_TRUE(x0.shapeInfo()[i] == shapeExpX0[i]); // for(int i = 0; i < x0.lengthOf(); ++i) // ASSERT_TRUE(x0.e(i) == buffExpX0[i]); @@ -1959,7 +1959,7 @@ TEST_F(NDArrayCudaBasicsTests, subarray_1) ASSERT_TRUE(x1Exp.equalsTo(x1)); // for(int i = 0; i < shape::shapeInfoLength(x1.rankOf()); ++i) -// ASSERT_TRUE(x1.getShapeInfo()[i] == shapeExpX1[i]); +// ASSERT_TRUE(x1.shapeInfo()[i] == shapeExpX1[i]); // for(int i = 0; i < x1.lengthOf(); ++i) // ASSERT_TRUE(x1.e(i) == buffExpX1[i]); @@ -1970,7 +1970,7 @@ TEST_F(NDArrayCudaBasicsTests, subarray_1) // x2Exp.printBuffer("X2 EXPECT"); ASSERT_TRUE(x2Exp.equalsTo(x2)); // for(int i = 0; i < shape::shapeInfoLength(x2.rankOf()); ++i) -// ASSERT_TRUE(x2.getShapeInfo()[i] == shapeExpX2[i]); +// ASSERT_TRUE(x2.shapeInfo()[i] == shapeExpX2[i]); // for(int i = 0; i < x2.lengthOf(); ++i) // ASSERT_TRUE(x2.e(i) == buffExpX2[i]); @@ -1979,7 +1979,7 @@ TEST_F(NDArrayCudaBasicsTests, subarray_1) ASSERT_TRUE(x3Exp.isSameShape(x3)); ASSERT_TRUE(x3Exp.equalsTo(x3)); // for(int i = 0; i < shape::shapeInfoLength(x3.rankOf()); ++i) -// 
ASSERT_TRUE(x3.getShapeInfo()[i] == shapeExpX3[i]); +// ASSERT_TRUE(x3.shapeInfo()[i] == shapeExpX3[i]); // for(int i = 0; i < x3.lengthOf(); ++i) // ASSERT_TRUE(x3.e(i) == buffExpX3[i]); @@ -1988,7 +1988,7 @@ TEST_F(NDArrayCudaBasicsTests, subarray_1) ASSERT_TRUE(x4Exp.isSameShape(x4)); ASSERT_TRUE(x4Exp.equalsTo(x4)); // for(int i = 0; i < shape::shapeInfoLength(x4.rankOf()); ++i) -// ASSERT_TRUE(x4.getShapeInfo()[i] == shapeExpX4[i]); +// ASSERT_TRUE(x4.shapeInfo()[i] == shapeExpX4[i]); // for(int i = 0; i < x4.lengthOf(); ++i) // ASSERT_TRUE(x4.e(i) == buffExpX4[i]); @@ -1998,7 +1998,7 @@ TEST_F(NDArrayCudaBasicsTests, subarray_1) ASSERT_TRUE(x5Exp.equalsTo(x5)); // for(int i = 0; i < shape::shapeInfoLength(x5.rankOf()); ++i) -// ASSERT_TRUE(x5.getShapeInfo()[i] == shapeExpX5[i]); +// ASSERT_TRUE(x5.shapeInfo()[i] == shapeExpX5[i]); // for(int i = 0; i < x5.lengthOf(); ++i) // ASSERT_TRUE(x5.e(i) == buffExpX5[i]); @@ -2008,7 +2008,7 @@ TEST_F(NDArrayCudaBasicsTests, subarray_1) ASSERT_TRUE(y0Exp.isSameShape(y0)); ASSERT_TRUE(y0Exp.equalsTo(y0)); // for(int i = 0; i < shape::shapeInfoLength(y0.rankOf()); ++i) -// ASSERT_TRUE(y0.getShapeInfo()[i] == shapeExpY0[i]); +// ASSERT_TRUE(y0.shapeInfo()[i] == shapeExpY0[i]); // for(int i = 0; i < y0.lengthOf(); ++i) // ASSERT_TRUE(y0.e(i) == buffExpY0[i]); @@ -2017,7 +2017,7 @@ TEST_F(NDArrayCudaBasicsTests, subarray_1) ASSERT_TRUE(y1Exp.isSameShape(y1)); ASSERT_TRUE(y1Exp.equalsTo(y1)); // for(int i = 0; i < shape::shapeInfoLength(y1.rankOf()); ++i) -// ASSERT_TRUE(y1.getShapeInfo()[i] == shapeExpY1[i]); +// ASSERT_TRUE(y1.shapeInfo()[i] == shapeExpY1[i]); // for(int i = 0; i < y1.lengthOf(); ++i) // ASSERT_TRUE(y1.e(i) == buffExpY1[i]); @@ -2026,7 +2026,7 @@ TEST_F(NDArrayCudaBasicsTests, subarray_1) ASSERT_TRUE(y2Exp.isSameShape(y2)); ASSERT_TRUE(y2Exp.equalsTo(y2)); // for(int i = 0; i < shape::shapeInfoLength(y2.rankOf()); ++i) -// ASSERT_TRUE(y2.getShapeInfo()[i] == shapeExpY2[i]); +// ASSERT_TRUE(y2.shapeInfo()[i] 
== shapeExpY2[i]); // for(int i = 0; i < y2.lengthOf(); ++i) // ASSERT_TRUE(y2.e(i) == buffExpY2[i]); @@ -2035,7 +2035,7 @@ TEST_F(NDArrayCudaBasicsTests, subarray_1) ASSERT_TRUE(y3Exp.isSameShape(y3)); ASSERT_TRUE(y3Exp.equalsTo(y3)); // for(int i = 0; i < shape::shapeInfoLength(y3.rankOf()); ++i) -// ASSERT_TRUE(y3.getShapeInfo()[i] == shapeExpY3[i]); +// ASSERT_TRUE(y3.shapeInfo()[i] == shapeExpY3[i]); // for(int i = 0; i < y3.lengthOf(); ++i) // ASSERT_TRUE(y3.e(i) == buffExpY3[i]); @@ -2044,7 +2044,7 @@ TEST_F(NDArrayCudaBasicsTests, subarray_1) ASSERT_TRUE(y4Exp.isSameShape(y4)); ASSERT_TRUE(y4Exp.equalsTo(y4)); // for(int i = 0; i < shape::shapeInfoLength(y4.rankOf()); ++i) -// ASSERT_TRUE(y4.getShapeInfo()[i] == shapeExpY4[i]); +// ASSERT_TRUE(y4.shapeInfo()[i] == shapeExpY4[i]); // for(int i = 0; i < y4.lengthOf(); ++i) // ASSERT_TRUE(y4.e(i) == buffExpY4[i]); @@ -2053,7 +2053,7 @@ TEST_F(NDArrayCudaBasicsTests, subarray_1) ASSERT_TRUE(y5Exp.isSameShape(y5)); ASSERT_TRUE(y5Exp.equalsTo(y5)); // for(int i = 0; i < shape::shapeInfoLength(y5.rankOf()); ++i) -// ASSERT_TRUE(y5.getShapeInfo()[i] == shapeExpY5[i]); +// ASSERT_TRUE(y5.shapeInfo()[i] == shapeExpY5[i]); // for(int i = 0; i < y5.lengthOf(); ++i) // ASSERT_TRUE(y5.e(i) == buffExpY5[i]); @@ -2077,9 +2077,9 @@ TEST_F(NDArrayCudaBasicsTests, Test_diagonal_1) { NDArray tmp(sd::DataType::FLOAT32, x.getContext()); // scalar = 0 ExtraArguments extras({eps}); - NativeOpExecutioner::execReduce3Scalar(diag.getContext(), reduce3::EqualsWithEps, diag.getBuffer(), - diag.getShapeInfo(), diag.getSpecialBuffer(), diag.getSpecialShapeInfo(), extras.argumentsAsT(sd::DataType::FLOAT32), - exp.getBuffer(), exp.getShapeInfo(), exp.getSpecialBuffer(), exp.getSpecialShapeInfo(), + NativeOpExecutioner::execReduce3Scalar(diag.getContext(), reduce3::EqualsWithEps, diag.buffer(), + diag.shapeInfo(), diag.specialBuffer(), diag.specialShapeInfo(), extras.argumentsAsT(sd::DataType::FLOAT32), + exp.buffer(), exp.shapeInfo(), 
exp.specialBuffer(), exp.specialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo()); cudaStream_t* stream = x.getContext()->getCudaStream(); auto res = cudaStreamSynchronize(*stream); diff --git a/libnd4j/tests_cpu/layers_tests/NDArrayTests.cpp b/libnd4j/tests_cpu/layers_tests/NDArrayTests.cpp index 5e6cd10fb..669574fa7 100644 --- a/libnd4j/tests_cpu/layers_tests/NDArrayTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/NDArrayTests.cpp @@ -99,7 +99,7 @@ TEST_F(NDArrayTest, NDArrayOrder1) { } for (int i = 0; i < 8; i++) { - ASSERT_EQ(fShape[i], arrayF->getShapeInfo()[i]); + ASSERT_EQ(fShape[i], arrayF->shapeInfo()[i]); } for (int i = 0; i < 4; i++) { @@ -107,7 +107,7 @@ TEST_F(NDArrayTest, NDArrayOrder1) { } for (int i = 0; i < 8; i++) { - ASSERT_EQ(cShape[i], arrayC2->getShapeInfo()[i]); + ASSERT_EQ(cShape[i], arrayC2->shapeInfo()[i]); } @@ -237,13 +237,13 @@ TEST_F(NDArrayTest, TestPermuteReshape1) { array.permutei({1, 2, 3, 0}); - for (int e = 0; e < shape::shapeInfoLength(array.getShapeInfo()); e++) - ASSERT_EQ(pShape[e], array.getShapeInfo()[e]); + for (int e = 0; e < shape::shapeInfoLength(array.shapeInfo()); e++) + ASSERT_EQ(pShape[e], array.shapeInfo()[e]); array.reshapei('c', {2, 25, 2}); - for (int e = 0; e < shape::shapeInfoLength(array.getShapeInfo()); e++) - ASSERT_EQ(rShape[e], array.getShapeInfo()[e]); + for (int e = 0; e < shape::shapeInfoLength(array.shapeInfo()); e++) + ASSERT_EQ(rShape[e], array.shapeInfo()[e]); } @@ -259,15 +259,15 @@ TEST_F(NDArrayTest, TestPermuteReshape2) { // array.printShapeInfo("after "); - auto aShape = array.getShapeInfo(); + auto aShape = array.shapeInfo(); - for (int e = 0; e < shape::shapeInfoLength(array.getShapeInfo()); e++) + for (int e = 0; e < shape::shapeInfoLength(array.shapeInfo()); e++) ASSERT_EQ(pShape[e], aShape[e]); array.reshapei('c', {2, 72, 25}); - for (int e = 0; e < shape::shapeInfoLength(array.getShapeInfo()); e++) - ASSERT_EQ(rShape[e], array.getShapeInfo()[e]); + 
for (int e = 0; e < shape::shapeInfoLength(array.shapeInfo()); e++) + ASSERT_EQ(rShape[e], array.shapeInfo()[e]); } ////////////////////////////////////////////////////////////////////// @@ -947,9 +947,9 @@ TEST_F(NDArrayTest, TestMmulHelper2) { auto z = NDArrayFactory::create_('f', {5, 1}); auto expBuffer = new float[5]{28.00f, 64.00f, 100.00f, 136.00f, 172.00f}; - auto exp = new NDArray(expBuffer, z->getShapeInfo(), sd::LaunchContext ::defaultContext(), true); + auto exp = new NDArray(expBuffer, z->shapeInfo(), sd::LaunchContext ::defaultContext(), true); - //sd::blas::GEMV::op('f', x->rows(), x->columns(), 1.0f, x->getBuffer(), y->rows(), y->getBuffer(), 1, 0.0, z->getBuffer(), 1); + //sd::blas::GEMV::op('f', x->rows(), x->columns(), 1.0f, x->buffer(), y->rows(), y->buffer(), 1, 0.0, z->buffer(), 1); MmulHelper::mmul(x, y, z); @@ -976,9 +976,9 @@ TEST_F(NDArrayTest, TestMmulHelper3) { auto z = NDArrayFactory::create_('f', {5, 1}); auto expBuffer = new float[5]{92.00f, 104.00f, 116.00f, 128.00f, 140.00f}; - auto exp = new NDArray(expBuffer, z->getShapeInfo()); + auto exp = new NDArray(expBuffer, z->shapeInfo()); - //sd::blas::GEMV::op('f', x->rows(), x->columns(), 1.0f, x->getBuffer(), y->rows(), y->getBuffer(), 1, 0.0, z->getBuffer(), 1); + //sd::blas::GEMV::op('f', x->rows(), x->columns(), 1.0f, x->buffer(), y->rows(), y->buffer(), 1, 0.0, z->buffer(), 1); MmulHelper::mmul(x, y, z); @@ -1011,7 +1011,7 @@ TEST_F(NDArrayTest, TestMmulHelper4) { auto z = NDArrayFactory::create_('f', {3, 3}); auto expBuffer = new float[9]{7.0f, 21.0f, 35.0f, 10.0f, 28.0f, 46.0f, 13.0f, 35.0f, 57.0f}; - auto exp = new NDArray(expBuffer, z->getShapeInfo()); + auto exp = new NDArray(expBuffer, z->shapeInfo()); MmulHelper::mmul(x, y, z); ASSERT_TRUE(z->equalsTo(exp)); @@ -1041,7 +1041,7 @@ TEST_F(NDArrayTest, TestMmulHelper5) { auto z = NDArrayFactory::create_('f', {3, 3}); auto expBuffer = new float[9]{7.0f, 14.0f, 21.0f, 12.0f, 21.0f, 30.0f, 17.0f, 28.0f, 39.0f}; - auto exp = new 
NDArray(expBuffer, z->getShapeInfo()); + auto exp = new NDArray(expBuffer, z->shapeInfo()); MmulHelper::mmul(x, y, z); ASSERT_TRUE(z->equalsTo(exp)); @@ -1071,7 +1071,7 @@ TEST_F(NDArrayTest, TestMmulHelper6) { auto z = NDArrayFactory::create_('f', {3, 3}); auto expBuffer = new float[9]{39.0f, 54.0f, 69.0f, 9.0f, 18.0f, 27.0f, 9.0f, 12.0f, 15.0f}; - auto exp = new NDArray(expBuffer, z->getShapeInfo()); + auto exp = new NDArray(expBuffer, z->shapeInfo()); MmulHelper::mmul(x, y, z); ASSERT_TRUE(z->equalsTo(exp)); @@ -1102,7 +1102,7 @@ TEST_F(NDArrayTest, TestMmulHelper7) { auto z = NDArrayFactory::create_('f', {1, 3}); auto expBuffer = new float[9]{110.00f, 260.00f, 410.00f}; - auto exp = new NDArray(expBuffer, z->getShapeInfo()); + auto exp = new NDArray(expBuffer, z->shapeInfo()); MmulHelper::mmul(y, x, z); @@ -1301,7 +1301,7 @@ TEST_F(NDArrayTest, TestIndexedPut2) { x.p(1, 1.0f); //x.printBuffer("after"); - ASSERT_NEAR(reinterpret_cast(x.getBuffer())[2], 1.0, 1e-5); + ASSERT_NEAR(reinterpret_cast(x.buffer())[2], 1.0, 1e-5); } TEST_F(NDArrayTest, TestIndexedPut3) { @@ -1309,7 +1309,7 @@ TEST_F(NDArrayTest, TestIndexedPut3) { x.p(1, 1.0f); //x.printBuffer("after"); - ASSERT_NEAR(reinterpret_cast(x.getBuffer())[1], 1.0, 1e-5); + ASSERT_NEAR(reinterpret_cast(x.buffer())[1], 1.0, 1e-5); } TEST_F(NDArrayTest, TestIndexedPut4) { @@ -1317,7 +1317,7 @@ TEST_F(NDArrayTest, TestIndexedPut4) { x.p(0, 1, 1.0f); //x.printBuffer("after"); - ASSERT_NEAR(reinterpret_cast(x.getBuffer())[2], 1.0, 1e-5); + ASSERT_NEAR(reinterpret_cast(x.buffer())[2], 1.0, 1e-5); } diff --git a/libnd4j/tests_cpu/layers_tests/NDArrayTests2.cpp b/libnd4j/tests_cpu/layers_tests/NDArrayTests2.cpp index 49f003809..4dd4c3abe 100644 --- a/libnd4j/tests_cpu/layers_tests/NDArrayTests2.cpp +++ b/libnd4j/tests_cpu/layers_tests/NDArrayTests2.cpp @@ -974,74 +974,74 @@ TEST_F(NDArrayTest2, subarray_1) { NDArray x0 = x(0, {1,2}); for(int i = 0; i < shape::shapeInfoLength(x0.rankOf()); ++i) - 
ASSERT_TRUE(x0.getShapeInfo()[i] == shapeExpX0[i]); + ASSERT_TRUE(x0.shapeInfo()[i] == shapeExpX0[i]); for(int i = 0; i < x0.lengthOf(); ++i) ASSERT_TRUE(x0.e(i) == buffExpX0[i]); NDArray x1 = x(1, {1,2}); for(int i = 0; i < shape::shapeInfoLength(x1.rankOf()); ++i) - ASSERT_TRUE(x1.getShapeInfo()[i] == shapeExpX0[i]); + ASSERT_TRUE(x1.shapeInfo()[i] == shapeExpX0[i]); for(int i = 0; i < x1.lengthOf(); ++i) ASSERT_TRUE(x1.e(i) == buffExpX1[i]); NDArray x2 = x(0, {1,2}, true); for(int i = 0; i < shape::shapeInfoLength(x2.rankOf()); ++i) - ASSERT_TRUE(x2.getShapeInfo()[i] == shapeExpX2[i]); + ASSERT_TRUE(x2.shapeInfo()[i] == shapeExpX2[i]); for(int i = 0; i < x2.lengthOf(); ++i) ASSERT_TRUE(x2.e(i) == buffExpX2[i]); NDArray x3 = x(2, {1}); for(int i = 0; i < shape::shapeInfoLength(x3.rankOf()); ++i) - ASSERT_TRUE(x3.getShapeInfo()[i] == shapeExpX3[i]); + ASSERT_TRUE(x3.shapeInfo()[i] == shapeExpX3[i]); for(int i = 0; i < x3.lengthOf(); ++i) ASSERT_TRUE(x3.e(i) == buffExpX3[i]); NDArray x4 = x(2, {1}, true); for(int i = 0; i < shape::shapeInfoLength(x4.rankOf()); ++i) - ASSERT_TRUE(x4.getShapeInfo()[i] == shapeExpX4[i]); + ASSERT_TRUE(x4.shapeInfo()[i] == shapeExpX4[i]); for(int i = 0; i < x4.lengthOf(); ++i) ASSERT_TRUE(x4.e(i) == buffExpX4[i]); NDArray x5 = x(3, {2}); for(int i = 0; i < shape::shapeInfoLength(x5.rankOf()); ++i) - ASSERT_TRUE(x5.getShapeInfo()[i] == shapeExpX5[i]); + ASSERT_TRUE(x5.shapeInfo()[i] == shapeExpX5[i]); for(int i = 0; i < x5.lengthOf(); ++i) ASSERT_TRUE(x5.e(i) == buffExpX5[i]); // ******************* // NDArray y0 = y(0, {1,2}); for(int i = 0; i < shape::shapeInfoLength(y0.rankOf()); ++i) - ASSERT_TRUE(y0.getShapeInfo()[i] == shapeExpY0[i]); + ASSERT_TRUE(y0.shapeInfo()[i] == shapeExpY0[i]); for(int i = 0; i < y0.lengthOf(); ++i) ASSERT_TRUE(y0.e(i) == buffExpY0[i]); NDArray y1 = y(1, {1,2}); for(int i = 0; i < shape::shapeInfoLength(y1.rankOf()); ++i) - ASSERT_TRUE(y1.getShapeInfo()[i] == shapeExpY0[i]); + ASSERT_TRUE(y1.shapeInfo()[i] 
== shapeExpY0[i]); for(int i = 0; i < y1.lengthOf(); ++i) ASSERT_TRUE(y1.e(i) == buffExpY1[i]); NDArray y2 = y(0, {1,2}, true); for(int i = 0; i < shape::shapeInfoLength(y2.rankOf()); ++i) - ASSERT_TRUE(y2.getShapeInfo()[i] == shapeExpY2[i]); + ASSERT_TRUE(y2.shapeInfo()[i] == shapeExpY2[i]); for(int i = 0; i < y2.lengthOf(); ++i) ASSERT_TRUE(y2.e(i) == buffExpY2[i]); NDArray y3 = y(2, {1}); for(int i = 0; i < shape::shapeInfoLength(y3.rankOf()); ++i) - ASSERT_TRUE(y3.getShapeInfo()[i] == shapeExpY3[i]); + ASSERT_TRUE(y3.shapeInfo()[i] == shapeExpY3[i]); for(int i = 0; i < y3.lengthOf(); ++i) ASSERT_TRUE(y3.e(i) == buffExpY3[i]); NDArray y4 = y(2, {1}, true); for(int i = 0; i < shape::shapeInfoLength(y4.rankOf()); ++i) - ASSERT_TRUE(y4.getShapeInfo()[i] == shapeExpY4[i]); + ASSERT_TRUE(y4.shapeInfo()[i] == shapeExpY4[i]); for(int i = 0; i < y4.lengthOf(); ++i) ASSERT_TRUE(y4.e(i) == buffExpY4[i]); NDArray y5 = y(3, {2}); for(int i = 0; i < shape::shapeInfoLength(y5.rankOf()); ++i) - ASSERT_TRUE(y5.getShapeInfo()[i] == shapeExpY5[i]); + ASSERT_TRUE(y5.shapeInfo()[i] == shapeExpY5[i]); for(int i = 0; i < y5.lengthOf(); ++i) ASSERT_TRUE(y5.e(i) == buffExpY5[i]); @@ -1171,7 +1171,7 @@ TEST_F(NDArrayTest2, reshapei_1) { const bool canReshape = x.reshapei({4,7}); ASSERT_FALSE(canReshape); - ASSERT_TRUE(shape::equalsStrict(x.getShapeInfo(), shapeInfo2)); + ASSERT_TRUE(shape::equalsStrict(x.shapeInfo(), shapeInfo2)); delete[] buffer; } @@ -1188,7 +1188,7 @@ TEST_F(NDArrayTest2, reshapei_2) { const bool canReshape = x.reshapei({4,7}); ASSERT_FALSE(canReshape); - ASSERT_TRUE(shape::equalsStrict(x.getShapeInfo(), shapeInfo2)); + ASSERT_TRUE(shape::equalsStrict(x.shapeInfo(), shapeInfo2)); delete[] buffer; } @@ -1225,8 +1225,8 @@ TEST_F(NDArrayTest2, reduce_1) { for (int x = 0; x < 4; x++) { for (int y = 0; y < 4; y++) { Nd4jLong indices[] = {0, 0, x, y, i, j}; - Nd4jLong offset = shape::getOffset(arr6.getShapeInfo(), indices); - sum += ((double*)arr6.getBuffer())[offset]; + 
Nd4jLong offset = shape::getOffset(arr6.shapeInfo(), indices); + sum += ((double*)arr6.buffer())[offset]; } } exp.p(0, 0, i, j, sum); diff --git a/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp b/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp index 971fe452e..3421edf95 100644 --- a/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp @@ -70,7 +70,7 @@ TEST_F(NativeOpsTests, PointerTests_1) { #ifdef __CUDABLAS__ printf("Unsupported for cuda now.\n"); #else - ::tryPointer(nullptr, x.getBuffer(), 4); + ::tryPointer(nullptr, x.buffer(), 4); #endif // auto exp = NDArrayFactory::create('c', {5, 5}); @@ -1061,10 +1061,9 @@ TEST_F(NativeOpsTests, ConcatTest_2) { auto tadPackZ = sd::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dimensions, dimension.lengthOf()); exp.linspace(1); Nd4jPointer datas[] = {x.buffer(), y.buffer()}; - Nd4jPointer shapes[] = {x.shapeInfo(), y.shapeInfo()}; + Nd4jPointer shapes[] = {(Nd4jPointer)x.shapeInfo(), (Nd4jPointer)y.shapeInfo()}; - ::specialConcat(extra, - 0, 2, datas, shapes, z.buffer(), z.shapeInfo(), nullptr, nullptr); + ::specialConcat(extra, 0, 2, datas, shapes, z.buffer(), z.shapeInfo(), nullptr, nullptr); // exp.printIndexedBuffer("Exp"); // z.printIndexedBuffer("Concat"); @@ -1126,8 +1125,8 @@ TEST_F(NativeOpsTests, PullRowsTest_1) { std::vector dims = {1}; - auto xTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.getShapeInfo(), dims); - auto zTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(z.getShapeInfo(), dims); + auto xTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dims); + auto zTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dims); Nd4jPointer nativeStart[2]; @@ -1137,8 +1136,8 @@ TEST_F(NativeOpsTests, PullRowsTest_1) { OpaqueDataBuffer xBuf(x.dataBuffer()); OpaqueDataBuffer zBuf(z.dataBuffer()); - pullRows(nativeStart, &xBuf, x.getShapeInfo(), 
x.getSpecialShapeInfo(), - &zBuf, z.getShapeInfo(), z.specialShapeInfo(), + pullRows(nativeStart, &xBuf, x.shapeInfo(), x.specialShapeInfo(), + &zBuf, z.shapeInfo(), z.specialShapeInfo(), 4, pidx, xTadPack.platformShapeInfo(), xTadPack.platformOffsets(), zTadPack.platformShapeInfo(), zTadPack.platformOffsets()); @@ -1224,16 +1223,16 @@ TEST_F(NativeOpsTests, ShuffleTest_1) { exp.linspace(2,2); Nd4jPointer xList[] = {x.buffer(), x.buffer()}; Nd4jPointer dxList[] = {x.specialBuffer(), y.specialBuffer()}; - Nd4jPointer xShapeList[] = {x.shapeInfo(), y.shapeInfo()}; - Nd4jPointer dxShapeList[] = {x.specialShapeInfo(), y.specialShapeInfo()}; + Nd4jPointer xShapeList[] = {(Nd4jPointer)x.shapeInfo(), (Nd4jPointer)y.shapeInfo()}; + Nd4jPointer dxShapeList[] = {(Nd4jPointer)x.specialShapeInfo(), (Nd4jPointer)y.specialShapeInfo()}; Nd4jPointer zList[] = {z.buffer(), z.buffer()}; Nd4jPointer dzList[] = {z.specialBuffer(), z.specialBuffer()}; - Nd4jPointer zShapeList[] = {z.shapeInfo(), z.shapeInfo()}; - Nd4jPointer dzShapeList[] = {z.specialShapeInfo(), z.specialShapeInfo()}; + Nd4jPointer zShapeList[] = {(Nd4jPointer)z.shapeInfo(), (Nd4jPointer)z.shapeInfo()}; + Nd4jPointer dzShapeList[] = {(Nd4jPointer)z.specialShapeInfo(), (Nd4jPointer)z.specialShapeInfo()}; int shuffleMap[] = {1, 0, 4, 3, 2}; - auto zTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.getShapeInfo(), {1}); - Nd4jPointer zListOffset[] = {zTadPack.platformOffsets(), zTadPack.platformOffsets()}; - Nd4jPointer zListTADs[] = {zTadPack.platformShapeInfo(), zTadPack.platformShapeInfo()}; + auto zTadPack = sd::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), {1}); + Nd4jPointer zListOffset[] = {(Nd4jPointer)zTadPack.platformOffsets(), (Nd4jPointer)zTadPack.platformOffsets()}; + Nd4jPointer zListTADs[] = {(Nd4jPointer)zTadPack.platformShapeInfo(), (Nd4jPointer)zTadPack.platformShapeInfo()}; ::shuffle(nullptr, xList, xShapeList, dxList, dxShapeList, @@ -1494,11 +1493,11 @@ 
TEST_F(NativeOpsTests, CustomOpTest_1) { sd::ops::squeeze op; - Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.getBuffer(), x.getSpecialBuffer()}; - Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.getShapeInfo(), x.getSpecialShapeInfo()}; + Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.buffer(), x.specialBuffer()}; + Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) x.shapeInfo(), (Nd4jPointer)x.specialShapeInfo()}; - Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.getBuffer(), z.getSpecialBuffer()}; - Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.getShapeInfo(), z.getSpecialShapeInfo()}; + Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.buffer(), z.specialBuffer()}; + Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.shapeInfo(), (Nd4jPointer)z.specialShapeInfo()}; auto status = ::execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); @@ -1516,9 +1515,9 @@ TEST_F(NativeOpsTests, CustomOpTests_2) { NDArray::prepareSpecialUse({&z}, {&array0, &array1}); - ctx.setInputArray(0, array0.buffer(), array0.shapeInfo(), array0.getSpecialBuffer(), array0.getSpecialShapeInfo()); - ctx.setInputArray(1, array1.buffer(), array1.shapeInfo(), array1.getSpecialBuffer(), array1.getSpecialShapeInfo()); - ctx.setOutputArray(0, z.buffer(), z.shapeInfo(), z.getSpecialBuffer(), z.getSpecialShapeInfo()); + ctx.setInputArray(0, array0.buffer(), array0.shapeInfo(), array0.specialBuffer(), array0.specialShapeInfo()); + ctx.setInputArray(1, array1.buffer(), array1.shapeInfo(), array1.specialBuffer(), array1.specialShapeInfo()); + ctx.setOutputArray(0, z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo()); ASSERT_EQ(2, ctx.width()); @@ -1539,7 +1538,7 @@ TEST_F(NativeOpsTests, CalculateOutputShapeTests_1) { std::vector tArgs({}); std::vector iArgs({2, 2, 1, 1, 0, 0, 1, 1, 1}); - Nd4jPointer ptrs[] = {(Nd4jPointer) input.getShapeInfo(), (Nd4jPointer) weights.getShapeInfo()}; + Nd4jPointer ptrs[] = 
{(Nd4jPointer) input.shapeInfo(), (Nd4jPointer) weights.shapeInfo()}; #ifdef __CUDABLAS__ return; #endif @@ -1572,7 +1571,7 @@ TEST_F(NativeOpsTests, CalculateOutputShapeTests_2) { std::vector bArgsF({}); std::vector iArgs({2, 2, 1, 1, 0, 0, 1, 1, 1}); - Nd4jPointer shapePtrs[] = {(Nd4jPointer) input.getShapeInfo(), (Nd4jPointer) weights.getShapeInfo()}; + Nd4jPointer shapePtrs[] = {(Nd4jPointer) input.shapeInfo(), (Nd4jPointer) weights.shapeInfo()}; Nd4jPointer dataPtrs[] = {(Nd4jPointer)input.buffer(), (Nd4jPointer)weights.buffer()}; #ifdef __CUDABLAS__ return; diff --git a/libnd4j/tests_cpu/layers_tests/PairwiseTests.cpp b/libnd4j/tests_cpu/layers_tests/PairwiseTests.cpp index 9cb2589c1..0d73b369b 100644 --- a/libnd4j/tests_cpu/layers_tests/PairwiseTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/PairwiseTests.cpp @@ -22,9 +22,9 @@ class EqualsTest : public testing::Test { public: - Nd4jLong firstShapeBuffer[8] = {2,1,2,1,1,0,1,102}; + const Nd4jLong firstShapeBuffer[8] = {2,1,2,1,1,0,1,102}; float data[2] = {1.0f, 7.0f}; - Nd4jLong secondShapeBuffer[8] = {2,2,1,6,1,0,6,99}; + const Nd4jLong secondShapeBuffer[8] = {2,2,1,6,1,0,6,99}; float dataSecond[12] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f}; int opNum = 4; float extraArgs[1] = {1e-6f}; diff --git a/libnd4j/tests_cpu/layers_tests/RNGTests.cpp b/libnd4j/tests_cpu/layers_tests/RNGTests.cpp index 56ca6b95e..c4c1806bd 100644 --- a/libnd4j/tests_cpu/layers_tests/RNGTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/RNGTests.cpp @@ -1097,8 +1097,8 @@ TEST_F(RNGTests, Test_Uniform_4) { #endif TEST_F(RNGTests, test_choice_1) { - auto x = NDArrayFactory::linspace(0, 10, 11); - auto prob = NDArrayFactory::valueOf({11}, 1.0/11, 'c'); + const auto x = NDArrayFactory::linspace(0, 10, 11); + const auto prob = NDArrayFactory::valueOf({11}, 1.0/11, 'c'); auto z = NDArrayFactory::create('c', {1000}); RandomGenerator rng(119, 256); diff --git a/libnd4j/tests_cpu/layers_tests/ReduceTests.cpp 
b/libnd4j/tests_cpu/layers_tests/ReduceTests.cpp deleted file mode 100644 index adbe28a41..000000000 --- a/libnd4j/tests_cpu/layers_tests/ReduceTests.cpp +++ /dev/null @@ -1,157 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -// -// Created by agibsonccc on 1/15/17. 
-// -#include -#include "testinclude.h" -#include -#include -#include - -class ReduceTest : public testing::Test { -public: - Nd4jLong shape[2] = {500,3}; - float x[1500] = {4.0f, 2.0f, 3.0f, 8.0f, 4.0f, 6.0f, 12.0f, 6.0f, 9.0f, 16.0f, 8.0f, 12.0f, 20.0f, 10.0f, 15.0f, 24.0f, 12.0f, 18.0f, 28.0f, 14.0f, 21.0f, 32.0f, 16.0f, 24.0f, 36.0f, 18.0f, 27.0f, 40.0f, 20.0f, 30.0f, 44.0f, 22.0f, 33.0f, 48.0f, 24.0f, 36.0f, 52.0f, 26.0f, 39.0f, 56.0f, 28.0f, 42.0f, 60.0f, 30.0f, 45.0f, 64.0f, 32.0f, 48.0f, 68.0f, 34.0f, 51.0f, 72.0f, 36.0f, 54.0f, 76.0f, 38.0f, 57.0f, 80.0f, 40.0f, 60.0f, 84.0f, 42.0f, 63.0f, 88.0f, 44.0f, 66.0f, 92.0f, 46.0f, 69.0f, 96.0f, 48.0f, 72.0f, 100.0f, 50.0f, 75.0f, 104.0f, 52.0f, 78.0f, 108.0f, 54.0f, 81.0f, 112.0f, 56.0f, 84.0f, 116.0f, 58.0f, 87.0f, 120.0f, 60.0f, 90.0f, 124.0f, 62.0f, 93.0f, 128.0f, 64.0f, 96.0f, 132.0f, 66.0f, 99.0f, 136.0f, 68.0f, 102.0f, 140.0f, 70.0f, 105.0f, 144.0f, 72.0f, 108.0f, 148.0f, 74.0f, 111.0f, 152.0f, 76.0f, 114.0f, 156.0f, 78.0f, 117.0f, 160.0f, 80.0f, 120.0f, 164.0f, 82.0f, 123.0f, 168.0f, 84.0f, 126.0f, 172.0f, 86.0f, 129.0f, 176.0f, 88.0f, 132.0f, 180.0f, 90.0f, 135.0f, 184.0f, 92.0f, 138.0f, 188.0f, 94.0f, 141.0f, 192.0f, 96.0f, 144.0f, 196.0f, 98.0f, 147.0f, 200.0f, 100.0f, 150.0f, 204.0f, 102.0f, 153.0f, 208.0f, 104.0f, 156.0f, 212.0f, 106.0f, 159.0f, 216.0f, 108.0f, 162.0f, 220.0f, 110.0f, 165.0f, 224.0f, 112.0f, 168.0f, 228.0f, 114.0f, 171.0f, 232.0f, 116.0f, 174.0f, 236.0f, 118.0f, 177.0f, 240.0f, 120.0f, 180.0f, 244.0f, 122.0f, 183.0f, 248.0f, 124.0f, 186.0f, 252.0f, 126.0f, 189.0f, 256.0f, 128.0f, 192.0f, 260.0f, 130.0f, 195.0f, 264.0f, 132.0f, 198.0f, 268.0f, 134.0f, 201.0f, 272.0f, 136.0f, 204.0f, 276.0f, 138.0f, 207.0f, 280.0f, 140.0f, 210.0f, 284.0f, 142.0f, 213.0f, 288.0f, 144.0f, 216.0f, 292.0f, 146.0f, 219.0f, 296.0f, 148.0f, 222.0f, 300.0f, 150.0f, 225.0f, 304.0f, 152.0f, 228.0f, 308.0f, 154.0f, 231.0f, 312.0f, 156.0f, 234.0f, 316.0f, 158.0f, 237.0f, 320.0f, 160.0f, 240.0f, 324.0f, 162.0f, 
243.0f, 328.0f, 164.0f, 246.0f, 332.0f, 166.0f, 249.0f, 336.0f, 168.0f, 252.0f, 340.0f, 170.0f, 255.0f, 344.0f, 172.0f, 258.0f, 348.0f, 174.0f, 261.0f, 352.0f, 176.0f, 264.0f, 356.0f, 178.0f, 267.0f, 360.0f, 180.0f, 270.0f, 364.0f, 182.0f, 273.0f, 368.0f, 184.0f, 276.0f, 372.0f, 186.0f, 279.0f, 376.0f, 188.0f, 282.0f, 380.0f, 190.0f, 285.0f, 384.0f, 192.0f, 288.0f, 388.0f, 194.0f, 291.0f, 392.0f, 196.0f, 294.0f, 396.0f, 198.0f, 297.0f, 400.0f, 200.0f, 300.0f, 404.0f, 202.0f, 303.0f, 408.0f, 204.0f, 306.0f, 412.0f, 206.0f, 309.0f, 416.0f, 208.0f, 312.0f, 420.0f, 210.0f, 315.0f, 424.0f, 212.0f, 318.0f, 428.0f, 214.0f, 321.0f, 432.0f, 216.0f, 324.0f, 436.0f, 218.0f, 327.0f, 440.0f, 220.0f, 330.0f, 444.0f, 222.0f, 333.0f, 448.0f, 224.0f, 336.0f, 452.0f, 226.0f, 339.0f, 456.0f, 228.0f, 342.0f, 460.0f, 230.0f, 345.0f, 464.0f, 232.0f, 348.0f, 468.0f, 234.0f, 351.0f, 472.0f, 236.0f, 354.0f, 476.0f, 238.0f, 357.0f, 480.0f, 240.0f, 360.0f, 484.0f, 242.0f, 363.0f, 488.0f, 244.0f, 366.0f, 492.0f, 246.0f, 369.0f, 496.0f, 248.0f, 372.0f, 500.0f, 250.0f, 375.0f, 504.0f, 252.0f, 378.0f, 508.0f, 254.0f, 381.0f, 512.0f, 256.0f, 384.0f, 516.0f, 258.0f, 387.0f, 520.0f, 260.0f, 390.0f, 524.0f, 262.0f, 393.0f, 528.0f, 264.0f, 396.0f, 532.0f, 266.0f, 399.0f, 536.0f, 268.0f, 402.0f, 540.0f, 270.0f, 405.0f, 544.0f, 272.0f, 408.0f, 548.0f, 274.0f, 411.0f, 552.0f, 276.0f, 414.0f, 556.0f, 278.0f, 417.0f, 560.0f, 280.0f, 420.0f, 564.0f, 282.0f, 423.0f, 568.0f, 284.0f, 426.0f, 572.0f, 286.0f, 429.0f, 576.0f, 288.0f, 432.0f, 580.0f, 290.0f, 435.0f, 584.0f, 292.0f, 438.0f, 588.0f, 294.0f, 441.0f, 592.0f, 296.0f, 444.0f, 596.0f, 298.0f, 447.0f, 600.0f, 300.0f, 450.0f, 604.0f, 302.0f, 453.0f, 608.0f, 304.0f, 456.0f, 612.0f, 306.0f, 459.0f, 616.0f, 308.0f, 462.0f, 620.0f, 310.0f, 465.0f, 624.0f, 312.0f, 468.0f, 628.0f, 314.0f, 471.0f, 632.0f, 316.0f, 474.0f, 636.0f, 318.0f, 477.0f, 640.0f, 320.0f, 480.0f, 644.0f, 322.0f, 483.0f, 648.0f, 324.0f, 486.0f, 652.0f, 326.0f, 489.0f, 656.0f, 328.0f, 492.0f, 
660.0f, 330.0f, 495.0f, 664.0f, 332.0f, 498.0f, 668.0f, 334.0f, 501.0f, 672.0f, 336.0f, 504.0f, 676.0f, 338.0f, 507.0f, 680.0f, 340.0f, 510.0f, 684.0f, 342.0f, 513.0f, 688.0f, 344.0f, 516.0f, 692.0f, 346.0f, 519.0f, 696.0f, 348.0f, 522.0f, 700.0f, 350.0f, 525.0f, 704.0f, 352.0f, 528.0f, 708.0f, 354.0f, 531.0f, 712.0f, 356.0f, 534.0f, 716.0f, 358.0f, 537.0f, 720.0f, 360.0f, 540.0f, 724.0f, 362.0f, 543.0f, 728.0f, 364.0f, 546.0f, 732.0f, 366.0f, 549.0f, 736.0f, 368.0f, 552.0f, 740.0f, 370.0f, 555.0f, 744.0f, 372.0f, 558.0f, 748.0f, 374.0f, 561.0f, 752.0f, 376.0f, 564.0f, 756.0f, 378.0f, 567.0f, 760.0f, 380.0f, 570.0f, 764.0f, 382.0f, 573.0f, 768.0f, 384.0f, 576.0f, 772.0f, 386.0f, 579.0f, 776.0f, 388.0f, 582.0f, 780.0f, 390.0f, 585.0f, 784.0f, 392.0f, 588.0f, 788.0f, 394.0f, 591.0f, 792.0f, 396.0f, 594.0f, 796.0f, 398.0f, 597.0f, 800.0f, 400.0f, 600.0f, 804.0f, 402.0f, 603.0f, 808.0f, 404.0f, 606.0f, 812.0f, 406.0f, 609.0f, 816.0f, 408.0f, 612.0f, 820.0f, 410.0f, 615.0f, 824.0f, 412.0f, 618.0f, 828.0f, 414.0f, 621.0f, 832.0f, 416.0f, 624.0f, 836.0f, 418.0f, 627.0f, 840.0f, 420.0f, 630.0f, 844.0f, 422.0f, 633.0f, 848.0f, 424.0f, 636.0f, 852.0f, 426.0f, 639.0f, 856.0f, 428.0f, 642.0f, 860.0f, 430.0f, 645.0f, 864.0f, 432.0f, 648.0f, 868.0f, 434.0f, 651.0f, 872.0f, 436.0f, 654.0f, 876.0f, 438.0f, 657.0f, 880.0f, 440.0f, 660.0f, 884.0f, 442.0f, 663.0f, 888.0f, 444.0f, 666.0f, 892.0f, 446.0f, 669.0f, 896.0f, 448.0f, 672.0f, 900.0f, 450.0f, 675.0f, 904.0f, 452.0f, 678.0f, 908.0f, 454.0f, 681.0f, 912.0f, 456.0f, 684.0f, 916.0f, 458.0f, 687.0f, 920.0f, 460.0f, 690.0f, 924.0f, 462.0f, 693.0f, 928.0f, 464.0f, 696.0f, 932.0f, 466.0f, 699.0f, 936.0f, 468.0f, 702.0f, 940.0f, 470.0f, 705.0f, 944.0f, 472.0f, 708.0f, 948.0f, 474.0f, 711.0f, 952.0f, 476.0f, 714.0f, 956.0f, 478.0f, 717.0f, 960.0f, 480.0f, 720.0f, 964.0f, 482.0f, 723.0f, 968.0f, 484.0f, 726.0f, 972.0f, 486.0f, 729.0f, 976.0f, 488.0f, 732.0f, 980.0f, 490.0f, 735.0f, 984.0f, 492.0f, 738.0f, 988.0f, 494.0f, 741.0f, 992.0f, 
496.0f, 744.0f, 996.0f, 498.0f, 747.0f, 1000.0f, 500.0f, 750.0f, 1004.0f, 502.0f, 753.0f, 1008.0f, 504.0f, 756.0f, 1012.0f, 506.0f, 759.0f, 1016.0f, 508.0f, 762.0f, 1020.0f, 510.0f, 765.0f, 1024.0f, 512.0f, 768.0f, 1028.0f, 514.0f, 771.0f, 1032.0f, 516.0f, 774.0f, 1036.0f, 518.0f, 777.0f, 1040.0f, 520.0f, 780.0f, 1044.0f, 522.0f, 783.0f, 1048.0f, 524.0f, 786.0f, 1052.0f, 526.0f, 789.0f, 1056.0f, 528.0f, 792.0f, 1060.0f, 530.0f, 795.0f, 1064.0f, 532.0f, 798.0f, 1068.0f, 534.0f, 801.0f, 1072.0f, 536.0f, 804.0f, 1076.0f, 538.0f, 807.0f, 1080.0f, 540.0f, 810.0f, 1084.0f, 542.0f, 813.0f, 1088.0f, 544.0f, 816.0f, 1092.0f, 546.0f, 819.0f, 1096.0f, 548.0f, 822.0f, 1100.0f, 550.0f, 825.0f, 1104.0f, 552.0f, 828.0f, 1108.0f, 554.0f, 831.0f, 1112.0f, 556.0f, 834.0f, 1116.0f, 558.0f, 837.0f, 1120.0f, 560.0f, 840.0f, 1124.0f, 562.0f, 843.0f, 1128.0f, 564.0f, 846.0f, 1132.0f, 566.0f, 849.0f, 1136.0f, 568.0f, 852.0f, 1140.0f, 570.0f, 855.0f, 1144.0f, 572.0f, 858.0f, 1148.0f, 574.0f, 861.0f, 1152.0f, 576.0f, 864.0f, 1156.0f, 578.0f, 867.0f, 1160.0f, 580.0f, 870.0f, 1164.0f, 582.0f, 873.0f, 1168.0f, 584.0f, 876.0f, 1172.0f, 586.0f, 879.0f, 1176.0f, 588.0f, 882.0f, 1180.0f, 590.0f, 885.0f, 1184.0f, 592.0f, 888.0f, 1188.0f, 594.0f, 891.0f, 1192.0f, 596.0f, 894.0f, 1196.0f, 598.0f, 897.0f, 1200.0f, 600.0f, 900.0f, 1204.0f, 602.0f, 903.0f, 1208.0f, 604.0f, 906.0f, 1212.0f, 606.0f, 909.0f, 1216.0f, 608.0f, 912.0f, 1220.0f, 610.0f, 915.0f, 1224.0f, 612.0f, 918.0f, 1228.0f, 614.0f, 921.0f, 1232.0f, 616.0f, 924.0f, 1236.0f, 618.0f, 927.0f, 1240.0f, 620.0f, 930.0f, 1244.0f, 622.0f, 933.0f, 1248.0f, 624.0f, 936.0f, 1252.0f, 626.0f, 939.0f, 1256.0f, 628.0f, 942.0f, 1260.0f, 630.0f, 945.0f, 1264.0f, 632.0f, 948.0f, 1268.0f, 634.0f, 951.0f, 1272.0f, 636.0f, 954.0f, 1276.0f, 638.0f, 957.0f, 1280.0f, 640.0f, 960.0f, 1284.0f, 642.0f, 963.0f, 1288.0f, 644.0f, 966.0f, 1292.0f, 646.0f, 969.0f, 1296.0f, 648.0f, 972.0f, 1300.0f, 650.0f, 975.0f, 1304.0f, 652.0f, 978.0f, 1308.0f, 654.0f, 981.0f, 1312.0f, 
656.0f, 984.0f, 1316.0f, 658.0f, 987.0f, 1320.0f, 660.0f, 990.0f, 1324.0f, 662.0f, 993.0f, 1328.0f, 664.0f, 996.0f, 1332.0f, 666.0f, 999.0f, 1336.0f, 668.0f, 1002.0f, 1340.0f, 670.0f, 1005.0f, 1344.0f, 672.0f, 1008.0f, 1348.0f, 674.0f, 1011.0f, 1352.0f, 676.0f, 1014.0f, 1356.0f, 678.0f, 1017.0f, 1360.0f, 680.0f, 1020.0f, 1364.0f, 682.0f, 1023.0f, 1368.0f, 684.0f, 1026.0f, 1372.0f, 686.0f, 1029.0f, 1376.0f, 688.0f, 1032.0f, 1380.0f, 690.0f, 1035.0f, 1384.0f, 692.0f, 1038.0f, 1388.0f, 694.0f, 1041.0f, 1392.0f, 696.0f, 1044.0f, 1396.0f, 698.0f, 1047.0f, 1400.0f, 700.0f, 1050.0f, 1404.0f, 702.0f, 1053.0f, 1408.0f, 704.0f, 1056.0f, 1412.0f, 706.0f, 1059.0f, 1416.0f, 708.0f, 1062.0f, 1420.0f, 710.0f, 1065.0f, 1424.0f, 712.0f, 1068.0f, 1428.0f, 714.0f, 1071.0f, 1432.0f, 716.0f, 1074.0f, 1436.0f, 718.0f, 1077.0f, 1440.0f, 720.0f, 1080.0f, 1444.0f, 722.0f, 1083.0f, 1448.0f, 724.0f, 1086.0f, 1452.0f, 726.0f, 1089.0f, 1456.0f, 728.0f, 1092.0f, 1460.0f, 730.0f, 1095.0f, 1464.0f, 732.0f, 1098.0f, 1468.0f, 734.0f, 1101.0f, 1472.0f, 736.0f, 1104.0f, 1476.0f, 738.0f, 1107.0f, 1480.0f, 740.0f, 1110.0f, 1484.0f, 742.0f, 1113.0f, 1488.0f, 744.0f, 1116.0f, 1492.0f, 746.0f, 1119.0f, 1496.0f, 748.0f, 1122.0f, 1500.0f, 750.0f, 1125.0f, 1504.0f, 752.0f, 1128.0f, 1508.0f, 754.0f, 1131.0f, 1512.0f, 756.0f, 1134.0f, 1516.0f, 758.0f, 1137.0f, 1520.0f, 760.0f, 1140.0f, 1524.0f, 762.0f, 1143.0f, 1528.0f, 764.0f, 1146.0f, 1532.0f, 766.0f, 1149.0f, 1536.0f, 768.0f, 1152.0f, 1540.0f, 770.0f, 1155.0f, 1544.0f, 772.0f, 1158.0f, 1548.0f, 774.0f, 1161.0f, 1552.0f, 776.0f, 1164.0f, 1556.0f, 778.0f, 1167.0f, 1560.0f, 780.0f, 1170.0f, 1564.0f, 782.0f, 1173.0f, 1568.0f, 784.0f, 1176.0f, 1572.0f, 786.0f, 1179.0f, 1576.0f, 788.0f, 1182.0f, 1580.0f, 790.0f, 1185.0f, 1584.0f, 792.0f, 1188.0f, 1588.0f, 794.0f, 1191.0f, 1592.0f, 796.0f, 1194.0f, 1596.0f, 798.0f, 1197.0f, 1600.0f, 800.0f, 1200.0f, 1604.0f, 802.0f, 1203.0f, 1608.0f, 804.0f, 1206.0f, 1612.0f, 806.0f, 1209.0f, 1616.0f, 808.0f, 1212.0f, 1620.0f, 
810.0f, 1215.0f, 1624.0f, 812.0f, 1218.0f, 1628.0f, 814.0f, 1221.0f, 1632.0f, 816.0f, 1224.0f, 1636.0f, 818.0f, 1227.0f, 1640.0f, 820.0f, 1230.0f, 1644.0f, 822.0f, 1233.0f, 1648.0f, 824.0f, 1236.0f, 1652.0f, 826.0f, 1239.0f, 1656.0f, 828.0f, 1242.0f, 1660.0f, 830.0f, 1245.0f, 1664.0f, 832.0f, 1248.0f, 1668.0f, 834.0f, 1251.0f, 1672.0f, 836.0f, 1254.0f, 1676.0f, 838.0f, 1257.0f, 1680.0f, 840.0f, 1260.0f, 1684.0f, 842.0f, 1263.0f, 1688.0f, 844.0f, 1266.0f, 1692.0f, 846.0f, 1269.0f, 1696.0f, 848.0f, 1272.0f, 1700.0f, 850.0f, 1275.0f, 1704.0f, 852.0f, 1278.0f, 1708.0f, 854.0f, 1281.0f, 1712.0f, 856.0f, 1284.0f, 1716.0f, 858.0f, 1287.0f, 1720.0f, 860.0f, 1290.0f, 1724.0f, 862.0f, 1293.0f, 1728.0f, 864.0f, 1296.0f, 1732.0f, 866.0f, 1299.0f, 1736.0f, 868.0f, 1302.0f, 1740.0f, 870.0f, 1305.0f, 1744.0f, 872.0f, 1308.0f, 1748.0f, 874.0f, 1311.0f, 1752.0f, 876.0f, 1314.0f, 1756.0f, 878.0f, 1317.0f, 1760.0f, 880.0f, 1320.0f, 1764.0f, 882.0f, 1323.0f, 1768.0f, 884.0f, 1326.0f, 1772.0f, 886.0f, 1329.0f, 1776.0f, 888.0f, 1332.0f, 1780.0f, 890.0f, 1335.0f, 1784.0f, 892.0f, 1338.0f, 1788.0f, 894.0f, 1341.0f, 1792.0f, 896.0f, 1344.0f, 1796.0f, 898.0f, 1347.0f, 1800.0f, 900.0f, 1350.0f, 1804.0f, 902.0f, 1353.0f, 1808.0f, 904.0f, 1356.0f, 1812.0f, 906.0f, 1359.0f, 1816.0f, 908.0f, 1362.0f, 1820.0f, 910.0f, 1365.0f, 1824.0f, 912.0f, 1368.0f, 1828.0f, 914.0f, 1371.0f, 1832.0f, 916.0f, 1374.0f, 1836.0f, 918.0f, 1377.0f, 1840.0f, 920.0f, 1380.0f, 1844.0f, 922.0f, 1383.0f, 1848.0f, 924.0f, 1386.0f, 1852.0f, 926.0f, 1389.0f, 1856.0f, 928.0f, 1392.0f, 1860.0f, 930.0f, 1395.0f, 1864.0f, 932.0f, 1398.0f, 1868.0f, 934.0f, 1401.0f, 1872.0f, 936.0f, 1404.0f, 1876.0f, 938.0f, 1407.0f, 1880.0f, 940.0f, 1410.0f, 1884.0f, 942.0f, 1413.0f, 1888.0f, 944.0f, 1416.0f, 1892.0f, 946.0f, 1419.0f, 1896.0f, 948.0f, 1422.0f, 1900.0f, 950.0f, 1425.0f, 1904.0f, 952.0f, 1428.0f, 1908.0f, 954.0f, 1431.0f, 1912.0f, 956.0f, 1434.0f, 1916.0f, 958.0f, 1437.0f, 1920.0f, 960.0f, 1440.0f, 1924.0f, 962.0f, 1443.0f, 
1928.0f, 964.0f, 1446.0f, 1932.0f, 966.0f, 1449.0f, 1936.0f, 968.0f, 1452.0f, 1940.0f, 970.0f, 1455.0f, 1944.0f, 972.0f, 1458.0f, 1948.0f, 974.0f, 1461.0f, 1952.0f, 976.0f, 1464.0f, 1956.0f, 978.0f, 1467.0f, 1960.0f, 980.0f, 1470.0f, 1964.0f, 982.0f, 1473.0f, 1968.0f, 984.0f, 1476.0f, 1972.0f, 986.0f, 1479.0f, 1976.0f, 988.0f, 1482.0f, 1980.0f, 990.0f, 1485.0f, 1984.0f, 992.0f, 1488.0f, 1988.0f, 994.0f, 1491.0f, 1992.0f, 996.0f, 1494.0f, 1996.0f, 998.0f, 1497.0f, 2000.0f, 1000.0f, 1500.0f}; - float result[1500] = {0.f}; - int dimension[1] = {0}; - std::vector dim = {0}; - int dimensionLength = 1; - float theoreticalMin[3] = {4.f, 2.f, 3.f}; - float theoreticalMax[3] = {2000.00f, 1000.00f, 1500.00f}; - float theoreticalRange[3] = {1996.00f, 998.00f, 1497.00f}; -}; - -class StdTest : public testing::Test { -public: - Nd4jLong examplesShape[4] = {10,5,10,15}; - int dimensionsForStd[3] = {0,2,3}; - std::vector dimsForStd = {0,2,3}; - int dimensionLength = 3; - //standard deviation - int opNum = 1; - float x[7500] ={0.5786382f, 0.16236664f, 0.069020785f, 0.9840061f, 0.941816f, 0.76720303f, 0.7794372f, 0.46979624f, 0.73381734f, 0.9957244f, 0.6167372f, 0.53088397f, 0.28015637f, 0.826945f, 0.83352476f, 0.66504276f, 0.5793391f, 0.47484478f, 0.7076381f, 0.49456358f, 0.62396896f, 0.53332835f, 0.6388812f, 0.68836075f, 0.26663998f, 0.0014623206f, 0.19409843f, 0.56639415f, 0.98213744f, 0.68497056f, 0.867037f, 0.76840234f, 0.318186f, 0.28759065f, 0.11965875f, 0.53291357f, 0.53767395f, 0.55705845f, 0.7467155f, 0.1575149f, 0.18076386f, 0.8174763f, 0.22883898f, 0.5071535f, 0.86735153f, 0.9635827f, 0.24558435f, 0.15767147f, 0.458882f, 0.71102697f, 0.21914826f, 0.16241662f, 0.27248728f, 0.89015275f, 0.71070856f, 0.55088985f, 0.98992974f, 0.70927286f, 0.9261268f, 0.50781846f, 0.62151235f, 0.4590896f, 0.7487442f, 0.21744072f, 0.2636398f, 0.084352165f, 0.46951914f, 0.383644f, 0.6749645f, 0.24111961f, 0.83259743f, 0.05546627f, 0.4790621f, 0.68884027f, 0.90992177f, 0.23907907f, 0.5342047f, 
0.221003f, 0.29615387f, 0.43343517f, 0.16554528f, 0.73144174f, 0.52923626f, 0.10688303f, 0.78197056f, 0.39259177f, 0.43832788f, 0.052234255f, 0.5795483f, 0.97033966f, 0.7392455f, 0.086584255f, 0.9092887f, 0.9402065f, 0.9126419f, 0.44749174f, 0.20514569f, 0.8749829f, 0.30917913f, 0.10170506f, 0.37034252f, 0.7427814f, 0.5497875f, 0.3116048f, 0.12112484f, 0.07918618f, 0.6003074f, 0.6188079f, 0.6292188f, 0.26580265f, 0.42029652f, 0.9863358f, 0.41489154f, 0.23757206f, 0.30395788f, 0.75231904f, 0.76751274f, 0.6324773f, 0.3231405f, 0.5016677f, 0.86029065f, 0.575702f, 0.7473972f, 0.118974194f, 0.115586124f, 0.62481487f, 0.91101325f, 0.6137756f, 0.71462154f, 0.995567f, 0.93439484f, 0.37260458f, 0.6033152f, 0.3444346f, 0.91579247f, 0.7452442f, 0.97466874f, 0.6299154f, 0.35426098f, 0.50121397f, 0.14155711f, 0.78726757f, 0.028531995f, 0.8435531f, 0.6444501f, 0.8826095f, 0.25354537f, 0.5547923f, 0.99555415f, 0.8430975f, 246.29712f, 253.4231f, 282.26755f, 215.6161f, 251.57019f, 239.20515f, 296.2021f, 234.32518f, 278.9852f, 235.4248f, 238.70155f, 256.9956f, 212.62695f, 288.38763f, 231.21237f, 284.80396f, 261.86835f, 223.92522f, 205.86221f, 234.742f, 262.11407f, 298.1942f, 242.60652f, 238.83704f, 251.6588f, 267.23315f, 294.4865f, 223.47488f, 259.24976f, 251.82695f, 265.01166f, 234.65732f, 265.1853f, 202.15352f, 244.42313f, 253.90427f, 212.09233f, 227.62961f, 237.77951f, 261.36838f, 234.32147f, 240.81522f, 273.62595f, 221.19333f, 284.11353f, 216.00859f, 284.36948f, 243.90376f, 282.61584f, 256.97165f, 275.08722f, 253.8055f, 265.1405f, 298.87567f, 223.393f, 288.02148f, 287.26102f, 276.36237f, 290.52777f, 299.57062f, 224.73566f, 290.82623f, 231.3513f, 238.51828f, 230.74028f, 224.97539f, 290.11844f, 238.00816f, 290.39606f, 291.32538f, 272.94766f, 211.88446f, 291.66742f, 210.34077f, 285.62628f, 246.31918f, 283.68738f, 282.34418f, 223.43613f, 245.08679f, 235.22693f, 246.01146f, 224.03375f, 280.5359f, 226.01413f, 262.18884f, 237.87335f, 238.89404f, 259.04294f, 202.59842f, 294.69302f, 
209.01956f, 244.75763f, 264.3232f, 293.4627f, 287.69165f, 236.79088f, 282.37012f, 222.24211f, 293.5885f, 249.6388f, 273.91916f, 215.40356f, 255.45584f, 268.4702f, 275.81577f, 259.25064f, 224.95108f, 250.37906f, 267.89093f, 256.31766f, 227.89124f, 204.10915f, 263.38596f, 213.62708f, 218.84116f, 289.00494f, 216.93646f, 200.29439f, 284.1103f, 216.20671f, 260.57642f, 248.57745f, 241.73776f, 244.7205f, 286.86218f, 206.42664f, 204.06395f, 216.60626f, 224.02377f, 219.4697f, 287.2509f, 246.91132f, 289.83777f, 292.73767f, 202.73048f, 206.4165f, 294.0605f, 276.23276f, 288.51318f, 279.45175f, 253.69833f, 281.3311f, 249.44318f, 287.76288f, 262.2878f, 238.2247f, 203.41438f, 208.8359f, 274.0062f, -9.999092f, -9.99934f, -9.999794f, -9.999654f, -9.999987f, -9.999574f, -9.99965f, -9.999892f, -9.999203f, -9.999798f, -9.999658f, -9.999974f, -9.999982f, -9.999003f, -9.999369f, -9.999311f, -9.999708f, -9.999327f, -9.999302f, -9.999419f, -9.999553f, -9.9991665f, -9.999842f, -9.9991665f, -9.999702f, -9.999081f, -9.9993725f, -9.999735f, -9.999399f, -9.999073f, -9.999045f, -9.999458f, -9.99971f, -9.999414f, -9.999165f, -9.999782f, -9.999417f, -9.999513f, -9.999398f, -9.999933f, -9.999367f, -9.999933f, -9.999302f, -9.999572f, -9.999926f, -9.999371f, -9.999746f, -9.999628f, -9.9995165f, -9.999816f, -9.9998255f, -9.999983f, -9.999482f, -9.99976f, -9.999302f, -9.999825f, -9.999026f, -9.999029f, -9.999147f, -9.9995f, -9.999214f, -9.999216f, -9.999818f, -9.999334f, -9.999354f, -9.999414f, -9.999564f, -9.99962f, -9.999615f, -9.999496f, -9.999803f, -9.999454f, -9.999789f, -9.999615f, -9.999473f, -9.999701f, -9.999164f, -9.999112f, -9.9991865f, -9.999779f, -9.999639f, -9.999739f, -9.999949f, -9.999005f, -9.999157f, -9.999394f, -9.999148f, -9.999729f, -9.999721f, -9.999721f, -9.999678f, -9.999215f, -9.99921f, -9.999848f, -9.999702f, -9.999167f, -9.999995f, -9.999203f, -9.999381f, -9.999537f, -9.999643f, -9.999887f, -9.999234f, -9.999761f, -9.999863f, -9.9999275f, -9.99965f, -9.999459f, -9.999674f, 
-9.999408f, -9.999761f, -9.999802f, -9.999465f, -9.999648f, -9.999447f, -9.999051f, -9.999212f, -9.999952f, -9.999188f, -9.999153f, -9.999513f, -9.999785f, -9.999538f, -9.999458f, -9.999802f, -9.999176f, -9.999821f, -9.999529f, -9.999089f, -9.999206f, -9.999853f, -9.999218f, -9.999763f, -9.999283f, -9.999687f, -9.999333f, -9.9996195f, -9.999563f, -9.99978f, -9.999214f, -9.999417f, -9.999161f, -9.999615f, -9.999529f, -9.999715f, -9.99965f, -9.999793f, -9.999159f, -9.999804f, -9.999826f, 0.25581473f, 0.011998488f, 0.19125576f, 0.26596868f, 0.21618238f, 0.7962773f, 0.8030581f, 0.7543603f, 0.37575766f, 0.764879f, 0.10974313f, 0.06437898f, 0.26072952f, 0.30300763f, 0.029973997f, 0.025493756f, 0.21206349f, 0.7668091f, 0.53181326f, 0.36343664f, 0.5012292f, 0.17466855f, 0.188394f, 0.73864985f, 0.4810524f, 0.42596745f, 0.17328279f, 0.2649388f, 0.5691122f, 0.6979966f, 0.40108117f, 0.680846f, 0.8891427f, 0.36562127f, 0.5258834f, 0.02162829f, 0.34679192f, 0.51932955f, 0.5934363f, 0.8976068f, 0.17759448f, 0.84487504f, 0.08563967f, 0.8079017f, 0.53375924f, 0.5292685f, 0.7386051f, 0.84675163f, 0.52025354f, 0.402771f, 0.25339442f, 0.020660425f, 0.8532977f, 0.26857603f, 0.08696012f, 0.30953142f, 0.05712433f, 0.52134746f, 0.668039f, 0.8811842f, 0.84066904f, 0.5784957f, 0.13710192f, 0.25812075f, 0.12778813f, 0.6114538f, 0.68826395f, 0.6296169f, 0.050615292f, 0.60265064f, 0.59383374f, 0.50250226f, 0.5533876f, 0.80024f, 0.15964289f, 0.44098398f, 0.3639451f, 0.9836441f, 0.59009975f, 0.42786047f, 0.66358715f, 0.77674544f, 0.96205765f, 0.30722687f, 0.07275952f, 0.8073388f, 0.8589582f, 0.1655514f, 0.942791f, 0.7421209f, 0.33589354f, 0.031047517f, 0.2333922f, 0.32696965f, 0.06680667f, 0.43655157f, 0.60084665f, 0.924222f, 0.5181169f, 0.8633322f, 0.07042168f, 0.3576994f, 0.23789743f, 0.98523647f, 0.35718223f, 0.09434685f, 0.7895948f, 0.6365413f, 0.7331945f, 0.8172492f, 0.2427676f, 0.23792028f, 0.7375947f, 0.72343403f, 0.47277793f, 0.53527576f, 0.30485073f, 0.64892334f, 0.15171374f, 
0.8003455f, 0.9694175f, 0.3611101f, 0.8037058f, 0.7925937f, 0.18575527f, 0.81588566f, 0.094868064f, 0.9775748f, 0.6791609f, 0.26662946f, 0.18830737f, 0.595805f, 0.49300948f, 0.9033739f, 0.663468f, 0.3000145f, 0.57594025f, 0.8624458f, 0.18944798f, 0.65868706f, 0.35742447f, 0.099066f, 0.2832066f, 0.6912541f, 0.24243657f, 0.9277832f, 0.64250916f, 0.9440414f, 0.2378183f, 0.055244252f, 0.76272976f, 0.67200613f, 0.49664533f, 0.5904184f, 0.17577513f, 0.7822792f, 0.61906105f, 0.6896018f, 0.873862f, 0.9968526f, 0.4556378f, 0.87811166f, 0.86004007f, 0.41853464f, 0.5995596f, 0.40827745f, 0.28851208f, 0.5202819f, 0.19265123f, 0.92939705f, 0.70689267f, 0.11201124f, 0.98409003f, 0.18970507f, 0.7182739f, 0.5939693f, 0.05994234f, 0.021280153f, 0.14513102f, 0.40208468f, 0.22757782f, 0.23340172f, 0.3629895f, 0.13855931f, 0.78980845f, 0.8154337f, 0.9686873f, 0.03149764f, 0.027852392f, 0.7822175f, 0.3670333f, 0.78024536f, 0.44308364f, 0.7551719f, 0.7001006f, 0.99656695f, 0.7096177f, 0.6460425f, 0.3090078f, 0.3817309f, 0.75382084f, 0.24751845f, 0.9919141f, 0.8101396f, 0.72690064f, 0.58389014f, 0.13931125f, 0.4260997f, 0.19920675f, 0.29389992f, 0.22849065f, 0.054567583f, 0.0286403f, 0.68753535f, 0.6393382f, 0.83747303f, 0.43944475f, 0.16854768f, 0.659512f, 0.25002992f, 0.015794016f, 0.9449101f, 0.7541057f, 0.945847f, 0.97127223f, 0.59012526f, 0.04557803f, 0.114047214f, 0.7673727f, 0.4418709f, 0.1393514f, 0.41973236f, 0.5081946f, 0.282509f, 0.30676988f, 0.2546641f, 0.6687642f, 0.31170198f, 0.43019253f, 0.81878066f, 0.9186455f, 0.787344f, 0.119964f, 0.48843786f, 0.26080957f, 0.43372f, 0.7264191f, 0.7316731f, 0.52168936f, 0.3228819f, 0.5850103f, 0.58188486f, 0.5764724f, 0.85721606f, 0.0048306463f, 0.9518531f, 0.51219267f, 0.9845728f, 0.72086376f, 0.21577734f, 0.14109355f, 0.16697218f, 0.70463514f, 0.54204077f, 0.5187638f, 0.08548192f, 0.021048365f, 0.8778848f, 0.19857538f, 0.04883652f, 0.7117264f, 0.10805124f, 0.49904156f, 0.22152025f, 0.6800811f, 0.17553183f, 0.637131f, 0.4801609f, 
0.5453409f, 0.25295126f, 0.48752138f, 0.5394039f, 0.7378793f, 0.89846796f, 0.30146414f, 0.21664028f, 0.27394173f, 0.022367671f, 0.9892407f, 0.19886415f, 0.41262844f, 0.30491787f, 0.49006933f, 0.81182134f, 0.673692f, 0.2412966f, 0.17482981f, 0.5432391f, 0.8450185f, 0.69215244f, 0.70803803f, 0.04421597f, 0.29316452f, 0.21701345f, 0.111889146f, 0.85679144f, 0.92165715f, 0.093697235f, 0.3446256f, 0.46299627f, 0.4249108f, 0.7948484f, 0.19556557f, 0.7571282f, 0.01646797f, 0.8894279f, 0.19658394f, 0.26087877f, 0.70531607f, 0.6966002f, 0.5969214f, 0.5227917f, 0.36881882f, 0.9858828f, 0.23796275f, 0.4213183f, 0.48533306f, 0.44627303f, 0.15690878f, 0.6434008f, 0.41254497f, 0.99109685f, 0.20189007f, 0.5941583f, 0.18635221f, 0.6158875f, 0.42995065f, 0.027945405f, 0.8306056f, 0.3877798f, 0.982836f, 0.49713424f, 0.91654354f, 0.6155134f, 0.814247f, 0.3077533f, 0.22847779f, 0.88966215f, 0.8747604f, 0.41640446f, 0.9716281f, 0.18517044f, 0.033389226f, 0.026901966f, 0.41404715f, 0.7838385f, 0.9055906f, 0.63307714f, 0.6555554f, 0.61210406f, 0.8100642f, 0.7994826f, 0.50656956f, 0.7002863f, 0.122354865f, 0.73366094f, 0.92528874f, 0.50401425f, 0.3586611f, 0.3649591f, 0.8697877f, 0.09153776f, 0.56987906f, 0.4228477f, 0.72918344f, 0.21651368f, 0.273237f, 0.1320687f, 0.256684f, 0.3676141f, 0.1802598f, 0.8279442f, 0.5993243f, 0.99537796f, 0.70956576f, 0.6580005f, 0.9079618f, 0.06857852f, 0.33703786f, 0.42991522f, 0.46704793f, 0.30789334f, 0.97041386f, 0.067041285f, 0.48089835f, 0.23312177f, 0.09135661f, 0.6173484f, 0.47475886f, 0.9562112f, 0.99144304f, 0.50248766f, 0.5567772f, 0.6791836f, 0.5094131f, 0.5138229f, 0.9128905f, 0.5559054f, 0.28739175f, 0.5442868f, 0.1325101f, 0.039360367f, 0.9252663f, 0.30213857f, 0.5769297f, 0.24732989f, 0.7464911f, 0.16295283f, 0.22247133f, 0.6684257f, 0.30283514f, 0.31917402f, 0.2872067f, 0.41503724f, 0.81451225f, 0.03269196f, 0.820269f, 0.5588804f, 0.26527935f, 0.6293965f, 0.40942776f, 0.6733743f, 0.5519464f, 0.7554137f, 0.28561452f, 0.19815777f, 
0.14119685f, 0.8302559f, 0.47257373f, 0.45373413f, 0.26654762f, 0.51656854f, 0.16259237f, 0.8570836f, 0.6660475f, 0.9988463f, 0.2234983f, 0.29011694f, 0.19929285f, 0.87688833f, 288.208f, 299.0334f, 234.06802f, 288.59332f, 285.71396f, 208.14828f, 243.33327f, 263.37518f, 222.83241f, 267.64508f, 236.68651f, 240.05948f, 241.17122f, 227.03455f, 229.1796f, 231.68953f, 267.16785f, 205.02823f, 264.77625f, 237.24646f, 249.54239f, 232.01376f, 208.56255f, 210.85419f, 239.4313f, 285.38928f, 207.99615f, 219.70026f, 286.46414f, 259.6215f, 264.591f, 240.25525f, 212.3435f, 223.9664f, 258.98178f, 278.75095f, 267.05542f, 200.13255f, 271.41925f, 235.1554f, 277.16098f, 235.27489f, 218.60641f, 299.13928f, 237.70187f, 218.95384f, 233.26817f, 239.93466f, 210.01537f, 237.0251f, 236.5253f, 272.3498f, 248.93144f, 249.78705f, 202.80908f, 296.07632f, 248.54794f, 228.7884f, 238.64236f, 214.01402f, 231.23134f, 243.41833f, 254.53098f, 229.02164f, 210.59755f, 268.93982f, 277.32697f, 297.97763f, 259.46844f, 229.38896f, 288.10034f, 251.99005f, 273.70062f, 277.30673f, 212.11809f, 205.43094f, 270.62506f, 244.42522f, 280.7068f, 252.17372f, 221.36655f, 231.1006f, 224.59811f, 239.97418f, 257.73175f, 290.97693f, 205.1341f, 217.40971f, 275.88208f, 201.61108f, 280.00003f, 289.00586f, 267.0944f, 231.31201f, 211.03806f, 213.06203f, 269.1713f, 265.57556f, 248.42055f, 209.8977f, 286.6746f, 221.91562f, 215.06145f, 229.53949f, 269.93027f, 276.57254f, 250.9029f, 288.37958f, 228.52266f, 267.0228f, 297.99734f, 214.70332f, 253.89653f, 231.25943f, 204.15068f, 276.6967f, 213.42561f, 222.77573f, 246.64607f, 206.99153f, 251.96185f, 275.08154f, 218.24387f, 211.39914f, 266.65384f, 298.70865f, 287.00455f, 227.15556f, 247.37427f, 213.96188f, 272.59308f, 224.01898f, 235.20276f, 253.20197f, 209.47455f, 210.07729f, 261.2526f, 239.28952f, 219.84111f, 211.5859f, 263.7782f, 225.82002f, 209.55066f, 225.2778f, 276.13922f, 208.97437f, 274.6557f, 297.25998f, 287.32483f, 205.43816f, -9.999689f, -9.999144f, -9.999799f, -9.999373f, 
-9.999519f, -9.9993925f, -9.999233f, -9.999142f, -9.99984f, -9.999262f, -9.999546f, -9.999872f, -9.999391f, -9.999968f, -9.999606f, -9.999656f, -9.999715f, -9.99956f, -9.999932f, -9.999743f, -9.999814f, -9.999712f, -9.999522f, -9.999528f, -9.999384f, -9.999094f, -9.999038f, -9.999751f, -9.999586f, -9.99945f, -9.999128f, -9.999073f, -9.999791f, -9.999677f, -9.9991865f, -9.99909f, -9.999762f, -9.999218f, -9.9995575f, -9.999647f, -9.999325f, -9.999892f, -9.999989f, -9.999758f, -9.999248f, -9.999668f, -9.999531f, -9.999084f, -9.999631f, -9.999403f, -9.999865f, -9.999935f, -9.9991f, -9.999564f, -9.99925f, -9.9990425f, -9.999887f, -9.999345f, -9.999006f, -9.999103f, -9.999717f, -9.99932f, -9.999787f, -9.999386f, -9.999753f, -9.999903f, -9.999105f, -9.999969f, -9.999686f, -9.999083f, -9.99972f, -9.999545f, -9.999551f, -9.999687f, -9.999285f, -9.999309f, -9.999812f, -9.99978f, -9.999336f, -9.999835f, -9.999004f, -9.999377f, -9.999526f, -9.999481f, -9.999829f, -9.999929f, -9.999993f, -9.999933f, -9.999451f, -9.999956f, -9.999661f, -9.999863f, -9.9993305f, -9.999771f, -9.999426f, -9.999976f, -9.999994f, -9.999831f, -9.99988f, -9.999162f, -9.999056f, -9.999193f, -9.999941f, -9.999949f, -9.999971f, -9.999258f, -9.999011f, -9.999707f, -9.999535f, -9.999201f, -9.9995985f, -9.999823f, -9.999531f, -9.999698f, -9.999328f, -9.999958f, -9.999032f, -9.999576f, -9.999392f, -9.999067f, -9.99902f, -9.999045f, -9.99983f, -9.999011f, -9.999783f, -9.999335f, -9.999907f, -9.999681f, -9.999122f, -9.999256f, -9.999235f, -9.999991f, -9.999099f, -9.999523f, -9.999284f, -9.999148f, -9.999722f, -9.999268f, -9.999101f, -9.99915f, -9.999277f, -9.999724f, -9.999198f, -9.999702f, -9.999371f, -9.999346f, -9.999348f, -9.999846f, -9.99938f, -9.999386f, 0.9152095f, 0.9171647f, 0.8286799f, 0.06623944f, 0.4663288f, 0.6674705f, 0.88702863f, 0.26388377f, 0.38012853f, 0.22043897f, 0.34161663f, 0.7549241f, 0.89839345f, 0.57267684f, 0.46196744f, 0.40692735f, 0.63130325f, 0.46858534f, 0.25790846f, 0.5064126f, 
0.6745789f, 0.815519f, 0.3279563f, 0.06752282f, 0.32830805f, 0.9456376f, 0.99969417f, 0.33946416f, 0.09058472f, 0.80821294f, 0.4096069f, 0.04731839f, 0.1274211f, 0.26724407f, 0.0013231506f, 0.89294916f, 0.14734322f, 0.3986316f, 0.44342554f, 0.37137577f, 0.55341625f, 0.49281976f, 0.7313272f, 0.2879761f, 0.20376818f, 0.9424636f, 0.21195652f, 0.22167233f, 0.5677064f, 0.36845347f, 0.079733446f, 0.6180234f, 0.52336746f, 0.2760374f, 0.07769606f, 0.637682f, 0.085176565f, 0.16043824f, 0.6679482f, 0.8272858f, 0.6635249f, 0.28023627f, 0.9216744f, 0.5184493f, 0.33986536f, 0.83903545f, 0.6198479f, 0.7963929f, 0.63605565f, 0.41838124f, 0.26928508f, 0.05648084f, 0.6071852f, 0.3672051f, 0.54514945f, 0.46253535f, 0.595289f, 0.2197304f, 0.56575435f, 0.33570454f, 0.12949312f, 0.009017748f, 0.82104915f, 0.31175017f, 0.46786937f, 0.9008307f, 0.059177548f, 0.21651942f, 0.58483404f, 0.13534085f, 0.2563066f, 0.98585606f, 0.3444204f, 0.30529618f, 0.9550007f, 0.010194158f, 0.44460547f, 0.4293112f, 0.020983648f, 0.83968806f, 0.5455774f, 0.9872851f, 0.27159318f, 0.16667603f, 0.3916389f, 0.10710736f, 0.70841914f, 0.23437801f, 0.78563285f, 0.25137436f, 0.61097264f, 0.41494665f, 0.20036837f, 0.26286733f, 0.5676644f, 0.2662849f, 0.80940986f, 0.7974582f, 0.5003222f, 0.29910246f, 0.1976132f, 0.30444196f, 0.073145f, 0.68550193f, 0.28199244f, 0.7541317f, 0.11088511f, 0.34996328f, 0.7452604f, 0.42252555f, 0.21781512f, 0.96444f, 0.15884762f, 0.99850196f, 0.5329689f, 0.33807343f, 0.2701225f, 0.6472552f, 0.18246143f, 0.32816347f, 0.81063986f, 0.90712345f, 0.69261926f, 0.44346964f, 0.08311381f, 0.019193182f, 0.3513845f, 0.38967726f, 0.68732834f, 0.45974445f, 0.79513454f, 0.92073804f, 0.61770153f, 0.15796295f, 0.34206834f, 0.61403716f, 0.50911576f, 0.09764764f, 0.4105753f, 0.4610053f, 0.23835297f, 0.7583669f, 0.26223376f, 0.76859593f, 0.82576513f, 0.91628957f, 0.95209956f, 0.34038633f, 0.2481594f, 0.5448205f, 0.94344336f, 0.5867557f, 0.44679952f, 0.35732326f, 0.15309544f, 0.83495915f, 0.8223747f, 
0.7383799f, 0.2723741f, 0.37363288f, 0.32874116f, 0.5468127f, 0.5836204f, 0.680963f, 0.28229877f, 0.440675f, 0.058448013f, 0.26188472f, 0.8043764f, 0.92689526f, 0.26310128f, 0.6354866f, 0.915084f, 0.45643163f, 0.87117124f, 0.9500249f, 0.1889253f, 0.5461343f, 0.47915125f, 0.43820933f, 0.13977474f, 0.8290898f, 0.30484903f, 0.5062122f, 0.33160135f, 0.62606835f, 0.65262437f, 0.23008808f, 0.4257683f, 0.13102946f, 0.21824555f, 0.8722663f, 0.26695797f, 0.028245918f, 0.77160543f, 0.10392295f, 0.06169725f, 0.9943042f, 0.8000285f, 0.34662995f, 0.3909258f, 0.6586493f, 0.9920871f, 0.80688536f, 0.84350026f, 0.86506003f, 0.9833786f, 0.1113381f, 0.058909472f, 0.36759707f, 0.1351905f, 0.08711318f, 0.17150986f, 0.97114897f, 0.10649935f, 0.917866f, 0.56674695f, 0.99736273f, 0.6040517f, 0.92105764f, 0.38094944f, 0.48367384f, 0.14886507f, 0.380281f, 0.41597223f, 0.11372275f, 0.9531382f, 0.67997587f, 0.15792394f, 0.3364488f, 0.021841977f, 0.07619969f, 0.7798327f, 0.19889046f, 0.67756367f, 0.50971586f, 0.52456796f, 0.5036354f, 0.7753575f, 0.34809372f, 0.6398678f, 0.4031053f, 0.32557586f, 0.9053469f, 0.8064988f, 0.017155945f, 0.6316684f, 0.45066175f, 0.4873005f, 0.19287354f, 0.57614934f, 0.83062655f, 0.78713834f, 0.68235135f, 0.87318754f, 0.59281385f, 0.064060956f, 0.9382655f, 0.84566283f, 0.5540783f, 0.17840536f, 0.61837703f, 0.60292286f, 0.6568771f, 0.8471286f, 0.17995848f, 0.49391183f, 0.58517873f, 0.5330186f, 0.5795362f, 0.23409952f, 0.5289169f, 0.3746643f, 0.3180484f, 0.5622743f, 0.036257476f, 0.43180978f, 1.3171679E-4f, 0.63862574f, 0.5848303f, 0.94060403f, 0.5878032f, 0.6252845f, 0.18924952f, 0.39612424f, 0.7757128f, 0.9900665f, 0.86055374f, 0.18927997f, 0.84641314f, 0.8975901f, 0.89157784f, 0.57380813f, 0.94526875f, 0.501755f, 0.42647004f, 0.20386614f, 0.4966745f, 0.7561392f, 0.24496855f, 0.13073194f, 0.41784236f, 0.70873123f, 0.7233561f, 0.96866304f, 0.13634546f, 0.049341034f, 0.71949446f, 0.26208475f, 0.5635493f, 0.27563098f, 0.69374204f, 0.078678265f, 0.03588799f, 0.39408693f, 
0.7788656f, 0.94594073f, 0.92669946f, 0.41283527f, 0.62035376f, 0.281576f, 0.89905745f, 0.9558993f, 0.0892733f, 0.43785354f, 0.37643972f, 0.23148632f, 0.17041226f, 0.35524517f, 0.88507247f, 0.3892006f, 0.387216f, 0.15375885f, 0.21120822f, 0.24968858f, 0.44297022f, 0.2895735f, 0.15732966f, 0.07728944f, 0.71204036f, 0.6714093f, 0.053016555f, 0.75036585f, 0.23313028f, 0.56734544f, 0.7048986f, 0.8168968f, 0.06141414f, 0.35583347f, 0.07237186f, 0.12143032f, 0.83158904f, 0.6737841f, 0.53340894f, 0.13451897f, 0.24459034f, 0.96684134f, 0.30125558f, 0.39460337f, 0.07498105f, 0.6020688f, 0.11102765f, 0.3656724f, 0.4939227f, 0.21076858f, 0.13569292f, 0.6039172f, 0.08439329f, 0.30890274f, 0.22699659f, 0.64184964f, 0.2754223f, 0.7049345f, 0.63606584f, 0.9549267f, 0.80815446f, 0.17538197f, 0.05759198f, 0.43693244f, 0.26000643f, 0.6929544f, 0.7537442f, 0.61757445f, 0.19318241f, 0.034338124f, 0.8184448f, 0.92103f, 0.97425944f, 0.8894058f, 0.4300163f, 0.88676697f, 0.3483852f, 0.13178374f, 0.95866996f, 0.6248255f, 0.93648285f, 0.08839288f, 0.14454809f, 0.035382055f, 0.3209607f, 0.16345672f, 0.12934527f, 0.3662055f, 0.25347614f, 0.22039147f, 0.07854195f, 0.7695641f, 0.45950922f, 0.093585685f, 0.35322717f, 0.5360373f, 0.6071155f, 0.9050337f, 0.8356653f, 0.55022f, 0.8330065f, 0.92175573f, 0.93212676f, 0.79578835f, 0.44477537f, 0.14613354f, 0.6763672f, 0.27782786f, 0.9030046f, 0.8203768f, 0.6832867f, 0.24530792f, 0.7274624f, 0.3142183f, 0.022943567f, 238.253f, 220.45427f, 267.66333f, 238.0088f, 271.58243f, 273.22388f, 211.78992f, 289.42252f, 217.21829f, 208.85757f, 217.32358f, 207.44218f, 259.48422f, 208.71153f, 268.2896f, 297.33484f, 254.15167f, 232.80293f, 254.54332f, 232.60858f, 238.36755f, 270.21686f, 279.47226f, 282.7281f, 212.87875f, 212.81602f, 277.39685f, 293.25415f, 220.63031f, 259.65414f, 257.0341f, 286.7428f, 202.3495f, 251.0628f, 268.4925f, 237.58267f, 214.1937f, 219.69623f, 294.32617f, 293.98544f, 271.97043f, 277.1976f, 208.15645f, 285.3982f, 275.2406f, 253.17255f, 
280.30792f, 210.3171f, 262.86252f, 211.56f, 201.4514f, 237.41928f, 204.32811f, 291.4109f, 246.54733f, 278.7369f, 226.24847f, 262.70038f, 207.41508f, 274.15656f, 250.72443f, 259.09497f, 278.62515f, 298.87927f, 271.1042f, 265.95636f, 228.53195f, 264.95953f, 231.45522f, 238.10721f, 201.05338f, 299.04672f, 203.31392f, 280.5685f, 207.49594f, 288.41803f, 259.77884f, 289.5286f, 212.903f, 232.62526f, 273.2359f, 274.92944f, 228.19473f, 292.2021f, 244.35541f, 235.74893f, 281.4144f, 255.78027f, 261.2293f, 219.03902f, 240.27055f, 210.33026f, 250.7247f, 281.74927f, 296.55548f, 224.49033f, 224.96393f, 219.88365f, 294.07227f, 223.65594f, 273.98865f, 279.8825f, 262.97278f, 269.57916f, 284.82678f, 205.99402f, 230.71436f, 245.10574f, 291.90387f, 221.07706f, 285.6493f, 236.25264f, 225.34695f, 210.36287f, 288.40872f, 299.56335f, 259.16122f, 220.4013f, 235.9941f, 213.55952f, 286.5168f, 261.12793f, 230.74602f, 268.31143f, 226.09164f, 217.6272f, 203.38873f, 240.80707f, 255.07602f, 283.92712f, 218.6427f, 278.5974f, 272.98724f, 211.10165f, 230.14198f, 217.64426f, 228.90018f, 266.22888f, 227.51234f, 218.84616f, 247.46571f, 259.92053f, 212.12146f, 248.02554f, 236.08237f, 277.90137f, 263.06485f, 207.07365f, 275.89902f, 264.8849f, -9.9997225f, -9.9999695f, -9.999966f, -9.9999895f, -9.999834f, -9.999596f, -9.999333f, -9.999578f, -9.99955f, -9.999539f, -9.99926f, -9.999182f, -9.999128f, -9.999777f, -9.999337f, -9.999904f, -9.999079f, -9.99941f, -9.999122f, -9.999788f, -9.999136f, -9.9995165f, -9.999043f, -9.999407f, -9.999571f, -9.999437f, -9.999941f, -9.999134f, -9.999198f, -9.999579f, -9.999475f, -9.999036f, -9.999713f, -9.999731f, -9.999678f, -9.999174f, -9.999507f, -9.999201f, -9.999245f, -9.999307f, -9.999488f, -9.999016f, -9.999532f, -9.999287f, -9.999413f, -9.999584f, -9.99978f, -9.999425f, -9.999651f, -9.999136f, -9.999289f, -9.999958f, -9.9991665f, -9.99916f, -9.999886f, -9.999217f, -9.99971f, -9.999494f, -9.999177f, -9.999025f, -9.999024f, -9.999849f, -9.999718f, -9.99997f, -9.999352f, 
-9.999563f, -9.999284f, -9.999314f, -9.999419f, -9.999329f, -9.99949f, -9.9992075f, -9.999859f, -9.999224f, -9.999656f, -9.999043f, -9.99958f, -9.999525f, -9.999985f, -9.999004f, -9.999768f, -9.999181f, -9.999919f, -9.999416f, -9.999452f, -9.999608f, -9.999645f, -9.999955f, -9.999919f, -9.999946f, -9.999472f, -9.999145f, -9.999147f, -9.99935f, -9.999072f, -9.999628f, -9.999188f, -9.999702f, -9.999313f, -9.999205f, -9.999878f, -9.999991f, -9.999111f, -9.9991f, -9.999404f, -9.999437f, -9.999719f, -9.999646f, -9.999839f, -9.999222f, -9.999134f, -9.999098f, -9.999538f, -9.999294f, -9.999013f, -9.999872f, -9.99908f, -9.999922f, -9.999595f, -9.999158f, -9.999308f, -9.9995f, -9.99924f, -9.999744f, -9.999338f, -9.999049f, -9.999883f, -9.999513f, -9.999893f, -9.999218f, -9.999468f, -9.999204f, -9.999081f, -9.9994335f, -9.999555f, -9.999373f, -9.999073f, -9.999382f, -9.999415f, -9.999362f, -9.999137f, -9.999514f, -9.999781f, -9.999969f, -9.999229f, -9.999295f, -9.999149f, -9.999783f, -9.999437f, -9.999201f, 0.8368316f, 0.95952296f, 0.7187136f, 0.6472035f, 0.7200239f, 0.82257813f, 0.13384113f, 0.91812044f, 0.9440362f, 0.23334092f, 0.3562596f, 0.20390894f, 0.47781035f, 0.56394255f, 0.8770303f, 0.84794813f, 0.92716575f, 0.3591966f, 0.006163279f, 0.34427875f, 0.30020186f, 0.035439115f, 0.36127335f, 0.1666844f, 0.65421695f, 0.752802f, 0.8639191f, 0.7162624f, 0.10528788f, 0.3911885f, 0.6361361f, 0.33739233f, 0.45225555f, 0.04712947f, 0.9509385f, 0.08811871f, 0.6489793f, 0.563957f, 0.8571504f, 0.47839713f, 0.86719155f, 0.7297759f, 0.9265764f, 0.86381954f, 0.2705895f, 0.80873495f, 0.69725907f, 0.4615118f, 0.98845094f, 0.38829336f, 0.5021872f, 0.051559158f, 0.4416545f, 0.84030825f, 0.028471855f, 0.8019141f, 0.4764789f, 0.73308647f, 0.24829985f, 0.28266567f, 0.1642818f, 0.497284f, 0.9761126f, 0.8595787f, 0.61120987f, 0.48310366f, 0.45415315f, 0.4246855f, 0.35486698f, 0.4365935f, 0.6768876f, 0.36493155f, 0.96304077f, 0.49552417f, 0.8761381f, 0.7559321f, 0.46201146f, 0.50861555f, 
0.023068247f, 0.551351f, 0.45992744f, 0.069025f, 0.9549169f, 0.9121757f, 0.35455093f, 0.32405618f, 0.6669353f, 0.16085483f, 0.9973096f, 0.81469834f, 0.47871014f, 0.009814576f, 0.9915644f, 0.4212253f, 0.18318938f, 0.5728494f, 0.3666718f, 0.78813976f, 0.48231423f, 0.723981f, 0.7495278f, 0.7334672f, 0.31657055f, 0.29471073f, 0.2991272f, 0.17905454f, 0.25772056f, 0.04573023f, 0.9155821f, 0.9855648f, 0.9641909f, 0.49942952f, 0.32687747f, 0.3305897f, 0.5485675f, 0.6368628f, 0.09610839f, 0.91397697f, 0.99097943f, 0.7983881f, 0.7839146f, 0.13756526f, 0.058954984f, 0.2574425f, 0.7659589f, 0.8970627f, 0.8955351f, 0.24972673f, 0.3770009f, 0.5416225f, 0.42023486f, 0.4635182f, 0.040502504f, 0.20716274f, 0.08657944f, 0.13138548f, 0.8770457f, 0.6316995f, 0.0990857f, 0.732918f, 0.4953378f, 0.30765584f, 0.21265133f, 0.008900259f, 0.42015043f, 0.25701198f, 0.26232395f, 0.59503317f, 0.37619093f, 0.059471674f, 0.96380097f, 0.6594173f, 0.74392956f, 0.80542815f, 0.5856752f, 0.4709212f, 0.07911475f, 0.8975309f, 0.76675755f, 0.026576402f, 0.012588193f, 0.9571294f, 0.14971007f, 0.42658392f, 0.4339528f, 0.40636125f, 0.418213f, 0.19980216f, 0.8942122f, 0.995247f, 0.026640382f, 0.8785028f, 0.48940244f, 0.3919287f, 0.0862845f, 0.5089264f, 0.17742826f, 0.10345855f, 0.5513259f, 0.7041969f, 0.78375727f, 0.34573317f, 0.34970793f, 0.61609524f, 0.9967575f, 0.19738163f, 0.4390408f, 0.49108744f, 0.5759808f, 0.39300266f, 0.84470737f, 0.3280776f, 0.41459507f, 0.0031824266f, 0.3248213f, 0.21955715f, 0.8830681f, 0.6528493f, 0.7155801f, 0.18756945f, 0.038407642f, 0.048247315f, 0.06908089f, 0.96183145f, 0.8542427f, 0.45350936f, 0.3367257f, 0.26481515f, 0.06306089f, 0.3728015f, 0.4432045f, 0.7682931f, 0.34411287f, 0.018815735f, 0.60152483f, 0.06271082f, 0.30780053f, 0.15404528f, 0.777356f, 0.9382987f, 0.03425807f, 0.74410313f, 0.050881404f, 0.106018655f, 0.9237955f, 0.40959543f, 0.44272372f, 0.42992854f, 0.40163797f, 0.9774989f, 0.7284286f, 0.96605545f, 0.073630586f, 0.7020174f, 0.9556004f, 0.4899371f, 
0.2590087f, 0.7959899f, 0.8613244f, 0.7109668f, 0.68005985f, 0.18156524f, 0.68875915f, 0.89809185f, 0.26884466f, 0.46794668f, 0.78001046f, 0.6469185f, 0.03375709f, 0.83638656f, 0.19561735f, 0.72300714f, 0.4323585f, 0.6666231f, 0.6944045f, 0.5573255f, 0.94807935f, 0.40593168f, 0.16260563f, 0.2516181f, 0.5295202f, 0.8144355f, 0.63592476f, 0.40705463f, 0.41550696f, 0.046603993f, 0.23649848f, 0.72142303f, 0.86540526f, 0.9812862f, 0.12677868f, 0.7740198f, 0.028188271f, 0.05125889f, 0.25654867f, 0.7408246f, 0.9826668f, 0.75396377f, 0.6689209f, 0.8002577f, 0.3877432f, 0.83123654f, 0.5672896f, 0.8960579f, 0.39333224f, 0.14590047f, 0.7893236f, 0.38733613f, 0.77125305f, 0.9827144f, 0.014167471f, 0.49262884f, 0.21413602f, 0.67211145f, 0.27530655f, 0.76538646f, 0.5841506f, 0.9951677f, 0.29803824f, 0.024221342f, 0.6438744f, 0.43844396f, 0.35386777f, 0.39374486f, 0.9667755f, 0.26405483f, 0.29369798f, 6.263968E-5f, 0.40577433f, 0.014699541f, 0.8506516f, 0.82061505f, 0.04640132f, 0.38329712f, 0.23627418f, 0.01457501f, 0.920022f, 0.36586156f, 0.54100925f, 0.4094f, 0.9525085f, 0.7759392f, 0.38271114f, 0.9372709f, 0.4954011f, 0.90372294f, 0.5493134f, 0.79789823f, 0.215295f, 0.18560563f, 0.52747923f, 0.015467339f, 0.25793558f, 0.9574369f, 0.8208537f, 0.21616516f, 0.80089974f, 0.4464337f, 0.37760806f, 0.31725752f, 0.07363392f, 0.5414981f, 0.5969112f, 0.6802155f, 0.08681603f, 0.748899f, 0.8132425f, 0.6588185f, 0.7527277f, 0.22249526f, 0.48485887f, 0.52951264f, 0.9087715f, 0.0022171019f, 0.3312975f, 0.70355535f, 0.9905531f, 0.18766245f, 0.8428444f, 0.9489218f, 0.75968647f, 0.16918193f, 0.5090402f, 0.57815427f, 0.41849396f, 0.3353734f, 0.5701858f, 0.59971434f, 0.037876863f, 0.30670634f, 0.08724593f, 0.51724964f, 0.44608638f, 0.8887655f, 0.23586161f, 0.54564106f, 0.17055021f, 0.65770286f, 0.36355573f, 0.11598958f, 0.98736215f, 0.39781153f, 0.8273148f, 0.099607535f, 0.9095583f, 0.63183874f, 0.6119373f, 0.023166118f, 0.42524394f, 0.3938052f, 0.78907496f, 0.7087274f, 0.4950751f, 0.27278492f, 
0.36101273f, 0.9821936f, 0.7951266f, 0.8089244f, 0.7677898f, 0.506932f, 0.6540132f, 0.45168075f, 0.82436436f, 0.6100174f, 0.50495255f, 0.95378387f, 0.15670867f, 0.3659073f, 0.34792703f, 0.22730303f, 0.41741064f, 0.5464127f, 0.12390941f, 0.38427374f, 0.64032775f, 0.77376515f, 0.8658444f, 0.7240665f, 0.43486324f, 0.12049561f, 0.8539374f, 0.08333132f, 0.97497743f, 0.09330166f, 0.44820398f, 0.6796943f, 0.48456368f, 0.9055214f, 0.26348707f, 0.658894f, 0.0733997f, 0.1792219f, 0.54822993f, 0.08548857f, 0.6243975f, 0.14298357f, 0.034526028f, 0.094718255f, 0.039160337f, 0.24803995f, 0.7548811f, 0.81707966f, 0.55264014f, 0.4717769f, 0.8132233f, 0.08796681f, 0.46675965f, 0.21120757f, 0.84116185f, 0.02198596f, 233.08963f, 284.46478f, 228.92946f, 299.10284f, 252.34494f, 270.3675f, 247.62338f, 259.12375f, 293.7792f, 292.25543f, 287.2373f, 261.2933f, 234.23328f, 242.85649f, 246.06302f, 211.33946f, 262.4088f, 288.57184f, 280.21918f, 205.70305f, 216.75426f, 287.24652f, 233.86952f, 253.43048f, 228.54883f, 297.02246f, 219.41966f, 230.32181f, 211.07607f, 201.58842f, 255.04857f, 276.64703f, 226.55725f, 285.53146f, 230.61176f, 277.40143f, 217.56476f, 214.18044f, 253.52425f, 286.49228f, 280.64703f, 216.87614f, 229.96323f, 272.0548f, 287.85236f, 209.3926f, 271.86664f, 240.23541f, 299.9867f, 214.53423f, 273.7356f, 253.11342f, 205.02061f, 222.24791f, 242.70433f, 245.3724f, 298.40033f, 289.42432f, 282.7867f, 229.05533f, 289.985f, 271.32953f, 206.18881f, 285.04318f, 280.12766f, 215.771f, 233.6232f, 204.17224f, 242.84424f, 286.33337f, 254.11534f, 209.9334f, 243.23608f, 272.5159f, 205.16878f, 276.64346f, 244.62245f, 294.27008f, 290.36227f, 216.88017f, 298.44403f, 298.37915f, 214.64677f, 255.04266f, 280.10626f, 281.35904f, 236.9879f, 257.5684f, 280.48505f, 238.83212f, 253.65378f, 291.90552f, 228.50763f, 205.08888f, 281.95593f, 252.75293f, 290.4546f, 287.56818f, 210.91739f, 256.31198f, 232.79715f, 269.6927f, 235.58183f, 276.23233f, 227.1755f, 276.03674f, 292.6508f, 285.0999f, 287.64133f, 
234.23032f, 296.60068f, 277.18442f, 257.54352f, 254.5871f, 298.60168f, 202.64233f, 255.38023f, 248.32083f, 260.9433f, 205.4068f, 247.34087f, 208.5292f, 202.0934f, 216.09306f, 221.08582f, 257.41556f, 247.06735f, 266.92804f, 210.08488f, 249.02866f, 204.24144f, 263.3803f, 222.9913f, 251.80115f, 218.99036f, 290.71286f, 227.41696f, 204.93797f, 231.20157f, 292.14478f, 297.73837f, 280.12753f, 297.94702f, 228.16396f, 256.27838f, 280.33307f, 205.8249f, 279.23096f, 268.9643f, 231.75375f, -9.999341f, -9.999257f, -9.999949f, -9.999035f, -9.999831f, -9.99975f, -9.999811f, -9.999584f, -9.999827f, -9.999112f, -9.999565f, -9.999383f, -9.999329f, -9.999119f, -9.999867f, -9.999806f, -9.999535f, -9.99903f, -9.99938f, -9.9991255f, -9.999031f, -9.999938f, -9.999783f, -9.999634f, -9.999506f, -9.999364f, -9.999014f, -9.999437f, -9.999991f, -9.999617f, -9.999323f, -9.9991f, -9.999098f, -9.999426f, -9.999119f, -9.999553f, -9.9994545f, -9.999403f, -9.99964f, -9.999833f, -9.99963f, -9.999753f, -9.999862f, -9.999563f, -9.999861f, -9.999462f, -9.99921f, -9.99975f, -9.999412f, -9.99969f, -9.999759f, -9.999703f, -9.999666f, -9.999825f, -9.999146f, -9.999077f, -9.999142f, -9.999701f, -9.999502f, -9.999564f, -9.9995165f, -9.9997835f, -9.999195f, -9.999329f, -9.999829f, -9.999427f, -9.999484f, -9.999804f, -9.999084f, -9.999392f, -9.999105f, -9.999679f, -9.999752f, -9.999843f, -9.999609f, -9.999379f, -9.99906f, -9.999004f, -9.99919f, -9.9998665f, -9.999223f, -9.999334f, -9.999842f, -9.999544f, -9.999025f, -9.999718f, -9.999823f, -9.999554f, -9.99945f, -9.999082f, -9.999171f, -9.999058f, -9.999519f, -9.9995365f, -9.999272f, -9.999615f, -9.999609f, -9.999498f, -9.999642f, -9.999337f, -9.999279f, -9.999857f, -9.999663f, -9.999423f, -9.9990635f, -9.999101f, -9.9993f, -9.999743f, -9.999616f, -9.999779f, -9.99996f, -9.999366f, -9.999638f, -9.999791f, -9.999472f, -9.999714f, -9.999069f, -9.999222f, -9.999011f, -9.999037f, -9.999066f, -9.99982f, -9.999337f, -9.999344f, -9.9998455f, -9.999567f, -9.999952f, 
-9.9990635f, -9.9993515f, -9.999747f, -9.999756f, -9.999433f, -9.999954f, -9.999456f, -9.999391f, -9.999602f, -9.999213f, -9.999057f, -9.999885f, -9.999203f, -9.999455f, -9.999208f, -9.999754f, -9.99941f, -9.9997015f, -9.999528f, -9.999968f, -9.999105f, -9.999052f, -9.999117f, 0.07731749f, 0.9572599f, 0.2881733f, 0.34789458f, 0.12208096f, 0.3989875f, 0.23046659f, 0.07561615f, 0.7311842f, 0.24280672f, 0.13743502f, 0.32029906f, 0.26720718f, 0.6435275f, 0.71581525f, 0.25040102f, 0.07968058f, 0.9510946f, 0.16737682f, 0.5338542f, 0.96112233f, 0.12613547f, 0.71407163f, 0.017653665f, 0.5663055f, 0.9523341f, 0.66330385f, 0.43527827f, 0.21753095f, 0.6377421f, 0.0820664f, 0.5563942f, 0.105712675f, 0.06655064f, 0.8044171f, 0.6876928f, 0.97473025f, 0.47098678f, 0.23313597f, 0.46495864f, 0.13682419f, 0.19020991f, 0.6946199f, 0.58204114f, 0.008083445f, 0.21409632f, 0.90480167f, 0.06497669f, 0.3296087f, 0.51603156f, 0.49303642f, 0.3029305f, 0.5821996f, 0.5105462f, 0.51879376f, 0.108761f, 0.13990402f, 0.44722676f, 0.8695498f, 0.014239418f, 0.5745597f, 0.52994305f, 0.8318035f, 0.7634822f, 0.677615f, 0.09214777f, 0.705199f, 0.47799557f, 0.24047466f, 0.3105237f, 0.89669865f, 0.6427869f, 0.59037143f, 0.2127864f, 0.27039096f, 0.09363014f, 0.7930851f, 0.58145946f, 0.058050785f, 0.74635893f, 0.34254172f, 0.942883f, 0.8463423f, 0.49698228f, 0.1885729f, 0.2511439f, 0.87867934f, 0.028224535f, 0.7651291f, 0.49802932f, 0.21640365f, 0.69269353f, 0.25175697f, 0.76805496f, 0.75059545f, 0.05755356f, 0.7005975f, 0.9643457f, 0.59199476f, 0.15058741f, 0.8211338f, 0.50831884f, 0.9554822f, 0.10171006f, 0.5546305f, 0.28822696f, 0.8995881f, 0.96590596f, 0.76544195f, 0.23609895f, 0.5093231f, 0.29946357f, 0.44045478f, 0.5974459f, 0.24198511f, 0.13976322f, 0.30026865f, 0.6117198f, 0.54420567f, 0.83931947f, 0.9591503f, 0.055750016f, 0.015446019f, 0.34988365f, 0.6788849f, 0.8000394f, 0.34461623f, 0.8884854f, 0.11765242f, 0.6764313f, 0.70610297f, 0.7528662f, 0.6234379f, 0.95549244f, 0.48107228f, 0.57657474f, 
0.35293803f, 0.53558505f, 0.90731245f, 0.6388894f, 0.9061205f, 0.9068154f, 0.82560843f, 0.48359713f, 0.6093791f, 0.25128087f, 0.58313656f, 0.10119824f, 0.14279248f, 0.8000816f, 0.89156765f, 0.12725733f, 0.052655865f, 0.09217951f, 0.20653115f, 0.34572187f, 0.34771374f, 0.30589288f, 0.06053133f, 0.41077146f, 0.9258966f, 0.31344774f, 0.66711676f, 0.04113631f, 0.9229566f, 0.008368838f, 0.5903627f, 0.84122473f, 0.11545232f, 0.7868713f, 0.9680761f, 0.23150893f, 0.4704689f, 0.5499954f, 0.43753204f, 0.7121286f, 0.61013496f, 0.59720284f, 0.92617583f, 0.7834906f, 0.027650753f, 0.8977211f, 0.15754606f, 0.54239666f, 0.18633401f, 0.5662742f, 0.2190944f, 0.59521663f, 0.6435355f, 0.71627194f, 0.037149042f, 0.6100622f, 0.61836076f, 0.1470259f, 0.36966816f, 0.90360576f, 0.5119274f, 0.7205386f, 0.39034662f, 0.62984717f, 0.01017152f, 0.64599174f, 0.15090384f, 0.36933318f, 0.19484489f, 0.09027873f, 0.58042485f, 0.14514206f, 0.036732975f, 0.54077417f, 0.43008235f, 0.15875153f, 0.34932455f, 0.37410876f, 0.8042535f, 0.7739999f, 0.28807458f, 0.97715217f, 0.117083825f, 0.17992087f, 0.9757363f, 0.18320304f, 0.015741833f, 0.9748695f, 0.65635973f, 0.14705919f, 0.037058447f, 0.8968405f, 0.021620478f, 0.5633058f, 0.767505f, 0.12037435f, 0.44985265f, 0.26535192f, 0.22633725f, 0.5835013f, 0.42530164f, 0.6948082f, 0.7116804f, 0.6978424f, 0.82452023f, 0.23771845f, 0.99683344f, 0.70071405f, 0.12593275f, 0.7764756f, 0.36999762f, 0.3072223f, 0.09792935f, 0.43981078f, 0.8204207f, 0.14809668f, 0.7569628f, 0.8288626f, 0.15944423f, 0.21987063f, 0.5351478f, 0.11639127f, 0.9450276f, 0.657273f, 0.48179442f, 0.6428968f, 0.07266802f, 0.54417425f, 0.8990355f, 0.36724177f, 0.4083636f, 0.2944423f, 0.9782087f, 0.15691185f, 0.39151284f, 0.56013423f, 0.049810167f, 0.906521f, 0.9659634f, 0.921944f, 0.30070534f, 0.9883118f, 0.95775986f, 0.13003021f, 0.8573852f, 0.1918365f, 0.10604336f, 0.19914377f, 0.40675613f, 0.024324145f, 0.23431449f, 0.72297823f, 0.7580914f, 0.20346278f, 0.82810277f, 0.32680357f, 0.10711087f, 
0.590452f, 0.5469826f, 0.18557824f, 0.51672226f, 0.9832008f, 0.7936118f, 0.5308729f, 0.37090248f, 0.7742029f, 0.4481485f, 0.5493372f, 0.50338376f, 0.43103522f, 0.53751975f, 0.70061314f, 0.021088583f, 0.3308669f, 0.8162114f, 0.5326165f, 0.35944003f, 0.9206047f, 0.6406876f, 0.50699484f, 0.8470867f, 0.9593492f, 0.7875809f, 0.9962247f, 0.23328215f, 0.7006755f, 0.5442194f, 0.6375928f, 0.33889383f, 0.9687761f, 0.5783294f, 0.9320834f, 0.88320315f, 0.7495404f, 0.5102735f, 0.22573441f, 0.51124907f, 0.9721347f, 0.44289282f, 0.37883982f, 0.33592433f, 0.40807053f, 0.7348208f, 0.059953105f, 0.020652194f, 0.373106f, 0.35336265f, 0.029604226f, 0.6272284f, 0.6029403f, 0.49051753f, 0.398493f, 0.4539566f, 0.2655247f, 0.9981165f, 0.75446373f, 0.46822912f, 0.648188f, 0.324949f, 0.9306804f, 0.8809041f, 0.42844233f, 0.38464552f, 0.76389503f, 0.7626695f, 0.63432926f, 0.33961716f, 0.61165744f, 0.7148871f, 0.4873704f, 0.49829185f, 0.5820676f, 0.40672466f, 0.51494414f, 0.883497f, 0.78602934f, 0.24558222f, 0.5361903f, 0.69763577f, 0.26757947f, 0.4059913f, 0.862289f, 0.7588195f, 0.18907034f, 0.42610446f, 0.08498969f, 0.02107262f, 0.2888108f, 0.90481687f, 0.03300186f, 0.61184776f, 0.41099504f, 0.27365708f, 0.27691156f, 0.01747882f, 0.71713996f, 0.40858844f, 0.7091915f, 0.2785737f, 0.87971973f, 0.015822828f, 0.058852635f, 0.54861325f, 0.4243099f, 0.07972601f, 0.7242567f, 0.3915925f, 0.85279524f, 0.5510232f, 0.88121253f, 0.55209786f, 0.9690384f, 0.910818f, 0.4399193f, 0.08753263f, 0.25317103f, 0.28638893f, 0.08940263f, 0.62953717f, 0.13840295f, 0.6593923f, 0.27087918f, 0.54218894f, 0.7974436f, 0.03127277f, 0.13191597f, 0.3672008f, 0.45645824f, 0.50062525f, 0.59150535f, 0.53669804f, 0.87231857f, 0.083159134f, 0.30086067f, 0.57798487f, 0.6605887f, 0.46329933f, 0.7809135f, 0.3256513f, 0.42846498f, 0.43590286f, 0.7588255f, 0.112232044f, 0.45630154f, 0.85721415f, 0.36618492f, 0.3291177f, 0.3065707f, 0.258635f, 0.93674284f, 0.267144f, 0.94944286f, 0.03034833f, 0.43545058f, 277.44568f, 293.30225f, 
290.0967f, 226.36577f, 263.3507f, 233.65721f, 271.0456f, 201.33302f, 244.87222f, 248.06546f, 283.55505f, 273.16003f, 273.43265f, 248.35196f, 261.96664f, 252.17625f, 213.653f, 268.57755f, 241.37634f, 275.69666f, 231.28116f, 238.647f, 267.70135f, 270.0771f, 278.84747f, 232.92476f, 227.37221f, 290.46814f, 282.7081f, 210.15854f, 275.31555f, 260.04895f, 283.80142f, 227.62625f, 267.77484f, 245.33005f, 251.6941f, 232.47691f, 220.30089f, 292.46063f, 252.57907f, 262.54684f, 254.58533f, 239.21768f, 246.7902f, 254.07513f, 230.66675f, 288.9232f, 216.71547f, 214.78873f, 279.40067f, 210.46289f, 269.7311f, 258.03143f, 220.68816f, 220.33643f, 290.5327f, 217.04453f, 203.5228f, 236.82892f, 271.18365f, 253.44327f, 206.32324f, 243.99203f, 285.42123f, 208.0186f, 235.3223f, 215.7981f, 281.17578f, 258.11807f, 235.2606f, 226.48712f, 280.93256f, 280.83173f, 243.42778f, 266.36462f, 236.26477f, 295.47427f, 273.871f, 293.18738f, 276.67422f, 232.46318f, 218.5724f, 278.0185f, 260.68582f, 216.33072f, 202.01517f, 256.0112f, 260.35217f, 285.29895f, 282.32895f, 204.90137f, 202.91895f, 201.99902f, 234.42209f, 232.87006f, 296.0879f, 282.7151f, 260.2f, 263.00598f, 245.1402f, 220.98419f, 227.66153f, 298.27438f, 288.2768f, 246.6337f, 247.41647f, 229.84933f, 200.41792f, 256.62027f, 207.03185f, 235.04187f, 269.5741f, 279.07892f, 279.92096f, 266.31543f, 277.62415f, 282.93802f, 244.6243f, 261.97354f, 287.40088f, 285.73053f, 210.00949f, 235.31769f, 267.29855f, 256.89893f, 225.80467f, 241.72736f, 243.78555f, 230.197f, 220.44577f, 286.22617f, 295.29068f, 248.73352f, 271.84897f, 295.86597f, 274.50906f, 285.53323f, 254.3574f, 246.36845f, 232.46686f, 202.37822f, 232.31885f, 284.55515f, 281.44986f, 288.22656f, 224.62955f, 257.4739f, 277.62314f, 233.47943f, -9.999561f, -9.999684f, -9.999829f, -9.999858f, -9.999566f, -9.999728f, -9.999245f, -9.999897f, -9.999244f, -9.999921f, -9.999919f, -9.999612f, -9.999473f, -9.9995575f, -9.999303f, -9.999789f, -9.999555f, -9.999162f, -9.999468f, -9.999969f, -9.999672f, 
-9.999807f, -9.999847f, -9.99909f, -9.999817f, -9.999831f, -9.999489f, -9.999215f, -9.999848f, -9.9998455f, -9.999323f, -9.999817f, -9.999044f, -9.999408f, -9.999863f, -9.999365f, -9.99908f, -9.99931f, -9.99933f, -9.99975f, -9.999039f, -9.99978f, -9.999931f, -9.99974f, -9.999948f, -9.999952f, -9.999335f, -9.999389f, -9.999414f, -9.999315f, -9.999753f, -9.999389f, -9.99995f, -9.999082f, -9.999573f, -9.999592f, -9.9998f, -9.999939f, -9.999826f, -9.999052f, -9.99905f, -9.999516f, -9.999568f, -9.999664f, -9.999201f, -9.9993f, -9.999386f, -9.999858f, -9.999468f, -9.99966f, -9.999665f, -9.999242f, -9.9997425f, -9.99912f, -9.999361f, -9.999368f, -9.999324f, -9.999566f, -9.999074f, -9.99973f, -9.99977f, -9.999092f, -9.99947f, -9.999531f, -9.999189f, -9.99918f, -9.999814f, -9.999811f, -9.999523f, -9.999692f, -9.999746f, -9.999281f, -9.999508f, -9.999807f, -9.999763f, -9.999359f, -9.999442f, -9.999778f, -9.999925f, -9.999119f, -9.999002f, -9.999579f, -9.999089f, -9.999878f, -9.9991865f, -9.999503f, -9.99901f, -9.9991865f, -9.999055f, -9.999055f, -9.9990225f, -9.999116f, -9.999345f, -9.999241f, -9.999561f, -9.999711f, -9.999534f, -9.999722f, -9.999037f, -9.99902f, -9.999436f, -9.999547f, -9.9997425f, -9.999701f, -9.999172f, -9.99957f, -9.99917f, -9.999358f, -9.999515f, -9.9994545f, -9.999549f, -9.99922f, -9.999552f, -9.999457f, -9.999204f, -9.999363f, -9.99935f, -9.999776f, -9.999162f, -9.999254f, -9.99992f, -9.999504f, -9.9991f, -9.999846f, -9.99928f, -9.99955f, -9.999984f, -9.999683f, -9.999582f, -9.999975f, 0.4054413f, 0.49212277f, 0.9723238f, 0.72839403f, 0.6485173f, 0.11651259f, 0.10785521f, 0.032620244f, 0.023706913f, 0.3086147f, 0.47183102f, 0.992096f, 0.99172103f, 0.34033036f, 0.95944905f, 0.22414577f, 0.06989748f, 0.5614623f, 0.97281843f, 0.52306736f, 0.053522028f, 0.50254625f, 0.51301396f, 0.5985718f, 0.0371569f, 0.8265822f, 0.4661505f, 0.4922629f, 0.81253344f, 0.9696686f, 0.60658884f, 0.8239178f, 0.15269178f, 0.939187f, 0.14531301f, 0.37456673f, 0.779733f, 
0.418844f, 0.66610193f, 0.5676376f, 0.8005674f, 0.31309485f, 0.03271992f, 0.36289623f, 0.5230104f, 0.9365938f, 0.54856783f, 0.38090333f, 0.677641f, 0.98534113f, 0.6625885f, 0.9755095f, 0.078554325f, 0.018032718f, 0.8922824f, 0.9402988f, 0.7797243f, 0.5073222f, 0.8464975f, 0.7056091f, 0.49532133f, 0.42082825f, 0.39204183f, 0.7350382f, 0.7106082f, 0.7145868f, 0.7029236f, 0.22454071f, 0.9618653f, 0.4929038f, 0.58743435f, 0.22425091f, 0.52113986f, 0.29244232f, 0.58773226f, 0.17996566f, 0.16191864f, 0.8782989f, 0.6559272f, 0.45498922f, 0.109633766f, 0.29422963f, 0.28020766f, 0.45128867f, 0.34663188f, 0.011857478f, 0.13049418f, 0.39511293f, 0.15442526f, 0.98196644f, 0.74726933f, 0.20202826f, 0.066193216f, 0.6910641f, 0.91542566f, 0.36986846f, 0.36708114f, 0.7992493f, 0.66625875f, 0.9589232f, 0.58173925f, 0.2632916f, 0.8744973f, 0.869903f, 0.27612343f, 0.43633205f, 0.0069335676f, 0.46793646f, 0.6261623f, 0.8301051f, 0.4103617f, 0.583117f, 0.9595133f, 0.092884764f, 0.6108136f, 0.9563768f, 0.13297999f, 0.9781464f, 0.1866522f, 0.6501296f, 0.940671f, 0.5299086f, 0.9236821f, 0.8280376f, 0.5605807f, 0.08746594f, 0.99765533f, 0.9831952f, 0.3346039f, 0.45981014f, 0.16059282f, 0.898296f, 0.24069251f, 0.84168667f, 0.42612913f, 0.840821f, 0.06970532f, 0.6529262f, 0.21027155f, 0.6587761f, 0.8506848f, 0.23469605f, 0.8375965f, 0.6650027f, 0.6900568f, 0.03741631f, 0.90703416f, 0.60072684f, 0.041207824f, 0.20454895f, 0.13258597f, 0.38379464f, 0.5782676f, 0.37454012f, 0.788924f, 0.6553679f, 0.6696084f, 0.194304f, 0.18800853f, 0.42950943f, 0.70689565f, 0.837481f, 0.14751653f, 0.56871074f, 0.7577148f, 0.7652816f, 0.19738932f, 0.9059352f, 0.97273886f, 0.51461357f, 0.1711977f, 0.5120307f, 0.22731306f, 0.5407244f, 0.2804785f, 0.05774873f, 0.80988765f, 0.7796792f, 0.31191307f, 0.39822164f, 0.5347025f, 0.07349863f, 0.21531169f, 0.07873698f, 0.8192433f, 0.722044f, 0.40318736f, 0.8964449f, 0.49459186f, 0.9010825f, 0.45778024f, 0.80724466f, 0.38512704f, 0.38782215f, 0.13246128f, 0.7218372f, 
0.7401796f, 0.84869057f, 0.56868243f, 0.3278968f, 0.019229556f, 0.43221912f, 0.693255f, 0.43167397f, 0.78483266f, 0.09825686f, 0.5116548f, 0.1271103f, 0.18708695f, 0.95848906f, 0.23714672f, 0.52546054f, 0.5915945f, 0.7894098f, 0.8593355f, 0.31078282f, 0.28504592f, 0.85881007f, 0.29736793f, 0.50781727f, 0.65514153f, 0.44968098f, 0.9075563f, 0.7546295f, 0.45364478f, 0.29375777f, 0.94780463f, 0.6616151f, 0.01726944f, 0.9249832f, 0.9179415f, 0.6749661f, 0.43883613f, 0.37391648f, 0.65078586f, 0.21732111f, 0.02359236f, 0.007791354f, 0.30327088f, 0.31245363f, 0.84185934f, 0.49694976f, 0.93794364f, 0.8528437f, 0.7000397f, 0.5224565f, 0.8105422f, 0.99443287f, 0.847529f, 0.15470129f, 0.8077305f, 0.5341055f, 0.23147497f, 0.40932575f, 0.96443266f, 0.09061932f, 0.05683991f, 0.99754393f, 0.11661421f, 0.19272684f, 0.3620329f, 0.45262036f, 0.03901034f, 0.06041548f, 0.0075550857f, 0.27494353f, 0.67014945f, 0.2957977f, 0.2216069f, 0.6506188f, 0.45587075f, 0.28567624f, 0.5888963f, 0.98453754f, 0.8699843f, 0.9340606f, 0.0642961f, 0.14302005f, 0.7717978f, 0.75930613f, 0.6141049f, 0.4101332f, 0.27772737f, 0.28117037f, 0.8098905f, 0.5942f, 0.7786375f, 0.4493845f, 0.5141761f, 0.744234f, 0.34754843f, 0.9057713f, 0.29356617f, 0.41850287f, 0.25478244f, 0.78619635f, 0.70232016f, 0.7863453f, 0.57700616f, 0.3423882f, 0.11562478f, 0.6069529f, 0.7797115f, 0.2574891f, 0.51921356f, 0.2538803f, 0.670748f, 0.82137585f, 0.47364834f, 0.9369771f, 0.1801538f, 0.5134379f, 0.3520003f, 0.38112086f, 0.29870084f, 0.55816495f, 0.95891315f, 0.3729329f, 0.7877428f, 0.029987516f, 0.37669265f, 0.10563303f, 0.14064822f, 0.4556408f, 0.86550975f, 0.73312205f, 0.09095184f, 0.9431056f, 0.372078f, 0.4691022f, 0.72663444f, 0.5589779f, 0.98812455f, 0.1695335f, 0.8314304f, 0.7852622f, 0.61309403f, 0.10439321f, 0.76670945f, 0.5409888f, 0.9157445f, 0.57858527f, 0.14883776f, 0.20041484f, 0.30621874f, 0.9036323f, 0.9339205f, 0.9151604f, 0.12393201f, 0.929967f, 0.35930997f, 0.2358306f, 0.6697985f, 0.31414795f, 0.30049297f, 
0.89661825f, 0.27027792f, 0.17256655f, 0.9318595f, 0.81196785f, 0.38976404f, 0.293463f, 0.2512547f, 0.81138444f, 0.988779f, 0.27900514f, 0.4261041f, 0.61765677f, 0.8339683f, 0.25210267f, 0.51324797f, 0.92285997f, 0.0889822f, 0.5169889f, 0.3989031f, 0.6554801f, 0.9353766f, 0.544529f, 0.123369224f, 0.34246746f, 0.2115331f, 0.26744205f, 0.71749866f, 0.22343503f, 0.64539504f, 0.67429143f, 0.41868812f, 0.40186298f, 0.098477215f, 0.88132435f, 0.07625152f, 0.043012597f, 0.6452063f, 0.2102687f, 0.22173183f, 0.10345679f, 0.7434575f, 0.7126712f, 0.76721144f, 0.6512526f, 0.15990873f, 0.11895295f, 0.77731425f, 0.5243528f, 0.694658f, 0.86524415f, 0.75635976f, 0.057310082f, 0.16338252f, 0.78290933f, 0.7817539f, 0.8036517f, 0.33238873f, 0.676157f, 0.6762056f, 0.16322272f, 0.87960654f, 0.36118373f, 0.32454377f, 0.763408f, 0.506997f, 0.6956684f, 0.9279813f, 0.20323144f, 0.5839603f, 0.5633559f, 0.6701542f, 0.25721762f, 0.9896909f, 0.95511895f, 0.9082311f, 0.29406747f, 0.60026234f, 0.93644714f, 0.61788774f, 0.66341126f, 0.20749137f, 0.52809435f, 0.30916053f, 0.59821826f, 0.42163637f, 0.8293481f, 0.9711802f, 0.7839911f, 0.7657031f, 0.5351135f, 0.6362381f, 0.5429735f, 0.29129192f, 0.74155486f, 256.6196f, 299.92203f, 283.1842f, 257.95f, 242.67941f, 283.13525f, 297.3768f, 209.21597f, 298.94897f, 272.28577f, 208.13962f, 224.24684f, 215.7119f, 289.45593f, 248.60497f, 291.094f, 261.66168f, 291.05728f, 280.15112f, 246.94473f, 281.08008f, 221.38707f, 231.09238f, 220.10115f, 219.70961f, 273.52057f, 298.6576f, 250.59302f, 203.40039f, 227.90755f, 208.1463f, 211.84389f, 251.76518f, 275.46594f, 292.12732f, 277.5088f, 281.66544f, 274.27924f, 291.94995f, 282.94733f, 231.35228f, 229.87643f, 226.04532f, 246.81201f, 285.92133f, 211.72032f, 265.00046f, 292.0401f, 217.145f, 258.9742f, 241.07838f, 297.71396f, 265.03607f, 293.78973f, 215.46487f, 271.7528f, 297.20273f, 234.13841f, 253.58505f, 252.52872f, 224.75195f, 218.48878f, 204.55463f, 293.8269f, 283.58505f, 264.1618f, 226.64536f, 280.69232f, 218.0678f, 
219.11906f, 209.70735f, 215.2419f, 227.23471f, 226.22966f, 292.78833f, 250.87213f, 220.66672f, 292.0923f, 214.3262f, 220.62033f, 292.90533f, 294.61047f, 210.68884f, 260.9642f, 262.28113f, 255.0517f, 232.66026f, 294.8312f, 206.05696f, 289.73633f, 235.66345f, 232.93633f, 263.52408f, 256.7292f, 210.22684f, 229.51805f, 282.41776f, 211.0127f, 239.21553f, 235.43231f, 278.32697f, 299.7943f, 247.10483f, 219.1755f, 224.00432f, 263.2412f, 276.8183f, 291.88232f, 233.7261f, 241.75543f, 261.45193f, 296.58963f, 203.90746f, 277.9264f, 245.81134f, 261.24277f, 212.32646f, 242.76822f, 241.22888f, 224.0751f, 267.85315f, 232.49553f, 272.37656f, 253.20465f, 206.93951f, 201.29115f, 257.55444f, 296.3969f, 259.25177f, 292.10406f, 267.9734f, 253.28792f, 210.03741f, 272.03717f, 284.04358f, 292.52087f, 253.26274f, 207.37628f, 263.50598f, 228.07819f, 237.00746f, 241.3014f, 278.94174f, 214.41554f, 270.15442f, 264.77567f, 206.68633f, 229.17867f, 238.87085f, 254.12152f, -9.999742f, -9.999057f, -9.999062f, -9.999852f, -9.999382f, -9.999388f, -9.999354f, -9.999587f, -9.999273f, -9.999814f, -9.999888f, -9.999484f, -9.999295f, -9.999065f, -9.999623f, -9.999145f, -9.999381f, -9.999056f, -9.99943f, -9.999615f, -9.999143f, -9.999795f, -9.999838f, -9.999658f, -9.999616f, -9.9998f, -9.999448f, -9.999215f, -9.999058f, -9.999626f, -9.999816f, -9.99952f, -9.999158f, -9.999308f, -9.999545f, -9.999357f, -9.999205f, -9.999506f, -9.999683f, -9.999209f, -9.9999895f, -9.999543f, -9.999428f, -9.999628f, -9.999103f, -9.9991455f, -9.999936f, -9.999467f, -9.999748f, -9.99912f, -9.999807f, -9.999134f, -9.999681f, -9.999262f, -9.999087f, -9.999329f, -9.999385f, -9.999264f, -9.999793f, -9.999045f, -9.9995985f, -9.999204f, -9.999249f, -9.999444f, -9.9992075f, -9.9998455f, -9.999957f, -9.999949f, -9.999563f, -9.999786f, -9.999491f, -9.999651f, -9.999318f, -9.999416f, -9.999064f, -9.999325f, -9.9996f, -9.999902f, -9.999786f, -9.99952f, -9.999172f, -9.999215f, -9.999257f, -9.9991865f, -9.999605f, -9.999594f, -9.999224f, 
-9.999279f, -9.999259f, -9.999697f, -9.9996195f, -9.999134f, -9.999058f, -9.999047f, -9.999575f, -9.999919f, -9.999645f, -9.999633f, -9.999902f, -9.999141f, -9.999885f, -9.999965f, -9.999505f, -9.99982f, -9.999797f, -9.99964f, -9.999083f, -9.9995775f, -9.9999695f, -9.999383f, -9.999018f, -9.999117f, -9.99926f, -9.99911f, -9.999243f, -9.999118f, -9.99911f, -9.999486f, -9.99909f, -9.999861f, -9.999171f, -9.9999275f, -9.999972f, -9.999925f, -9.999671f, -9.999307f, -9.9994955f, -9.999324f, -9.999028f, -9.999182f, -9.999585f, -9.999082f, -9.999469f, -9.999043f, -9.999628f, -9.9994335f, -9.999068f, -9.999732f, -9.999809f, -9.999425f, -9.99959f, -9.999719f, -9.999516f, -9.999942f, -9.999832f, -9.999641f, -9.999447f, -9.99934f, -9.999968f, -9.999992f, 0.639171f, 0.47615534f, 0.1366003f, 0.4112621f, 0.543977f, 0.6301188f, 0.72094375f, 0.41664115f, 0.6702276f, 0.2662457f, 0.34709758f, 0.0047021024f, 0.19731691f, 0.3105783f, 0.35764986f, 0.6188618f, 0.55722684f, 0.014176953f, 0.28426266f, 0.55528253f, 0.9861382f, 0.59125423f, 0.91971123f, 0.50413203f, 0.71612626f, 0.37045076f, 0.16731057f, 0.8361767f, 0.20203081f, 0.46268502f, 0.54416966f, 0.82547253f, 0.70076334f, 0.19353609f, 0.7197332f, 0.7577992f, 0.15850778f, 0.09100532f, 0.8406752f, 0.4743588f, 0.14548168f, 0.91383964f, 0.31233132f, 0.057911392f, 0.38550714f, 0.788842f, 0.45663434f, 0.87255025f, 0.6822182f, 0.27235323f, 0.8781251f, 0.8971649f, 0.6117316f, 0.5027711f, 0.7707731f, 0.8171592f, 0.99433446f, 0.3228524f, 0.10424189f, 0.9995735f, 0.07680203f, 0.16278757f, 0.87946606f, 0.8840557f, 0.45882654f, 0.5382355f, 0.17185123f, 0.19348888f, 0.08070494f, 0.8351659f, 0.59116447f, 0.3656219f, 0.38914752f, 0.8038363f, 0.21394636f, 0.6494243f, 0.2923405f, 0.096409395f, 0.81489897f, 0.2177272f, 0.5156461f, 0.28180742f, 0.15846203f, 0.38402006f, 0.6799602f, 0.0992625f, 0.42167094f, 0.5157946f, 0.5737303f, 0.61967856f, 0.27188474f, 0.33863726f, 0.8381059f, 0.9284707f, 0.81110543f, 0.14615615f, 0.5137047f, 0.4068576f, 
0.27341366f, 0.6371842f, 0.46284974f, 0.6114867f, 0.71931726f, 0.91663635f, 0.60304374f, 0.14932536f, 0.88403726f, 0.54094154f, 0.1467738f, 0.97935086f, 0.7863954f, 0.2147064f, 0.012224621f, 0.14325804f, 0.65899223f, 0.5648787f, 0.65609366f, 0.8197612f, 0.6399177f, 0.8468733f, 0.76479703f, 0.25536442f, 0.5532024f, 0.95500815f, 0.39078063f, 0.5678974f, 0.21131837f, 0.987159f, 0.27899948f, 0.45318067f, 0.052973147f, 0.22060722f, 0.13576879f, 0.22578368f, 0.4504141f, 0.81624466f, 0.6962496f, 0.38475657f, 0.5542052f, 0.040127296f, 0.7824744f, 0.7515341f, 0.2940618f, 0.45921704f, 0.74931914f, 0.4590101f, 0.1761703f, 0.76585937f, 0.3804439f, 0.20216002f, 0.79364806f, 0.48445576f, 0.9997787f, 0.07572355f, 0.9185397f, 0.43292367f, 0.6824889f, 0.57344544f, 0.45387882f, 0.61218095f, 0.001530312f, 0.36701044f, 0.3732282f, 0.21642086f, 0.0032335173f, 0.9757738f, 0.6631197f, 0.84142756f, 0.23562978f, 0.8842848f, 0.24768245f, 0.6896844f, 0.093373105f, 0.47206926f, 0.018847544f, 0.3574926f, 0.7817249f, 0.3901984f, 0.37762666f, 0.60320383f, 0.5876514f, 0.8498338f, 0.6137263f, 0.64150596f, 0.8912183f, 0.18202206f, 0.07165835f, 0.54631984f, 0.14491297f, 0.46619728f, 0.5531275f, 0.9730491f, 0.3560192f, 0.5463067f, 0.9498098f, 0.6082786f, 0.12641688f, 0.27168056f, 0.449438f, 0.2710077f, 0.059393216f, 0.47376275f, 0.3349298f, 0.8534693f, 0.24378222f, 0.27263063f, 0.31725782f, 0.027660795f, 0.36858514f, 0.31543452f, 0.32232106f, 0.7514354f, 0.7665531f, 0.93814677f, 0.94667625f, 0.7495306f, 0.07630936f, 0.07085721f, 0.09998243f, 0.14326382f, 0.3722598f, 0.8195573f, 0.88503057f, 0.64455885f, 0.9708746f, 0.574863f, 0.7547003f, 0.663569f, 0.62627494f, 0.66573906f, 0.88241595f, 0.5472183f, 0.10965517f, 0.086363465f, 0.03911088f, 0.43472022f, 0.282755f, 0.81878805f, 0.7069662f, 0.6482738f, 0.7889657f, 0.13123439f, 0.5466046f, 0.9870477f, 0.65994346f, 0.044764873f, 0.2590037f, 0.21607089f, 0.7882748f, 0.030434562f, 0.7240241f, 0.24359426f, 0.24925096f, 0.50715107f, 0.8548116f, 0.5778587f, 
0.81658524f, 0.8406002f, 0.26860788f, 0.308281f, 0.40139812f, 0.27045614f, 0.681128f, 0.55732554f, 0.77117866f, 0.025454784f, 0.045293983f, 0.27430618f, 0.24866389f, 0.9072126f, 0.21633524f, 0.986974f, 0.91918707f, 0.86734384f, 0.5860722f, 0.8918684f, 0.86775124f, 0.24765202f, 0.7032609f, 0.4580694f, 0.6150063f, 0.12584582f, 0.13061108f, 0.11944151f, 0.27304602f, 0.08538959f, 0.2935459f, 0.6501564f, 0.6911091f, 0.79428184f, 0.19728307f, 0.9433592f, 0.98402375f, 0.278235f, 0.6931662f, 0.32246152f, 0.7604209f, 0.323686f, 0.4490462f, 0.21253695f, 0.37495488f, 0.095260054f, 0.5237899f, 0.9992169f, 0.36044437f, 0.5078252f, 0.5861082f, 0.64059675f, 0.03762793f, 0.49785113f, 0.38858363f, 0.69295675f, 0.2873984f, 0.32729995f, 0.59859157f, 0.73461634f, 0.25285175f, 0.5567667f, 0.71841735f, 0.69814867f, 0.77477485f, 0.16508374f, 0.15479185f, 0.48362815f, 0.37302348f, 0.7408702f, 0.11581469f, 0.08464117f, 0.029988535f, 0.34612563f, 0.45165575f, 0.68815565f, 0.008550999f, 0.09454897f, 0.8842033f, 0.471434f, 0.16433838f, 0.5935435f, 0.8646248f, 0.57239705f, 0.65469956f, 0.5863223f, 0.4796355f, 0.59167236f, 0.54985625f, 0.39255446f, 0.61727005f, 0.50840545f, 0.3316757f, 0.74857223f, 0.35827267f, 0.8872402f, 0.8038483f, 0.3931879f, 0.70447254f, 0.16417824f, 0.42719653f, 0.7534679f, 0.57123446f, 0.34724474f, 0.54931104f, 0.39288715f, 0.42828634f, 0.8222923f, 0.8765563f, 0.94212073f, 0.12068056f, 0.70422703f, 0.2824587f, 0.027603716f, 0.52777815f, 0.5066046f, 0.5769824f, 0.07630827f, 0.103958726f, 0.1505021f, 0.24175929f, 0.50438327f, 0.6733676f, 0.35198468f, 0.0752788f, 0.7415916f, 0.42589715f, 0.761479f, 0.0033971865f, 0.91897255f, 0.9319753f, 0.81370807f, 0.79544336f, 0.23588327f, 0.9587119f, 0.71191025f, 0.42136034f, 0.19574885f, 0.54185784f, 0.008105425f, 0.14255908f, 0.63592f, 0.3044852f, 0.6324764f, 0.6508548f, 0.08161495f, 0.65241224f, 0.8424147f, 0.97779244f, 0.72876996f, 0.61530423f, 0.94752645f, 0.6066642f, 0.10435986f, 0.18537253f, 0.30024627f, 0.8787194f, 0.06873524f, 
0.91032326f, 0.84761214f, 0.12825106f, 0.22760965f, 0.70036477f, 0.09428674f, 0.9861057f, 0.13853452f, 0.8474568f, 0.057899747f, 0.060172286f, 0.37916803f, 0.15240528f, 0.77621406f, 0.26485768f, 0.1740309f, 0.29064766f, 0.7386373f, 0.5348933f, 0.26158985f, 0.43255532f, 0.59368885f, 0.61983097f, 0.13413209f, 0.32573816f, 0.43871734f, 0.7316835f, 0.7375361f, 0.8791016f, 0.46889958f, 0.8362294f, 0.56079483f, 0.78738517f, 0.12909074f, 0.19669758f, 0.3654093f, 257.23004f, 205.25952f, 256.3495f, 287.5462f, 248.0553f, 279.42828f, 252.23164f, 293.8083f, 244.82593f, 241.14514f, 264.60312f, 242.02669f, 265.36676f, 285.9313f, 276.8894f, 264.85254f, 204.56178f, 216.75874f, 245.4952f, 212.06345f, 205.75478f, 284.3255f, 291.17203f, 219.69725f, 203.70792f, 225.91046f, 230.73822f, 262.73547f, 201.7526f, 212.36281f, 283.3116f, 294.07062f, 249.66954f, 283.85126f, 246.5827f, 207.68987f, 272.6758f, 240.09421f, 275.82172f, 225.84433f, 232.80176f, 201.71077f, 252.89136f, 240.62161f, 259.20868f, 247.87543f, 218.64772f, 248.03424f, 202.67117f, 238.984f, 290.77563f, 293.03915f, 289.35855f, 289.96945f, 286.17395f, 231.49643f, 251.10532f, 225.1938f, 206.88234f, 256.4651f, 239.51657f, 245.26834f, 247.59836f, 204.23398f, 203.37993f, 225.53943f, 267.85843f, 297.7295f, 265.553f, 295.24786f, 242.70523f, 286.44165f, 283.38336f, 251.81482f, 208.90456f, 257.36407f, 229.28513f, 290.7318f, 258.70337f, 223.44356f, 264.08783f, 275.03732f, 251.59811f, 292.53107f, 251.5335f, 244.22394f, 213.89952f, 236.25047f, 211.8138f, 220.5794f, 216.87543f, 233.37456f, 224.4222f, 295.09964f, 214.58566f, 281.3576f, 256.06107f, 241.79654f, 291.32068f, 239.49226f, 228.46638f, 218.16322f, 203.63048f, 299.67514f, 282.89703f, 265.6753f, 287.9343f, 239.81447f, 209.17609f, 262.6297f, 295.4711f, 205.0095f, 223.62189f, 286.34204f, 243.34543f, 237.4936f, 249.12177f, 232.68518f, 229.49867f, 224.16684f, 203.26491f, 272.76715f, 294.89102f, 286.48096f, 273.26846f, 273.41534f, 204.2877f, 210.98381f, 206.86124f, 265.20584f, 244.88943f, 
266.12534f, 239.2653f, 286.19138f, 271.75153f, 267.04507f, 210.73386f, 233.14261f, 220.80898f, 273.75244f, 298.48633f, 268.37622f, 204.67131f, 289.64368f, 276.43658f, 290.26245f, 279.004f, 201.35966f, 207.23166f, 280.78134f, -9.999485f, -9.999401f, -9.99988f, -9.99983f, -9.999996f, -9.999282f, -9.999148f, -9.999958f, -9.999139f, -9.999945f, -9.999827f, -9.999956f, -9.999576f, -9.999011f, -9.99982f, -9.999912f, -9.999579f, -9.9990425f, -9.999927f, -9.999287f, -9.999705f, -9.999723f, -9.999244f, -9.999403f, -9.999639f, -9.999259f, -9.999532f, -9.999533f, -9.999703f, -9.999582f, -9.999963f, -9.99968f, -9.999428f, -9.999266f, -9.999494f, -9.999798f, -9.999454f, -9.999226f, -9.99951f, -9.999481f, -9.999743f, -9.99988f, -9.999303f, -9.999975f, -9.999095f, -9.99945f, -9.999369f, -9.999166f, -9.99957f, -9.999976f, -9.999418f, -9.999267f, -9.99994f, -9.999312f, -9.999308f, -9.999992f, -9.9999f, -9.999182f, -9.9991665f, -9.999685f, -9.999133f, -9.999587f, -9.999473f, -9.999556f, -9.999567f, -9.999451f, -9.999944f, -9.999353f, -9.999919f, -9.999077f, -9.99981f, -9.999687f, -9.999805f, -9.999417f, -9.999404f, -9.999712f, -9.99989f, -9.999068f, -9.999573f, -9.999242f, -9.99952f, -9.999031f, -9.999762f, -9.999584f, -9.999476f, -9.999041f, -9.999508f, -9.999519f, -9.999463f, -9.999605f, -9.999481f, -9.99913f, -9.999719f, -9.99981f, -9.999058f, -9.99957f, -9.999909f, -9.99912f, -9.999596f, -9.999688f, -9.999179f, -9.999336f, -9.999998f, -9.999264f, -9.999145f, -9.99914f, -9.999104f, -9.999027f, -9.999755f, -9.999626f, -9.999572f, -9.999876f, -9.999124f, -9.9998865f, -9.999168f, -9.999185f, -9.9995575f, -9.999532f, -9.999246f, -9.999302f, -9.999073f, -9.999327f, -9.9998045f, -9.999645f, -9.999669f, -9.999047f, -9.999023f, -9.999354f, -9.999763f, -9.999772f, -9.999175f, -9.999568f, -9.999145f, -9.999254f, -9.999511f, -9.999705f, -9.999031f, -9.999324f, -9.999718f, -9.999497f, -9.99974f, -9.999597f, -9.999909f, -9.999239f, -9.999544f, -9.999691f, -9.999259f, -9.999239f, -9.999568f, 
-9.999504f, 0.03882216f, 0.8428897f, 0.74364215f, 0.23163715f, 0.49048677f, 0.22178552f, 0.6055793f, 0.4489804f, 0.9163623f, 0.9438124f, 0.1631071f, 0.6749212f, 0.7188561f, 0.32485962f, 0.8829685f, 0.20882395f, 0.60495543f, 0.47757575f, 0.6093003f, 0.84457403f, 0.7257506f, 0.17652789f, 0.025987253f, 0.9859064f, 0.6156289f, 0.73053515f, 0.76787066f, 0.5010675f, 0.40560544f, 0.07712759f, 0.9088255f, 0.07926025f, 0.24527292f, 0.27416497f, 0.74946845f, 0.24720564f, 0.07141664f, 0.43434754f, 0.4136174f, 0.869559f, 0.22436135f, 0.31195417f, 0.12554419f, 0.7383186f, 0.48795158f, 0.52957517f, 0.623028f, 0.036754537f, 0.56178623f, 0.32868809f, 0.9017316f, 0.09641818f, 0.9912348f, 0.92983764f, 0.4863829f, 0.2328445f, 0.72820157f, 0.5609035f, 0.5382467f, 0.21526214f, 0.2952519f, 0.391415f, 0.32775486f, 0.7910391f, 0.04752018f, 0.3907967f, 0.24044213f, 0.62969697f, 0.86658025f, 0.550671f, 0.6625566f, 0.7994618f, 0.12169334f, 0.21295948f, 0.4997118f, 0.98608136f, 0.67981267f, 0.5607458f, 0.20580857f, 0.59258527f, 0.74313295f, 0.504703f, 0.34825593f, 0.88810426f, 0.375232f, 0.9950801f, 0.6716571f, 0.43368435f, 0.13610889f, 0.7123607f, 0.5050985f, 0.31398848f, 0.6695705f, 0.12510324f, 0.18162547f, 0.61493284f, 0.816849f, 0.9648539f, 0.37662333f, 0.03039601f, 0.8444544f, 0.3708865f, 0.24754128f, 0.33466703f, 0.96997195f, 0.4863897f, 0.425792f, 0.5019443f, 0.3766153f, 0.37071276f, 0.30467907f, 0.5455875f, 0.47557223f, 0.99561185f, 0.82659286f, 0.50989014f, 0.8268076f, 0.32439554f, 0.90867627f, 0.523794f, 0.91507274f, 0.3708023f, 0.67873424f, 0.6258858f, 0.7507315f, 0.6253023f, 0.62942946f, 0.5893559f, 0.30942422f, 0.2114435f, 0.022920458f, 0.044418756f, 0.61610794f, 0.8113304f, 0.35662258f, 0.41705018f, 0.46921277f, 0.86777097f, 0.95223355f, 0.40362936f, 0.9437976f, 0.18228506f, 0.6360729f, 0.33576652f, 0.031274755f, 0.21817888f, 0.36112952f, 0.7787455f, 0.42273897f, 0.25281885f, 0.33198494f, 0.7785485f, 0.788286f, 0.16736427f, 0.0092501305f, 0.09297396f, 0.28935695f, 0.34107473f, 
0.30980217f, 0.53143716f, 0.52857065f, 0.8409118f, 0.4052178f, 0.69706166f, 0.64710814f, 0.026039753f, 0.98393834f, 0.37317148f, 0.2896904f, 0.9887286f, 0.26908764f, 0.9406588f, 0.5261725f, 0.9049269f, 0.56662345f, 0.6709716f, 0.68239623f, 0.49234113f, 0.97048306f, 0.33545634f, 0.23616292f, 0.21654218f, 0.25211942f, 0.024790008f, 0.6374578f, 0.38915554f, 0.9337675f, 0.9430794f, 0.4695175f, 0.7804938f, 0.536538f, 0.9851012f, 0.19607964f, 0.3125924f, 0.55515915f, 0.85639995f, 0.76419586f, 0.19247372f, 0.8593474f, 0.65614396f, 0.8763346f, 0.5008372f, 0.75938493f, 0.30444136f, 0.8475765f, 0.2756218f, 0.7643892f, 0.10603409f, 0.4270085f, 0.40084615f, 0.094159424f, 0.28666124f, 0.907423f, 0.59824944f, 0.13585345f, 0.7766466f, 0.8080405f, 0.6886941f, 0.019375224f, 0.8924157f, 0.8251331f, 0.78726494f, 0.91793686f, 0.30526364f, 0.75136036f, 0.5101915f, 0.0959181f, 0.64297056f, 0.16485944f, 0.7552983f, 0.5024531f, 0.29433584f, 0.99849665f, 0.4194633f, 0.3247048f, 0.6200598f, 0.10172686f, 0.5053654f, 0.2359409f, 0.7552459f, 0.8971784f, 0.044323962f, 0.52423203f, 0.67628855f, 0.36866117f, 0.99563f, 0.2329034f, 0.27227026f, 0.76375973f, 0.79602706f, 0.5184415f, 0.10457488f, 0.0819885f, 0.90606177f, 0.052181873f, 0.6621527f, 0.92458886f, 0.24737877f, 0.04191045f, 0.34999782f, 0.08424192f, 0.29925734f, 0.24015819f, 0.5147704f, 0.42221153f, 0.99205357f, 0.54271156f, 0.79544294f, 0.5694224f, 0.37800944f, 0.5500707f, 0.09987821f, 0.40123457f, 0.7795467f, 0.8094248f, 0.5604407f, 0.34524485f, 0.56357986f, 0.6901132f, 0.2526902f, 0.46615395f, 0.24697252f, 0.5420497f, 0.18665877f, 0.6566352f, 0.2777055f, 0.9320998f, 0.89702964f, 0.022678716f, 0.1815973f, 0.09005783f, 0.51381236f, 0.6743502f, 0.6247244f, 0.8565416f, 0.87987f, 0.6732118f, 0.00460204f, 0.27535322f, 0.7455861f, 0.15749842f, 0.9247148f, 0.03532768f, 0.08851064f, 0.23502532f, 0.752143f, 0.21853413f, 0.6609476f, 0.28531924f, 0.18054475f, 0.029035527f, 0.67236483f, 0.2241403f, 0.28975555f, 0.99908245f, 0.43963638f, 0.59023327f, 
0.30457687f, 0.16792373f, 0.7709499f, 0.6859642f, 0.69117963f, 0.86467695f, 0.5084144f, 0.7589203f, 0.4828981f, 0.07482473f, 0.48116097f, 0.53940266f, 0.5052822f, 0.22626108f, 0.7467059f, 0.41369334f, 0.031238595f, 0.028987564f, 0.66039693f, 0.22867519f, 0.8922084f, 0.23077016f, 0.49657655f, 0.12957393f, 0.5363605f, 0.4044849f, 0.44835f, 0.35317385f, 0.9867398f, 0.92447424f, 0.8969754f, 0.12785867f, 0.34567907f, 0.37078106f, 0.33044818f, 0.5057445f, 0.7683958f, 0.59161294f, 0.3239813f, 0.345188f, 0.5798496f, 0.64173394f, 0.8413601f, 0.47511417f, 0.835949f, 0.9396055f, 0.26686642f, 0.23109126f, 0.69826096f, 0.80957353f, 0.3445376f, 0.30203474f, 0.45118847f, 0.21602394f, 0.59850556f, 0.4789453f, 0.4077335f, 0.5152989f, 0.33034822f, 0.68474686f, 0.85391724f, 0.48057246f, 0.2998755f, 0.90360653f, 0.65591294f, 0.8092372f, 0.7287787f, 0.59123766f, 0.6105523f, 0.15701269f, 0.9201797f, 0.22071724f, 0.44657114f, 0.85324067f, 0.74536175f, 0.92492616f, 0.67641914f, 0.5987662f, 0.81729543f, 0.8069455f, 0.6891773f, 0.8835294f, 0.8892519f, 0.8500076f, 0.857101f, 0.6734726f, 0.9874815f, 0.46896955f, 0.9641137f, 0.47160545f, 0.8463774f, 0.30557284f, 0.9699319f, 0.06608189f, 0.055327572f, 0.93581414f, 0.9587841f, 0.058981307f, 0.92397076f, 0.010058546f, 0.34675553f, 0.6533823f, 0.5349482f, 0.46875533f, 0.5844002f, 0.5102338f, 0.26537207f, 0.19412437f, 0.07258324f, 0.38117927f, 0.1528994f, 0.056126937f, 0.7896892f, 0.3633707f, 0.5028834f, 0.15584666f, 0.43396717f, 0.7498128f, 0.17068368f, 0.8056127f, 0.83374524f, 0.7477155f, 0.8996221f, 0.53976667f, 0.9230572f, 0.19246647f, 0.6391656f, 0.4030687f, 0.7643678f, 0.019256072f, 0.59730285f, 0.309159f, 0.7264034f, 256.18292f, 247.5509f, 241.8322f, 221.72641f, 247.00475f, 289.95996f, 204.75641f, 299.0052f, 222.08545f, 249.15363f, 277.1748f, 222.7599f, 219.53043f, 259.93314f, 290.20483f, 264.3145f, 203.74707f, 269.35193f, 270.35507f, 233.42912f, 209.86781f, 292.96222f, 238.48882f, 256.7762f, 211.95813f, 255.83502f, 271.98605f, 276.92862f, 
244.43182f, 219.40994f, 250.76295f, 294.04694f, 226.60033f, 258.7823f, 224.29234f, 289.13776f, 284.96054f, 215.06387f, 284.33295f, 255.14339f, 249.39714f, 298.0097f, 206.93636f, 207.78658f, 210.90904f, 237.74179f, 227.25084f, 248.60242f, 241.76729f, 289.64044f, 257.6767f, 223.0866f, 249.12407f, 201.15231f, 275.7378f, 262.39612f, 268.82336f, 262.55298f, 269.66827f, 237.66492f, 211.21674f, 246.47617f, 200.1591f, 228.94618f, 286.93787f, 224.82498f, 282.6982f, 216.67554f, 299.76526f, 211.74054f, 258.6674f, 282.2848f, 242.32083f, 244.45291f, 261.59262f, 257.17282f, 230.43474f, 219.33755f, 239.1705f, 229.16939f, 229.4628f, 227.99637f, 278.22507f, 207.49443f, 232.81923f, 250.38698f, 255.53925f, 201.98932f, 279.6214f, 245.52f, 216.7771f, 238.63602f, 204.19614f, 258.92218f, 230.05328f, 267.0341f, 256.95154f, 293.94968f, 251.7791f, 249.71518f, 268.04617f, 243.68118f, 239.60608f, 291.69824f, 255.33287f, 247.66194f, 210.42975f, 272.79053f, 251.49638f, 270.4292f, 266.5404f, 223.91647f, 227.0489f, 217.59396f, 202.26263f, 234.13164f, 282.81702f, 241.44751f, 237.6629f, 254.03835f, 276.81006f, 253.21158f, 290.75342f, 299.60394f, 252.36249f, 207.7176f, 293.0687f, 224.40785f, 254.29674f, 210.75064f, 251.1633f, 265.51978f, 292.73917f, 268.97003f, 213.86755f, 280.26193f, 236.59819f, 261.9136f, 271.9696f, 260.67432f, 225.67659f, 279.94318f, 244.74088f, 205.70877f, 236.24387f, 266.11798f, 234.5054f, 227.88277f, 212.92162f, 281.1429f, -9.9995f, -9.999907f, -9.999015f, -9.99986f, -9.999811f, -9.99916f, -9.9994335f, -9.999082f, -9.999476f, -9.999472f, -9.999309f, -9.999354f, -9.999964f, -9.999819f, -9.999472f, -9.999187f, -9.999328f, -9.999281f, -9.999373f, -9.999825f, -9.999259f, -9.999581f, -9.999256f, -9.999902f, -9.999506f, -9.999213f, -9.999032f, -9.999097f, -9.999959f, -9.999018f, -9.999999f, -9.999964f, -9.99983f, -9.999462f, -9.999094f, -9.999825f, -9.999322f, -9.999475f, -9.999018f, -9.999352f, -9.999122f, -9.999426f, -9.999498f, -9.999934f, -9.9994545f, -9.99973f, -9.999741f, 
-9.999373f, -9.99933f, -9.999706f, -9.999398f, -9.999283f, -9.999558f, -9.999604f, -9.999935f, -9.999592f, -9.999328f, -9.999943f, -9.999334f, -9.99971f, -9.999961f, -9.999668f, -9.9997835f, -9.999137f, -9.999606f, -9.999959f, -9.99975f, -9.999391f, -9.999501f, -9.999959f, -9.999507f, -9.999104f, -9.999123f, -9.999664f, -9.99954f, -9.999395f, -9.99991f, -9.999099f, -9.999796f, -9.999523f, -9.999298f, -9.999127f, -9.99933f, -9.999529f, -9.999645f, -9.999581f, -9.999803f, -9.999978f, -9.999745f, -9.999099f, -9.999732f, -9.999282f, -9.999186f, -9.999484f, -9.9994545f, -9.999736f, -9.999692f, -9.999638f, -9.999521f, -9.999184f, -9.999315f, -9.999997f, -9.999688f, -9.999604f, -9.999361f, -9.999519f, -9.999438f, -9.999516f, -9.999867f, -9.999932f, -9.99967f, -9.999632f, -9.999027f, -9.999614f, -9.999386f, -9.999235f, -9.99902f, -9.999881f, -9.999402f, -9.999828f, -9.999898f, -9.999556f, -9.9999485f, -9.99902f, -9.999726f, -9.99967f, -9.999689f, -9.999588f, -9.999742f, -9.999436f, -9.999829f, -9.999895f, -9.999559f, -9.999202f, -9.999972f, -9.999332f, -9.999621f, -9.999881f, -9.999916f, -9.999846f, -9.999947f, -9.999159f, -9.999294f, -9.999025f, -9.999374f, -9.999594f, -9.999471f, -9.999263f, -9.999252f, -9.999847f, 0.8405395f, 0.4899531f, 0.15557215f, 0.053656846f, 0.9073092f, 0.07903749f, 0.49019513f, 0.46704555f, 0.2108235f, 0.59149706f, 0.06908697f, 0.91793466f, 0.19079898f, 0.54947394f, 0.052311927f, 0.77982026f, 0.5299146f, 0.17064495f, 0.56645525f, 0.8840749f, 0.042285662f, 0.8682272f, 0.028326662f, 0.09698481f, 0.12325795f, 0.4347101f, 0.37012324f, 0.7913993f, 0.9993339f, 0.75977063f, 0.36460763f, 0.3775515f, 0.51856863f, 0.95555836f, 0.49067768f, 0.04478922f, 0.71699315f, 0.097812556f, 0.45841676f, 0.773683f, 0.75010455f, 0.42993996f, 0.9079247f, 0.017453227f, 0.44864193f, 0.672689f, 0.28056568f, 0.19584337f, 0.37550166f, 0.8117075f, 0.7120219f, 0.5780687f, 0.44134927f, 0.42259568f, 0.7511653f, 0.5891905f, 0.67056227f, 0.11231151f, 0.6758219f, 0.22908887f, 
0.37498733f, 0.41971782f, 0.055803128f, 0.59144944f, 0.9299475f, 0.12942357f, 0.95274854f, 0.32053652f, 0.20608023f, 0.16834818f, 0.57836413f, 0.055714697f, 0.06392813f, 0.29768264f, 0.09972937f, 0.8983277f, 0.97463375f, 0.1341327f, 0.65210474f, 0.35204768f, 0.014110221f, 0.80327654f, 0.6689872f, 0.9037585f, 0.90981257f, 0.86295295f, 0.3795516f, 0.0062070885f, 0.5173644f, 0.20474744f, 0.86028427f, 0.15545785f, 0.3484738f, 0.48408556f, 0.28058404f, 0.75635433f, 0.5704764f, 0.80539626f, 0.8308685f, 0.7464902f, 0.12689869f, 0.89151156f, 0.37369293f, 0.36895418f, 0.5450234f, 0.1559311f, 0.2432725f, 0.38309494f, 0.27770162f, 0.56394845f, 0.72261786f, 0.5332152f, 0.49045795f, 0.88231075f, 0.6032768f, 0.6665413f, 0.857885f, 0.31463873f, 0.9153665f, 0.37640592f, 0.58912075f, 0.24793272f, 0.7373741f, 0.8440094f, 0.015947558f, 0.58805275f, 0.3667698f, 0.46238968f, 0.8334069f, 0.81946284f, 0.19397281f, 0.92121077f, 0.964989f, 0.24575949f, 0.0900369f, 0.6689977f, 0.23726216f, 0.601819f, 0.16691278f, 0.47163498f, 0.03375374f, 0.36948392f, 0.08575206f, 0.9858967f, 0.7306862f, 0.21772163f, 0.39309397f, 0.7458295f, 0.7629526f, 0.3144869f, 0.94122046f, 0.20584162f, 0.83637947f, 0.7726502f, 0.9049252f, 0.36524808f, 0.7137413f, 0.8284559f, 0.22519512f, 0.30139557f, 0.8169721f, 0.5312386f, 0.8956069f, 0.66213816f, 0.58457166f, 0.45457113f, 0.5169665f, 0.6269637f, 0.26091218f, 0.7560391f, 0.7980105f, 0.3960119f, 0.08781406f, 0.10958682f, 0.12124728f, 0.4373948f, 0.031676244f, 0.55287856f, 0.7805502f, 0.56280786f, 0.25152865f, 0.566051f, 0.7870067f, 0.759523f, 0.45281285f, 0.62631804f, 0.989187f, 0.26606834f, 0.39388546f, 0.87392044f, 0.583776f, 0.654467f, 0.49633527f, 0.39479604f, 0.63170516f, 0.62530655f, 0.9021866f, 0.13965032f, 0.35174674f, 0.79825306f, 0.7204604f, 0.8848764f, 0.43971986f, 0.7367297f, 0.71475625f, 0.07822404f, 0.42548487f, 0.11135407f, 0.80643165f, 0.83326644f, 0.8646103f, 0.89960915f, 0.46280593f, 0.8834037f, 0.2807901f, 0.68196964f, 0.3704893f, 0.4120405f, 
0.82667f, 0.02957211f, 0.16348517f, 0.528726f, 0.36919758f, 0.22145572f, 0.43879473f, 0.09656078f, 0.5824419f, 0.0181659f, 0.25570688f, 0.7642685f, 0.19078839f, 0.70748967f, 0.5835414f, 0.92161185f, 0.8213292f, 0.046582457f, 0.85949063f, 0.15103385f, 0.74723977f, 0.39284366f, 0.5726992f, 0.07368804f, 0.3426399f, 0.17463133f, 0.24858418f, 0.31684884f, 0.49405006f, 0.37952894f, 0.33315596f, 0.8640441f, 0.57182634f, 0.25183997f, 0.7026268f, 0.37704948f, 0.17044407f, 0.27955136f, 0.96993434f, 0.09108966f, 0.6897659f, 0.19774762f, 0.6693781f, 0.12952057f, 0.89581305f, 0.21900262f, 0.1147024f, 0.29112664f, 0.06916158f, 0.22942513f, 0.42038745f, 0.7651415f, 0.45440084f, 0.17078096f, 0.07726187f, 0.4274913f, 0.86462736f, 0.06414275f, 0.9592153f, 0.16050456f, 0.88035154f, 0.9545343f, 0.8513476f, 0.2491725f, 0.7261043f, 0.5407395f, 0.22621076f, 0.31755584f, 0.75632083f, 0.7962324f, 0.50990444f, 0.61564916f, 0.76425743f, 0.70222944f, 0.73869663f, 0.29614443f, 0.021682443f, 0.5887306f, 0.31215057f, 0.10243766f, 0.9339864f, 0.23341663f, 0.7255635f, 0.4185125f, 0.5641563f, 0.0210989f, 0.31937757f, 0.77237654f, 0.055116564f, 0.31758264f, 0.35916016f, 0.5235203f, 0.15846917f, 0.5410007f, 0.3291817f, 0.14069794f, 0.90887386f, 0.259237f, 0.93863297f, 0.75447625f, 0.6713672f, 0.5048135f, 0.7174148f, 0.52741486f, 0.92290014f, 0.0805213f, 0.70555705f, 0.8765804f, 0.21684085f, 0.059146658f, 0.52307314f, 0.24510364f, 0.73993003f, 0.081979565f, 0.76904917f, 0.57904243f, 0.4695278f, 0.016590666f, 0.7074726f, 0.03675281f, 0.05884536f, 0.8561499f, 0.7090553f, 0.86932564f, 0.31001756f, 0.7310781f, 0.7902563f, 0.4690628f, 0.5504265f, 0.99635744f, 0.8836126f, 0.49213162f, 0.4428661f, 0.88994193f, 0.35176337f, 0.4958119f, 0.5913544f, 0.4187957f, 0.27758822f, 0.28339785f, 0.7841562f, 0.30195132f, 0.752634f, 0.3137563f, 0.4315457f, 0.44653264f, 0.5451809f, 0.44049335f, 0.8987003f, 0.5640792f, 0.5874427f, 0.47600824f, 0.5928f, 0.80064255f, 0.20061128f, 0.37571868f, 0.8139443f, 0.62335235f, 
0.8047332f, 0.31274527f, 0.30714568f, 0.035397593f, 0.69739f, 0.2944578f, 0.34834376f, 0.5873635f, 0.9606469f, 0.5618423f, 0.6756651f, 0.03466902f, 0.27137738f, 0.59027666f, 0.8357776f, 0.425116f, 0.50365347f, 0.4515947f, 0.4932688f, 0.005631942f, 0.57952595f, 0.47525176f, 0.6249525f, 0.086651884f, 0.89189065f, 0.6617942f, 0.9442606f, 0.27843753f, 0.44292933f, 0.38660362f, 0.07765346f, 0.50435954f, 0.83211386f, 0.9370695f, 0.39374778f, 0.08252517f, 0.20432696f, 0.9130672f, 0.6829529f, 0.4023203f, 0.18018572f, 0.7534347f, 0.42706057f, 0.42672646f, 0.47151735f, 0.22955406f, 0.9152989f, 0.08499177f, 0.21106064f, 0.81278425f, 0.4464995f, 0.9721553f, 0.5701927f, 0.5504968f, 0.33792228f, 0.97337884f, 0.1806469f, 0.09640216f, 0.163271f, 0.42888898f, 0.778335f, 0.8884757f, 0.79867357f, 0.7878421f, 0.07889473f, 0.35902497f, 0.56884366f, 0.4541578f, 0.85038835f, 0.5382435f, 0.09464303f, 0.9107641f, 0.94099534f, 0.5400446f, 266.79602f, 274.32846f, 213.67004f, 233.85674f, 243.74121f, 250.29242f, 241.2762f, 246.10477f, 210.67426f, 209.43724f, 229.85814f, 280.7868f, 272.1595f, 250.896f, 203.6569f, 224.5947f, 228.5461f, 250.31659f, 259.0063f, 207.73958f, 214.5609f, 227.4157f, 288.49915f, 258.5862f, 237.1694f, 260.80396f, 253.53038f, 216.46973f, 200.73683f, 276.59747f, 218.64984f, 277.839f, 211.7889f, 278.14984f, 276.74042f, 224.4895f, 237.72171f, 253.24715f, 202.98746f, 237.59871f, 204.87325f, 239.43521f, 295.81796f, 299.5604f, 222.03635f, 228.79982f, 266.0576f, 239.92245f, 268.24426f, 238.24408f, 298.47308f, 288.47458f, 215.21046f, 248.30959f, 290.8601f, 287.38885f, 209.855f, 220.54123f, 251.46211f, 269.38593f, 215.89407f, 249.74835f, 233.35129f, 259.1078f, 247.44966f, 203.68665f, 295.11304f, 298.9008f, 216.80823f, 265.98523f, 250.68268f, 259.11737f, 224.44098f, 201.49985f, 265.72772f, 291.2741f, 291.02527f, 205.01653f, 225.3552f, 230.4449f, 205.90791f, 236.37225f, 234.94302f, 227.96848f, 293.9239f, 200.43617f, 261.1322f, 246.37569f, 206.33258f, 230.6332f, 275.16974f, 
226.53664f, 253.74765f, 201.92174f, 277.2812f, 279.80594f, 269.5651f, 215.83727f, 290.79214f, 209.25894f, 240.69214f, 259.45502f, 221.35303f, 245.88794f, 233.58676f, 278.87738f, 268.62115f, 238.47983f, 288.8792f, 284.89505f, 235.00497f, 242.7936f, 236.64014f, 252.04784f, 205.45514f, 290.40726f, 232.52823f, 259.1132f, 290.73474f, 227.57782f, 216.67067f, 294.74762f, 217.73929f, 209.24208f, 256.90912f, 240.18433f, 257.794f, 282.8988f, 208.77882f, 297.82245f, 299.72125f, 298.86118f, 282.77133f, 299.69577f, 298.43073f, 299.66992f, 206.1796f, 239.80862f, 245.31291f, 207.94046f, 256.93558f, 210.00853f, 297.19482f, 258.61487f, 298.00143f, 247.14326f, 220.11229f, 299.13562f, 289.7299f, 244.51624f, -9.999632f, -9.999593f, -9.999801f, -9.999819f, -9.999018f, -9.999244f, -9.999898f, -9.999155f, -9.999041f, -9.999333f, -9.999995f, -9.999601f, -9.999369f, -9.999678f, -9.99932f, -9.999411f, -9.999675f, -9.999204f, -9.999888f, -9.999743f, -9.999049f, -9.999095f, -9.9994955f, -9.999148f, -9.999902f, -9.999157f, -9.999642f, -9.999242f, -9.999449f, -9.99954f, -9.999594f, -9.999917f, -9.999246f, -9.999855f, -9.999591f, -9.999358f, -9.999842f, -9.999382f, -9.999745f, -9.999809f, -9.999109f, -9.999151f, -9.999462f, -9.999784f, -9.999753f, -9.999547f, -9.999858f, -9.999641f, -9.999331f, -9.999973f, -9.999725f, -9.999956f, -9.999523f, -9.999478f, -9.999359f, -9.999043f, -9.999455f, -9.999254f, -9.999494f, -9.999362f, -9.999646f, -9.999454f, -9.999153f, -9.99971f, -9.99948f, -9.999924f, -9.999973f, -9.9990425f, -9.999157f, -9.999034f, -9.999135f, -9.999451f, -9.99927f, -9.999871f, -9.999655f, -9.999354f, -9.999864f, -9.999408f, -9.999447f, -9.999032f, -9.999453f, -9.999718f, -9.999415f, -9.999358f, -9.999691f, -9.99945f, -9.999504f, -9.999244f, -9.999987f, -9.999557f, -9.999052f, -9.999141f, -9.999237f, -9.999049f, -9.99919f, -9.999888f, -9.999757f, -9.999621f, -9.999702f, -9.999411f, -9.999203f, -9.999174f, -9.999015f, -9.999339f, -9.999034f, -9.999728f, -9.99976f, -9.999317f, -9.999367f, 
-9.999866f, -9.999091f, -9.999755f, -9.999178f, -9.999553f, -9.999263f, -9.999655f, -9.999423f, -9.999304f, -9.999814f, -9.999966f, -9.999977f, -9.9992075f, -9.999666f, -9.999204f, -9.999895f, -9.999059f, -9.99907f, -9.9995575f, -9.999523f, -9.999056f, -9.999571f, -9.999786f, -9.999026f, -9.999145f, -9.999575f, -9.999738f, -9.99979f, -9.999363f, -9.999586f, -9.999727f, -9.999086f, -9.999402f, -9.999158f, -9.999252f, -9.999179f, -9.999597f, -9.999156f, -9.99936f, -9.999807f, -9.999261f, 0.5652288f, 0.9339315f, 0.55770487f, 0.7478212f, 0.33771703f, 0.28125492f, 0.51592994f, 0.5532214f, 0.58044416f, 0.66528046f, 0.669034f, 0.16671883f, 0.67413294f, 0.036051773f, 0.108843535f, 0.7993396f, 0.1639013f, 0.6568752f, 0.122072175f, 0.70342636f, 0.5444655f, 0.5812534f, 0.4522436f, 0.2419f, 0.07067616f, 0.8879451f, 0.60514754f, 0.14282055f, 0.70217454f, 0.10503953f, 0.39604086f, 0.60164565f, 0.5446685f, 0.07094606f, 0.5559759f, 0.014643576f, 0.9885768f, 0.45798954f, 0.80507016f, 0.46793476f, 0.91752577f, 0.04094297f, 0.60369307f, 0.8747373f, 0.5086575f, 0.7004933f, 0.2251465f, 0.35307238f, 0.27597564f, 0.94157344f, 0.65179616f, 0.20595148f, 0.27256346f, 0.20036213f, 0.67921185f, 0.15910614f, 0.52645075f, 0.6180527f, 0.09315563f, 0.4282912f, 0.3796773f, 0.55366653f, 0.8087156f, 0.989089f, 0.81570625f, 0.36953965f, 0.29338685f, 0.8806224f, 0.40907812f, 0.99581677f, 0.031810474f, 0.9831273f, 0.21194534f, 0.6745432f, 0.38136473f, 0.2702163f, 0.6385419f, 0.29438227f, 0.12847719f, 0.27120438f, 0.30660692f, 0.5424479f, 0.92706877f, 0.9079774f, 0.22223541f, 0.3657775f, 0.25447527f, 0.81911993f, 0.30269873f, 0.74017876f, 0.92759985f, 0.70151937f, 0.7640615f, 0.8949204f, 0.79928416f, 0.77783567f, 0.6940916f, 0.2910855f, 0.97654736f, 0.2973309f, 0.5588422f, 0.6462096f, 0.30760437f, 0.18172295f, 0.7695246f, 0.34731266f, 0.19734544f, 0.029608455f, 0.37696892f, 0.111436665f, 0.50183326f, 0.28445065f, 0.68564844f, 0.44779962f, 0.9736052f, 0.51790065f, 0.983022f, 0.52825344f, 0.41285545f, 
0.9967343f, 0.6162969f, 0.37753683f, 0.17138597f, 0.07175013f, 0.81368434f, 0.9612253f, 0.9045651f, 0.84745973f, 0.36729226f, 0.98037714f, 0.20115525f, 0.12099608f, 0.96984464f, 0.37242016f, 0.29363927f, 0.39158085f, 0.27558497f, 0.66305256f, 0.10113714f, 0.76193494f, 0.45118755f, 0.4488773f, 0.93012637f, 0.31139725f, 0.0031577414f, 0.22718209f, 0.29718128f, 0.71752393f, 0.14526285f, 0.18364605f, 0.37547293f, 0.9685261f, 0.9378056f, 0.27025697f, 0.8536382f, 0.40919214f, 0.6247997f, 0.020774715f, 0.2789666f, 0.6214883f, 0.28909984f, 0.4459083f, 0.22759606f, 0.16503142f, 0.12913509f, 0.76620036f, 0.31722352f, 0.31122422f, 0.14058389f, 0.3711774f, 0.2540991f, 0.92829734f, 0.31982893f, 0.58990836f, 0.7611616f, 0.94479626f, 0.77106464f, 0.98198724f, 0.045493614f, 0.5808194f, 0.044766188f, 0.028754123f, 0.6398209f, 0.5149536f, 0.6159741f, 0.38356403f, 0.3443942f, 0.8204024f, 0.16429621f, 0.45349202f, 0.9345274f, 0.6689286f, 0.46520096f, 0.5479114f, 0.50660115f, 0.030693837f, 0.14807424f, 0.0025167174f, 0.04072329f, 0.06662837f, 0.19923986f, 0.31228405f, 0.26450446f, 0.5282875f, 0.32404247f, 0.3938328f, 0.028723368f, 0.53065664f, 0.84379214f, 0.84157664f, 0.37586623f, 0.15792112f, 0.20647834f, 0.024251468f, 0.3573017f, 0.37901312f, 0.6181092f, 0.76309824f, 0.7608666f, 0.3481646f, 0.34048688f, 0.47856995f, 0.31012326f, 0.23520178f, 0.45539266f, 0.92912894f, 0.4204687f, 0.92543155f, 0.5307048f, 0.27608588f, 0.7496653f, 0.6049889f, 0.36525294f, 0.14689086f, 0.51323116f, 0.12193437f, 0.59619224f, 0.60478336f, 0.9294276f, 0.249309f, 0.74476606f, 0.92789376f, 0.043751504f, 0.5309229f, 0.3062958f, 0.31674966f, 0.14777556f, 0.52924913f, 0.9668007f, 0.20873389f, 0.3279674f, 0.7965414f, 0.37618962f, 0.89503884f, 0.46796778f, 0.0799155f, 0.13676843f, 0.99596673f, 0.5959752f, 0.82745814f, 0.19763403f, 0.45169583f, 0.034008075f, 0.51954156f, 0.5263711f, 0.32014525f, 0.053273566f, 0.81357837f, 0.97085255f, 0.07153194f, 0.9582462f, 0.64213526f, 0.32651472f, 0.60837305f, 0.9404863f, 
0.06993771f, 0.7587776f, 0.7886673f, 0.41194588f, 0.78207874f, 0.7781359f, 0.3276002f, 0.33506534f, 0.28078383f, 0.12973906f, 0.399713f, 0.62760603f, 0.75171447f, 0.80802286f, 0.5050624f, 0.33723688f, 0.23653711f, 0.22387893f, 0.3570362f, 0.05210913f, 0.8889524f, 0.49352857f, 0.4521699f, 0.9740411f, 0.7144635f, 0.4756838f, 0.331589f, 0.068503655f, 0.97924995f, 0.41867498f, 0.31639704f, 0.7069934f, 0.81501675f, 0.5386601f, 0.4093507f, 0.707298f, 0.9774356f, 0.72752196f, 0.1570271f, 0.9423814f, 0.9732382f, 0.71725017f, 0.3946321f, 0.62860346f, 0.06245658f, 0.90315664f, 0.5143768f, 0.8708286f, 0.84123635f, 0.92691624f, 0.639396f, 0.2552601f, 0.37173754f, 0.7914776f, 0.91429204f, 0.4736561f, 0.15064463f, 0.7540974f, 0.2862515f, 0.48185065f, 0.13227704f, 0.32188603f, 0.63464296f, 0.8106472f, 0.94166034f, 0.17569262f, 0.19304337f, 0.29407963f, 0.587708f, 0.97985137f, 0.93614686f, 0.8405717f, 0.02620014f, 0.35624048f, 0.59463245f, 0.011628275f, 0.66693187f, 0.74045765f, 0.8160365f, 0.84104806f, 0.88261247f, 0.0711487f, 0.8989867f, 0.97475845f, 0.4168518f, 0.13669337f, 0.28926903f, 0.49182004f, 0.41090083f, 0.276433f, 0.09197279f, 0.68734396f, 0.3883402f, 0.90047145f, 0.11048286f, 0.15737055f, 0.21775864f, 0.9536175f, 0.076466806f, 0.24726667f, 0.103641525f, 0.0413075f, 0.27288043f, 0.3405656f, 0.14998767f, 0.51837134f, 0.16329993f, 0.3755023f, 0.9497281f, 0.8958037f, 0.98416775f, 0.34084278f, 0.18396701f, 0.8870497f, 0.11773594f, 0.7778607f, 0.5278507f, 0.9345038f, 0.12104616f, 0.3192234f, 0.026860172f, 0.71437854f, 0.8270822f, 0.34825006f, 0.39791596f, 0.62681943f, 0.27854878f, 0.519083f, 0.9585388f, 0.9732782f, 0.24999642f, 0.18574189f, 0.92319125f, 0.2299785f, 0.78481007f, 0.4593966f, 0.18952563f, 0.4418934f, 0.75275475f, 0.47553676f, 0.47977385f, 0.516905f, 0.6218342f, 0.986334f, 0.6328223f, 0.87600803f, 0.23837951f, 0.29930744f, 0.5477805f, 0.17647119f, 0.3403492f, 0.79772884f, 0.12769036f, 0.8723695f, 0.1560829f, 0.75527936f, 0.41855234f, 0.66972154f, 0.3795148f, 
0.75438255f, 0.45185962f, 0.64733654f, 0.83693033f, 0.7853063f, 0.52869916f, 0.44457012f, 0.031068115f, 0.995698f, 0.86542577f, 0.29396066f, 0.3056323f, 0.7761462f, 0.5815433f, 0.4590591f, 0.6379277f, 203.08049f, 242.811f, 200.0787f, 248.54701f, 240.53275f, 206.88977f, 264.96545f, 215.722f, 207.14218f, 248.2029f, 260.38293f, 246.59158f, 255.92654f, 290.20236f, 282.13013f, 255.587f, 289.51746f, 250.55061f, 256.14774f, 212.82437f, 283.77695f, 234.53087f, 295.53558f, 263.51688f, 262.4394f, 295.93118f, 249.12567f, 230.53714f, 244.58417f, 212.62454f, 222.62276f, 202.04688f, 220.03893f, 219.85342f, 298.00995f, 225.98215f, 237.55687f, 233.73161f, 277.78552f, 292.03333f, 241.16255f, 239.44547f, 269.768f, 208.34856f, 223.83221f, 247.22945f, 220.80157f, 225.7253f, 267.53107f, 219.36331f, 263.37506f, 292.40854f, 238.76868f, 248.44582f, 284.12405f, 266.40955f, 297.5755f, 221.04996f, 205.62082f, 256.34137f, 216.44402f, 236.91107f, 213.73282f, 215.86444f, 256.87595f, 251.31393f, 216.1751f, 265.14798f, 213.08633f, 254.30765f, 244.74179f, 278.06122f, 262.01956f, 248.49234f, 205.56573f, 285.15247f, 291.18823f, 246.23334f, 286.69305f, 297.73892f, 222.13132f, 274.70645f, 272.9896f, 218.96129f, 263.71072f, 289.10516f, 210.93655f, 235.38228f, 240.58383f, 289.90942f, 238.94185f, 276.05884f, 239.10864f, 254.86401f, 282.10757f, 204.39113f, 238.20418f, 291.72028f, 279.3937f, 255.42195f, 223.81288f, 201.32336f, 262.53845f, 218.35716f, 291.38098f, 248.38783f, 276.37997f, 251.07683f, 295.05258f, 210.5348f, 252.41638f, 265.33124f, 294.82996f, 279.9688f, 295.2437f, 275.68787f, 202.7976f, 207.2586f, 262.63266f, 295.0467f, 288.30432f, 231.05023f, 298.57654f, 286.71002f, 222.34149f, 209.956f, 297.5865f, 204.87299f, 243.4733f, 242.39302f, 209.53899f, 221.00655f, 211.91463f, 266.0036f, 223.22115f, 266.37555f, 278.43994f, 214.11813f, 254.79947f, 234.70715f, 294.82663f, 267.89825f, 282.26373f, 285.57803f, 216.04143f, 222.16176f, 264.46344f, 216.57985f, 208.0961f, 251.9738f, -9.999269f, -9.999741f, 
-9.999561f, -9.999911f, -9.999339f, -9.999749f, -9.999292f, -9.999522f, -9.999454f, -9.9992895f, -9.999531f, -9.99933f, -9.999341f, -9.99938f, -9.999905f, -9.999054f, -9.999979f, -9.999243f, -9.999734f, -9.999235f, -9.999104f, -9.999684f, -9.999259f, -9.999619f, -9.999497f, -9.999474f, -9.999353f, -9.999263f, -9.999088f, -9.999558f, -9.999322f, -9.999186f, -9.9993925f, -9.9999075f, -9.999958f, -9.999795f, -9.999834f, -9.999768f, -9.999121f, -9.999825f, -9.999527f, -9.999656f, -9.999941f, -9.999142f, -9.999984f, -9.999141f, -9.999887f, -9.9990835f, -9.999148f, -9.9991665f, -9.999867f, -9.999421f, -9.999081f, -9.999978f, -9.999075f, -9.999531f, -9.999142f, -9.999553f, -9.999812f, -9.999398f, -9.999295f, -9.9992285f, -9.999865f, -9.999482f, -9.999524f, -9.999773f, -9.999741f, -9.999358f, -9.999916f, -9.999248f, -9.999274f, -9.999893f, -9.999962f, -9.999569f, -9.9997225f, -9.999103f, -9.999036f, -9.999721f, -9.999645f, -9.999536f, -9.999113f, -9.9998455f, -9.999898f, -9.999262f, -9.999967f, -9.999528f, -9.9996195f, -9.999813f, -9.99977f, -9.999597f, -9.999661f, -9.999434f, -9.999925f, -9.999199f, -9.999759f, -9.999627f, -9.999813f, -9.999361f, -9.999325f, -9.999499f, -9.999843f, -9.999769f, -9.999987f, -9.999241f, -9.999264f, -9.999075f, -9.9998665f, -9.99927f, -9.999766f, -9.999045f, -9.999036f, -9.999232f, -9.999256f, -9.999415f, -9.999601f, -9.999707f, -9.999876f, -9.999688f, -9.999064f, -9.999532f, -9.99921f, -9.99905f, -9.999712f, -9.999656f, -9.999218f, -9.999016f, -9.999569f, -9.999398f, -9.999709f, -9.999183f, -9.999058f, -9.999427f, -9.999155f, -9.999367f, -9.999406f, -9.99968f, -9.999578f, -9.999454f, -9.999143f, -9.999611f, -9.999365f, -9.999709f, -9.9992285f, -9.9998255f, -9.999111f, -9.999831f, -9.999511f, -9.999469f, -9.99995f, -9.999711f, 0.5344577f, 0.28066808f, 0.56196564f, 0.5902792f, 0.8473387f, 0.24633567f, 0.92718124f, 0.17364842f, 0.31536132f, 0.22439669f, 0.46772173f, 0.23150134f, 0.13030241f, 0.7544915f, 0.32698f, 0.59160626f, 0.5460109f, 
0.84683007f, 0.23899049f, 0.8182671f, 0.7197824f, 0.8125036f, 0.8256115f, 0.40416914f, 0.66582596f, 0.0867179f, 0.0084044915f, 0.49205506f, 0.721172f, 0.40177187f, 0.29393357f, 0.015860511f, 0.93151456f, 0.4811004f, 0.54983306f, 0.9995074f, 0.27758396f, 0.22854643f, 0.5583765f, 0.6666239f, 0.85158247f, 0.21441942f, 0.6990569f, 0.017201606f, 0.530989f, 0.21839866f, 0.08578203f, 0.10198945f, 0.039713096f, 0.7290501f, 0.6342606f, 0.51234406f, 0.12498403f, 0.25547478f, 0.8394662f, 0.8280061f, 0.81155413f, 0.012060473f, 0.057682104f, 0.7739566f, 0.08708117f, 0.5193988f, 0.8415829f, 0.7520876f, 0.007182941f, 0.7731886f, 0.33688733f, 0.19361727f, 0.84651196f, 0.22044875f, 0.54851544f, 0.6421493f, 0.58298194f, 0.6989305f, 0.4031829f, 0.41380137f, 0.20955233f, 0.47619122f, 0.65416205f, 0.44766036f, 0.7429968f, 0.47871348f, 0.36874366f, 0.76017255f, 0.63620025f, 0.6808348f, 0.8399061f, 0.72613007f, 0.97575134f, 0.4643534f, 0.7247778f, 0.04549828f, 0.5940095f, 0.5128606f, 0.5878437f, 0.46860144f, 0.6618377f, 0.83293724f, 0.26350665f, 0.24366878f, 0.7788333f, 0.74646133f, 0.5429722f, 0.26375026f, 0.3656472f, 0.12205635f, 0.7138406f, 0.7608406f, 0.60281974f, 0.33415812f, 0.16791728f, 0.68858635f, 0.4469567f, 0.04436514f, 0.5672564f, 0.89869404f, 0.6294232f, 0.9793584f, 0.092907295f, 0.51271373f, 0.3846658f, 0.79488826f, 0.30746242f, 0.9191275f, 0.9108379f, 0.78182805f, 0.97138745f, 0.9847524f, 0.8531674f, 0.022702204f, 0.621023f, 0.7043253f, 0.22311302f, 0.6966194f, 0.36192545f, 0.8646154f, 0.94498384f, 0.8819606f, 0.39050183f, 0.66352f, 0.9537454f, 0.9776376f, 0.07475392f, 0.14165574f, 0.9068708f, 0.07851684f, 0.098995164f, 0.4659044f, 0.94835365f, 0.8669782f, 0.47114196f, 0.24303971f, 0.36649755f, 0.38048944f, 0.3541504f, 0.3041829f, 0.04842617f, 0.5725111f, 0.68421566f, 0.18098183f, 0.96466625f, 0.32582006f, 0.47631285f, 0.17308696f, 0.5422008f, 0.43860963f, 0.94000804f, 0.90531296f, 0.24555893f, 0.15075591f, 0.8892247f, 0.80251575f, 0.43217945f, 0.5427292f, 0.58730876f, 
0.9010511f, 0.75740033f, 0.16942962f, 0.77507013f, 0.7471421f, 0.18903506f, 0.96626693f, 0.43212372f, 0.9690648f, 0.31306309f, 0.62832534f, 0.7866172f, 0.79370797f, 0.32908842f, 0.5066318f, 0.34556115f, 0.1002444f, 0.90521127f, 0.3832993f, 0.3292787f, 0.9103993f, 0.17307699f, 0.36895168f, 0.7688117f, 0.7769159f, 0.7559714f, 0.7624208f, 0.4072027f, 0.6700012f, 0.10266004f, 0.46105045f, 0.8847699f, 0.3703581f, 0.79471564f, 0.18433845f, 0.26636884f, 0.5759068f, 0.025358567f, 0.6020128f, 0.85619676f, 0.77020776f, 0.8782154f, 0.605358f, 0.82230324f, 0.3943509f, 0.10723012f, 0.23251477f, 0.41980323f, 0.44982743f, 0.3976f, 0.24261324f, 0.09185766f, 0.9083403f, 0.8951799f, 0.93775445f, 0.4116088f, 0.8328249f, 0.060170095f, 0.23731631f, 0.043149915f, 0.8760627f, 0.9832404f, 0.8160704f, 0.35087004f, 0.99301636f, 0.58498734f, 0.31982517f, 0.28746068f, 0.10150419f, 0.64765805f, 0.93925524f, 0.6288832f, 0.5287214f, 0.6787367f, 0.7280878f, 0.8089835f, 0.45152652f, 0.28626585f, 0.37735057f, 0.84606636f, 0.17912877f, 0.1262947f, 0.93639624f, 0.74632484f, 0.10586514f, 0.2034781f, 0.3999192f, 0.6237884f, 0.58933526f, 0.11924875f, 0.16451561f, 0.5822025f, 0.3976624f, 0.9056206f, 0.66830647f, 0.801052f, 0.6321766f, 0.47481045f, 0.6505067f, 0.5119758f, 0.8057609f, 0.059799645f, 0.014172987f, 0.637021f, 0.878043f, 0.19765095f, 0.7158634f, 0.6288858f, 0.41249686f, 0.2579455f, 0.32608235f, 0.153792f, 0.030521471f, 0.5082303f, 0.33682522f, 0.5155604f, 0.8285316f, 0.7492474f, 0.56472075f, 0.7964325f, 0.8807934f, 0.21563967f, 0.67301345f, 0.32791767f, 0.47523862f}; -}; - - -class EuclideanDistanceTest : public testing::Test { -public: - float x[16] = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f}; - float y[16] = {2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f}; - float result[9] = {0.f}; - Nd4jLong shapeBuffer[12] = {4,2,2,2,2,8,4,2,1,0,1,99}; - int dimensionLength = 3; - int dimension[3] = {1,2,3}; - float extraVals[2] = 
{0.f, 0.f}; - int opNum = 1; - - std::vector dim = {1, 2, 3}; -}; - -#ifndef __CUDABLAS__ -TEST_F(EuclideanDistanceTest,Test1) { - //int *tadShapeBuffer = shape::computeResultShape(shapeBuffer,dimension,dimensionLength); - sd::ArrayOptions::setDataType(shapeBuffer, sd::DataType::FLOAT32); - auto tadShapeBuffer = sd::ShapeUtils::evalReduceShapeInfo('c', dim, shapeBuffer, false, true, nullptr); - //shape::printShapeInfoLinear("tadShape", tadShapeBuffer); - functions::reduce3::Reduce3::exec(opNum, - x, - shapeBuffer, - extraVals, - y, - shapeBuffer, - result, - tadShapeBuffer, - dimension, - dimensionLength, 0, 2); - - ASSERT_EQ(result[1],result[0]); -} - - -TEST_F(StdTest,MultiDimTest) { - auto xShapeInfo = shape::shapeBuffer(4, sd::DataType::FLOAT32, examplesShape); - //int *resultShapeInfo = shape::computeResultShape(xShapeInfo,dimensionsForStd,dimensionLength); - auto resultShapeInfo = sd::ShapeUtils::evalReduceShapeInfo('c', dimsForStd, xShapeInfo, false, true, nullptr); - int resultLengthAssertion = 5; - ASSERT_EQ(resultLengthAssertion,shape::length(resultShapeInfo)); - shape::TAD *tad = new shape::TAD; - tad->init(xShapeInfo,dimensionsForStd,dimensionLength); - float none[1] = {0.f}; - tad->createTadOnlyShapeInfo(); - tad->createOffsets(); - int tadElementWiseStride = shape::elementWiseStride(tad->tadOnlyShapeInfo); - ASSERT_EQ(0,tadElementWiseStride); - float *result = new float[shape::length(resultShapeInfo)]; - functions::reduce::ReduceFloatFunction::exec( - opNum, - x, - xShapeInfo, - none, - result, - resultShapeInfo, - dimensionsForStd, - dimensionLength, - tad->tadOnlyShapeInfo, - tad->tadOffsets, 0, shape::length(resultShapeInfo)); - - // for(int i = 0; i < shape::length(resultShapeInfo); i++) - // printf("%f\n",result[i]); - - delete[] result; - delete tad; - delete[] xShapeInfo; -} - - - - - -TEST_F(ReduceTest,MatrixTest) { - int opNum = 4; - auto xShapeInfo = sd::ShapeBuilders::createShapeInfo(sd::DataType::FLOAT32, 'c', 2, shape); - //int 
*resultShapeInfo = shape::computeResultShape(xShapeInfo,dimension,dimensionLength); - auto resultShapeInfo = sd::ShapeUtils::evalReduceShapeInfo('c', dim, xShapeInfo, false, true, nullptr); - int resultLengthAssertion = 3; - ASSERT_EQ(resultLengthAssertion,shape::length(resultShapeInfo)); - shape::TAD *tad = new shape::TAD; - tad->init(xShapeInfo,dimension,dimensionLength); - float none[1] = {0.f}; - tad->createTadOnlyShapeInfo(); - tad->createOffsets(); - auto tadElementWiseStride = shape::elementWiseStride(tad->tadOnlyShapeInfo); - ASSERT_EQ(3,tadElementWiseStride); - functions::reduce::ReduceFloatFunction::exec( - opNum, - x, - xShapeInfo, - none, - result, - resultShapeInfo, - dimension, - dimensionLength, - tad->tadOnlyShapeInfo, - tad->tadOffsets, 0, tad->numTads); - - // for(int i = 0; i < shape::length(resultShapeInfo); i++) - // printf("%f\n",result[i]); - - delete tad; - delete[] xShapeInfo; -} - -#endif \ No newline at end of file diff --git a/libnd4j/tests_cpu/layers_tests/ShapeUtilsTests.cpp b/libnd4j/tests_cpu/layers_tests/ShapeUtilsTests.cpp index 36fce0dd9..25f4f2c18 100644 --- a/libnd4j/tests_cpu/layers_tests/ShapeUtilsTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/ShapeUtilsTests.cpp @@ -60,7 +60,7 @@ TEST_F(ShapeUtilsTests, EvalBroadcastShapeInfo_1) NDArray x(xShapeInfo); NDArray y(yShapeInfo); - Nd4jLong *newShapeInfo = nullptr; + const Nd4jLong *newShapeInfo = nullptr; ShapeUtils::evalBroadcastShapeInfo(x, y, false, newShapeInfo, nullptr); ASSERT_TRUE(shape::equalsStrict(expShapeInfo, newShapeInfo)); @@ -77,7 +77,7 @@ TEST_F(ShapeUtilsTests, EvalBroadcastShapeInfo_2) NDArray x(xShapeInfo); NDArray y(yShapeInfo); - Nd4jLong *newShapeInfo = nullptr; + const Nd4jLong *newShapeInfo = nullptr; ShapeUtils::evalBroadcastShapeInfo(x, y, false, newShapeInfo, nullptr); ASSERT_TRUE(shape::equalsStrict(expShapeInfo, newShapeInfo)); @@ -94,7 +94,7 @@ TEST_F(ShapeUtilsTests, EvalBroadcastShapeInfo_3) NDArray x(xShapeInfo); NDArray y(yShapeInfo); - Nd4jLong 
*newShapeInfo = nullptr; + const Nd4jLong *newShapeInfo = nullptr; ShapeUtils::evalBroadcastShapeInfo(x, y, false, newShapeInfo, nullptr); ASSERT_TRUE(shape::equalsStrict(expShapeInfo, newShapeInfo)); @@ -111,7 +111,7 @@ TEST_F(ShapeUtilsTests, EvalBroadcastShapeInfo_4) NDArray x(xShapeInfo); NDArray y(yShapeInfo); - Nd4jLong *newShapeInfo = nullptr; + const Nd4jLong *newShapeInfo = nullptr; ShapeUtils::evalBroadcastShapeInfo(x, y, false, newShapeInfo, nullptr); //for(int i=0; i<2*newShapeInfo[0]+4; ++i) // std::cout<('c', {2,4,5}); std::vector dimensions = {1}; - auto newShapeInfo = ShapeUtils::evalReduceShapeInfo('c', dimensions, x.getShapeInfo()); + auto newShapeInfo = ShapeUtils::evalReduceShapeInfo('c', dimensions, x.shapeInfo()); - ASSERT_TRUE(shape::shapeEquals(expected.getShapeInfo(), newShapeInfo)); + ASSERT_TRUE(shape::shapeEquals(expected.shapeInfo(), newShapeInfo)); } ////////////////////////////////////////////////////////////////// @@ -141,9 +141,9 @@ TEST_F(ShapeUtilsTests, evalReduceShapeInfo_test2) auto expected = NDArrayFactory::create('c', {2,1,4,5}); std::vector dimensions = {1}; - auto newShapeInfo = ShapeUtils::evalReduceShapeInfo('c', dimensions, x.getShapeInfo(), true); + auto newShapeInfo = ShapeUtils::evalReduceShapeInfo('c', dimensions, x.shapeInfo(), true); - ASSERT_TRUE(shape::shapeEquals(expected.getShapeInfo(), newShapeInfo)); + ASSERT_TRUE(shape::shapeEquals(expected.shapeInfo(), newShapeInfo)); } ////////////////////////////////////////////////////////////////// @@ -154,9 +154,9 @@ TEST_F(ShapeUtilsTests, evalReduceShapeInfo_test3) auto expected = NDArrayFactory::create('c', {1,1,1,5}); std::vector dimensions = {0,1,2}; - auto newShapeInfo = ShapeUtils::evalReduceShapeInfo('c', dimensions, x.getShapeInfo(), true); + auto newShapeInfo = ShapeUtils::evalReduceShapeInfo('c', dimensions, x.shapeInfo(), true); - ASSERT_TRUE(shape::shapeEquals(expected.getShapeInfo(), newShapeInfo)); + ASSERT_TRUE(shape::shapeEquals(expected.shapeInfo(), 
newShapeInfo)); } @@ -168,9 +168,9 @@ TEST_F(ShapeUtilsTests, evalReduceShapeInfo_test4) auto expected = NDArrayFactory::create('c', {1,1,1,1}); std::vector dimensions = {0,1,2,3}; - auto newShapeInfo = ShapeUtils::evalReduceShapeInfo('c', dimensions, x.getShapeInfo(), true); + auto newShapeInfo = ShapeUtils::evalReduceShapeInfo('c', dimensions, x.shapeInfo(), true); - ASSERT_TRUE(shape::shapeEquals(expected.getShapeInfo(), newShapeInfo)); + ASSERT_TRUE(shape::shapeEquals(expected.shapeInfo(), newShapeInfo)); } TEST_F(ShapeUtilsTests, Test_Strings_1) { diff --git a/libnd4j/tests_cpu/layers_tests/SparseUtilsTest.cpp b/libnd4j/tests_cpu/layers_tests/SparseUtilsTest.cpp index becd5a21f..37f52568f 100644 --- a/libnd4j/tests_cpu/layers_tests/SparseUtilsTest.cpp +++ b/libnd4j/tests_cpu/layers_tests/SparseUtilsTest.cpp @@ -131,7 +131,7 @@ TEST_F(SparseUtilsTest, SortCOOindices_Test) { 34, 35, 36, 37, 38, 39 }); - sd::sparse::SparseUtils::sortCooIndicesGeneric(indicesArr, reinterpret_cast(values.getBuffer()), nnz, rank); + sd::sparse::SparseUtils::sortCooIndicesGeneric(indicesArr, reinterpret_cast(values.buffer()), nnz, rank); for ( int i = 0; i < rank * nnz; ++i){ ASSERT_EQ(expIndicesArr[i], indicesArr[i]); diff --git a/libnd4j/tests_cpu/layers_tests/TadTests.cpp b/libnd4j/tests_cpu/layers_tests/TadTests.cpp index 5dfdf401d..a2cdec003 100644 --- a/libnd4j/tests_cpu/layers_tests/TadTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/TadTests.cpp @@ -51,7 +51,7 @@ TEST_F(TadTests, Test4DTad1) { int dim = 1; shape::TAD tad; - tad.init(arrayBad->getShapeInfo(), &dim, 1); + tad.init(arrayBad->shapeInfo(), &dim, 1); tad.createTadOnlyShapeInfo(); tad.createOffsets(); @@ -70,10 +70,10 @@ TEST_F(TadTests, TestNumTads1) { std::vector dim({0}); - Nd4jLong tadLengthX = shape::tadLength(x.getShapeInfo(), dim.data(), dim.size()); + Nd4jLong tadLengthX = shape::tadLength(x.shapeInfo(), dim.data(), dim.size()); Nd4jLong numTadsX = x.lengthOf() / tadLengthX; - Nd4jLong tadLengthY = 
shape::tadLength(y.getShapeInfo(), dim.data(), dim.size()); + Nd4jLong tadLengthY = shape::tadLength(y.shapeInfo(), dim.data(), dim.size()); Nd4jLong numTadsY = y.lengthOf() / tadLengthY; ASSERT_EQ(2, tadLengthX); @@ -91,18 +91,18 @@ TEST_F(TadTests, TestShapeTad_1) { NDArray input(buff, shapeInfo); std::vector dimensions = {0,1,2}; - Nd4jLong tadLength = shape::tadLength(input.getShapeInfo(), dimensions.data(), dimensions.size()); + Nd4jLong tadLength = shape::tadLength(input.shapeInfo(), dimensions.data(), dimensions.size()); Nd4jLong numTads = input.lengthOf() / tadLength; shape::TAD tad; - tad.init(input.getShapeInfo(), dimensions.data(), dimensions.size()); + tad.init(input.shapeInfo(), dimensions.data(), dimensions.size()); tad.createTadOnlyShapeInfo(); tad.createOffsets(); auto tadShapeInfo = new Nd4jLong[shape::shapeInfoLength(tad.tadOnlyShapeInfo[0])]; std::memcpy(tadShapeInfo, tad.tadOnlyShapeInfo, shape::shapeInfoByteLength(tad.tadOnlyShapeInfo)); - float* tadBuff = reinterpret_cast(input.getBuffer()) + tad.tadOffsets[0]; + float* tadBuff = reinterpret_cast(input.buffer()) + tad.tadOffsets[0]; NDArray tadArr(tadBuff, tadShapeInfo); ASSERT_TRUE(numTads==1); @@ -296,7 +296,7 @@ TEST_F(TadTests, outerArrayIndexes_1) { const int n1[] = {20,25,30,35, 80,85,90,95}; int minIdx = 5; - int N = shape::outerArrayIndexes(maxIdxs, minIdx, x.getShapeInfo(), y1.getShapeInfo(), dimsToExclude1.data()); + int N = shape::outerArrayIndexes(maxIdxs, minIdx, x.shapeInfo(), y1.shapeInfo(), dimsToExclude1.data()); ASSERT_TRUE(N == x.lengthOf()/y1.lengthOf()); for(int i = 0; i < N; ++i) ASSERT_TRUE(n1[i] == maxIdxs[i]); @@ -306,7 +306,7 @@ TEST_F(TadTests, outerArrayIndexes_1) { const int n2[] = {12,32,52, 72,92,112}; minIdx = 12; - N = shape::outerArrayIndexes(maxIdxs, minIdx, x.getShapeInfo(), y2.getShapeInfo(), dimsToExclude2.data()); + N = shape::outerArrayIndexes(maxIdxs, minIdx, x.shapeInfo(), y2.shapeInfo(), dimsToExclude2.data()); ASSERT_TRUE(N == 
x.lengthOf()/y2.lengthOf()); for(int i = 0; i < N; ++i) ASSERT_TRUE(n2[i] == maxIdxs[i]); @@ -316,7 +316,7 @@ TEST_F(TadTests, outerArrayIndexes_1) { const int n3[] = {64,69,74,79,84,89,94,99,104,109,114,119}; minIdx = 9; - N = shape::outerArrayIndexes(maxIdxs, minIdx, x.getShapeInfo(), y3.getShapeInfo(), dimsToExclude3.data()); + N = shape::outerArrayIndexes(maxIdxs, minIdx, x.shapeInfo(), y3.shapeInfo(), dimsToExclude3.data()); ASSERT_TRUE(N == x.lengthOf()/y3.lengthOf()); for(int i = 0; i < N; ++i) ASSERT_TRUE(n3[i] == maxIdxs[i]); @@ -326,7 +326,7 @@ TEST_F(TadTests, outerArrayIndexes_1) { const int n4[] = {20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39}; minIdx = 1; - N = shape::outerArrayIndexes(maxIdxs, minIdx, x.getShapeInfo(), y4.getShapeInfo(), dimsToExclude4.data()); + N = shape::outerArrayIndexes(maxIdxs, minIdx, x.shapeInfo(), y4.shapeInfo(), dimsToExclude4.data()); ASSERT_TRUE(N == x.lengthOf()/y4.lengthOf()); for(int i = 0; i < N; ++i) ASSERT_TRUE(n4[i] == maxIdxs[i]); @@ -336,7 +336,7 @@ TEST_F(TadTests, outerArrayIndexes_1) { const int n5[] = {65,66,67,68,69, 85,86,87,88,89, 105,106,107,108,109}; minIdx = 5; - N = shape::outerArrayIndexes(maxIdxs, minIdx, x.getShapeInfo(), y5.getShapeInfo(), dimsToExclude5.data()); + N = shape::outerArrayIndexes(maxIdxs, minIdx, x.shapeInfo(), y5.shapeInfo(), dimsToExclude5.data()); ASSERT_TRUE(N == x.lengthOf()/y5.lengthOf()); for(int i = 0; i < N; ++i) ASSERT_TRUE(n5[i] == maxIdxs[i]); @@ -346,7 +346,7 @@ TEST_F(TadTests, outerArrayIndexes_1) { const int n6[] = {65,66,67,68,69}; minIdx = 13; - N = shape::outerArrayIndexes(maxIdxs, minIdx, x.getShapeInfo(), y6.getShapeInfo(), dimsToExclude6.data()); + N = shape::outerArrayIndexes(maxIdxs, minIdx, x.shapeInfo(), y6.shapeInfo(), dimsToExclude6.data()); ASSERT_TRUE(N == x.lengthOf()/y6.lengthOf()); for(int i = 0; i < N; ++i) ASSERT_TRUE(n6[i] == maxIdxs[i]); @@ -356,7 +356,7 @@ TEST_F(TadTests, outerArrayIndexes_1) { const int n7[] = {15,16,17,18,19, 
35,36,37,38,39, 55,56,57,58,59, 75,76,77,78,79, 95,96,97,98,99, 115,116,117,118,119}; minIdx = 3; - N = shape::outerArrayIndexes(maxIdxs, minIdx, x.getShapeInfo(), y7.getShapeInfo(), dimsToExclude7.data()); + N = shape::outerArrayIndexes(maxIdxs, minIdx, x.shapeInfo(), y7.shapeInfo(), dimsToExclude7.data()); ASSERT_TRUE(N == x.lengthOf()/y7.lengthOf()); for(int i = 0; i < N; ++i) ASSERT_TRUE(n7[i] == maxIdxs[i]); @@ -366,7 +366,7 @@ TEST_F(TadTests, outerArrayIndexes_1) { const int n8[] = {0,5,10,15, 20,25,30,35, 40,45,50,55, 60,65,70,75, 80,85,90,95, 100,105,110,115}; minIdx = 0; - N = shape::outerArrayIndexes(maxIdxs, minIdx, x.getShapeInfo(), y8.getShapeInfo(), dimsToExclude8.data()); + N = shape::outerArrayIndexes(maxIdxs, minIdx, x.shapeInfo(), y8.shapeInfo(), dimsToExclude8.data()); ASSERT_TRUE(N == x.lengthOf()/y8.lengthOf()); for(int i = 0; i < N; ++i) ASSERT_TRUE(n8[i] == maxIdxs[i]); @@ -376,7 +376,7 @@ TEST_F(TadTests, outerArrayIndexes_1) { const int n9[] = {60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119}; minIdx = 1; - N = shape::outerArrayIndexes(maxIdxs, minIdx, x.getShapeInfo(), y9.getShapeInfo(), dimsToExclude9.data()); + N = shape::outerArrayIndexes(maxIdxs, minIdx, x.shapeInfo(), y9.shapeInfo(), dimsToExclude9.data()); ASSERT_TRUE(N == x.lengthOf()/y9.lengthOf()); for(int i = 0; i < N; ++i) ASSERT_TRUE(n9[i] == maxIdxs[i]); @@ -386,7 +386,7 @@ TEST_F(TadTests, outerArrayIndexes_1) { const int n10[] = {11, 71}; minIdx = 11; - N = shape::outerArrayIndexes(maxIdxs, minIdx, x.getShapeInfo(), y10.getShapeInfo(), dimsToExclude10.data()); + N = shape::outerArrayIndexes(maxIdxs, minIdx, x.shapeInfo(), y10.shapeInfo(), dimsToExclude10.data()); ASSERT_TRUE(N == x.lengthOf()/y10.lengthOf()); for(int i = 0; i < N; ++i) ASSERT_TRUE(n10[i] == maxIdxs[i]); @@ -396,7 +396,7 @@ TEST_F(TadTests, 
outerArrayIndexes_1) { const int n11[] = {66, 86, 106}; minIdx = 26; - N = shape::outerArrayIndexes(maxIdxs, minIdx, x.getShapeInfo(), y11.getShapeInfo(), dimsToExclude11.data()); + N = shape::outerArrayIndexes(maxIdxs, minIdx, x.shapeInfo(), y11.shapeInfo(), dimsToExclude11.data()); ASSERT_TRUE(N == x.lengthOf()/y11.lengthOf()); for(int i = 0; i < N; ++i) ASSERT_TRUE(n11[i] == maxIdxs[i]); @@ -406,7 +406,7 @@ TEST_F(TadTests, outerArrayIndexes_1) { const int n12[] = {0,2,4,5,7,9,10,12,14,15,17,19,60,62,64,65,67,69,70,72,74,75,77,79}; minIdx = 0; - N = shape::outerArrayIndexes(maxIdxs, minIdx, x.getShapeInfo(), y12.getShapeInfo(), dimsToExclude12.data()); + N = shape::outerArrayIndexes(maxIdxs, minIdx, x.shapeInfo(), y12.shapeInfo(), dimsToExclude12.data()); for(int i = 0; i < N; ++i) ASSERT_TRUE(n12[i] == maxIdxs[i]); @@ -415,7 +415,7 @@ TEST_F(TadTests, outerArrayIndexes_1) { const int n13[] = {1,3,6,8,11,13,16,18,61,63,66,68,71,73,76,78}; minIdx = 1; - N = shape::outerArrayIndexes(maxIdxs, minIdx, x.getShapeInfo(), y13.getShapeInfo(), dimsToExclude13.data()); + N = shape::outerArrayIndexes(maxIdxs, minIdx, x.shapeInfo(), y13.shapeInfo(), dimsToExclude13.data()); for(int i = 0; i < N; ++i) ASSERT_TRUE(n13[i] == maxIdxs[i]); @@ -423,7 +423,7 @@ TEST_F(TadTests, outerArrayIndexes_1) { const int n14[] = {12,32,52, 72,92,112}; minIdx = 12; - N = shape::outerArrayIndexes(maxIdxs, minIdx, x.getShapeInfo(), y14.getShapeInfo(), nullptr); + N = shape::outerArrayIndexes(maxIdxs, minIdx, x.shapeInfo(), y14.shapeInfo(), nullptr); ASSERT_TRUE(N == x.lengthOf()/y14.lengthOf()); for(int i = 0; i < N; ++i) ASSERT_TRUE(n14[i] == maxIdxs[i]); @@ -432,7 +432,7 @@ TEST_F(TadTests, outerArrayIndexes_1) { const int n15[] = {11, 71}; minIdx = 11; - N = shape::outerArrayIndexes(maxIdxs, minIdx, x.getShapeInfo(), y15.getShapeInfo(), nullptr); + N = shape::outerArrayIndexes(maxIdxs, minIdx, x.shapeInfo(), y15.shapeInfo(), nullptr); ASSERT_TRUE(N == x.lengthOf()/y15.lengthOf()); for(int i 
= 0; i < N; ++i) ASSERT_TRUE(n15[i] == maxIdxs[i]); diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/converters/ImportClassMapping.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/converters/ImportClassMapping.java index 0c6724e9a..a053a40ab 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/converters/ImportClassMapping.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/converters/ImportClassMapping.java @@ -73,6 +73,10 @@ public class ImportClassMapping { org.nd4j.linalg.api.ops.impl.broadcast.BroadcastRSubOp.class, org.nd4j.linalg.api.ops.impl.broadcast.BroadcastSubOp.class, org.nd4j.linalg.api.ops.impl.broadcast.BroadcastTo.class, + org.nd4j.linalg.api.ops.compression.EncodeBitmap.class, + org.nd4j.linalg.api.ops.compression.DecodeBitmap.class, + org.nd4j.linalg.api.ops.compression.EncodeThreshold.class, + org.nd4j.linalg.api.ops.compression.DecodeThreshold.class, org.nd4j.linalg.api.ops.impl.shape.Create.class, org.nd4j.linalg.api.ops.impl.broadcast.bool.BroadcastEqualTo.class, org.nd4j.linalg.api.ops.impl.broadcast.bool.BroadcastGreaterThan.class, diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ndarray/BaseNDArray.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ndarray/BaseNDArray.java index 55ce8039b..835a2f4cb 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ndarray/BaseNDArray.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ndarray/BaseNDArray.java @@ -5341,7 +5341,7 @@ public abstract class BaseNDArray implements INDArray, Iterable { if (!this.isView()) { Nd4j.getExecutioner().commit(); - DataBuffer buffer = Nd4j.createBuffer(this.length(), false); + DataBuffer buffer = Nd4j.createBuffer(this.dataType(), this.length(), false); 
Nd4j.getMemoryManager().memcpy(buffer, this.data()); copy = Nd4j.createArrayFromShapeBuffer(buffer, this.shapeInfoDataBuffer()); diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/compression/DecodeBitmap.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/compression/DecodeBitmap.java new file mode 100644 index 000000000..954fc76c1 --- /dev/null +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/compression/DecodeBitmap.java @@ -0,0 +1,55 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +package org.nd4j.linalg.api.ops.compression; + +import lombok.NonNull; +import org.nd4j.linalg.api.buffer.DataType; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.ops.DynamicCustomOp; +import org.nd4j.linalg.factory.Nd4j; + +import java.util.Arrays; +import java.util.List; + +/** + * Bitmap decoding op wrapper. Used in gradients sharing. 
+ * @author raver119@gmail.com + */ +public class DecodeBitmap extends DynamicCustomOp { + + public DecodeBitmap() { + // + } + + public DecodeBitmap(@NonNull INDArray encoded, @NonNull INDArray updates) { + addInputArgument(updates, encoded); + addOutputArgument(updates); + + // this op ALWAYS modifies updates array + setInPlace(true); + } + + @Override + public String opName() { + return "decode_bitmap"; + } + + @Override + public List calculateOutputDataTypes(List dataTypes) { + return Arrays.asList(inputArguments.get(0).dataType(), DataType.INT32); + } +} diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/compression/DecodeThreshold.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/compression/DecodeThreshold.java new file mode 100644 index 000000000..c4eed6f24 --- /dev/null +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/compression/DecodeThreshold.java @@ -0,0 +1,54 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +package org.nd4j.linalg.api.ops.compression; + +import lombok.NonNull; +import org.nd4j.linalg.api.buffer.DataType; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.ops.DynamicCustomOp; + +import java.util.Arrays; +import java.util.List; + +/** + * Sparse threshold decoding op wrapper. Used in gradients sharing. + * @author raver119@gmail.com + */ +public class DecodeThreshold extends DynamicCustomOp { + + public DecodeThreshold() { + // + } + + public DecodeThreshold(@NonNull INDArray encoded, @NonNull INDArray updates) { + addInputArgument(updates, encoded); + addOutputArgument(updates); + + // this op ALWAYS modifies updates array + setInPlace(true); + } + + @Override + public String opName() { + return "decode_threshold"; + } + + @Override + public List calculateOutputDataTypes(List dataTypes) { + return Arrays.asList(inputArguments.get(0).dataType(), DataType.INT32); + } +} diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/compression/EncodeBitmap.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/compression/EncodeBitmap.java new file mode 100644 index 000000000..683237c40 --- /dev/null +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/compression/EncodeBitmap.java @@ -0,0 +1,64 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. 
+ * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +package org.nd4j.linalg.api.ops.compression; + +import lombok.NonNull; +import org.nd4j.linalg.api.buffer.DataType; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.ops.DynamicCustomOp; +import org.nd4j.linalg.factory.Nd4j; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +/** + * Bitmap encoding op wrapper. Used in gradients sharing. + * @author raver119@gmail.com + */ +public class EncodeBitmap extends DynamicCustomOp { + protected float threshold = 1e-3f; + + public EncodeBitmap() { + // + } + + public EncodeBitmap(@NonNull INDArray updates, float threshold) { + this(updates, Nd4j.create(DataType.INT32, updates.length() / 16 + 5), Nd4j.scalar(DataType.INT32, 0), threshold); + } + + public EncodeBitmap(@NonNull INDArray updates, @NonNull INDArray encoded, @NonNull INDArray counter, float threshold) { + addInputArgument(updates); + addOutputArgument(updates, encoded, counter); + addTArgument(threshold); + + this.threshold = threshold; + + // this op ALWAYS modifies updates array + setInPlace(true); + } + + @Override + public String opName() { + return "encode_bitmap"; + } + + @Override + public List calculateOutputDataTypes(List dataTypes) { + return Arrays.asList(inputArguments.get(0).dataType(), DataType.INT32, DataType.INT32); + } +} diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/compression/EncodeThreshold.java 
b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/compression/EncodeThreshold.java new file mode 100644 index 000000000..621b459e9 --- /dev/null +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/compression/EncodeThreshold.java @@ -0,0 +1,63 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +package org.nd4j.linalg.api.ops.compression; + +import lombok.NonNull; +import org.nd4j.linalg.api.buffer.DataType; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.ops.DynamicCustomOp; +import org.nd4j.linalg.factory.Nd4j; + +import java.util.Arrays; +import java.util.List; + +/** + * Sparse threshold encoding op wrapper. Used in gradients sharing. 
+ * @author raver119@gmail.com + */ +public class EncodeThreshold extends DynamicCustomOp { + protected float threshold = 1e-3f; + protected int boundary = Integer.MAX_VALUE; + + public EncodeThreshold() { + // + } + + public EncodeThreshold(@NonNull INDArray updates, float threshold) { + this(updates, threshold, Integer.MAX_VALUE); + } + + public EncodeThreshold(@NonNull INDArray updates, float threshold, @NonNull Integer boundary) { + addInputArgument(updates); + + addTArgument(threshold); + addIArgument(boundary.intValue()); + + this.threshold = threshold; + this.boundary = boundary; + } + + @Override + public String opName() { + return "encode_threshold"; + } + + @Override + public List calculateOutputDataTypes(List dataTypes) { + return Arrays.asList(inputArguments.get(0).dataType(), DataType.INT32); + } +} diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/executioner/DefaultOpExecutioner.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/executioner/DefaultOpExecutioner.java index e65bf4860..9f4217b68 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/executioner/DefaultOpExecutioner.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/executioner/DefaultOpExecutioner.java @@ -30,6 +30,10 @@ import org.nd4j.linalg.api.ndarray.INDArrayStatistics; import org.nd4j.linalg.api.ops.*; import org.nd4j.linalg.api.ops.aggregates.Aggregate; import org.nd4j.linalg.api.ops.aggregates.Batch; +import org.nd4j.linalg.api.ops.compression.DecodeBitmap; +import org.nd4j.linalg.api.ops.compression.DecodeThreshold; +import org.nd4j.linalg.api.ops.compression.EncodeBitmap; +import org.nd4j.linalg.api.ops.compression.EncodeThreshold; import org.nd4j.linalg.api.ops.impl.scatter.ScatterUpdate; import org.nd4j.linalg.api.ops.impl.summarystats.Variance; import org.nd4j.linalg.api.rng.Random; @@ -685,38 +689,41 @@ public 
abstract class DefaultOpExecutioner implements OpExecutioner { @Override public INDArray thresholdEncode(INDArray input, double threshold) { - throw new UnsupportedOperationException("Not yet implemented"); + return thresholdEncode(input, threshold, Integer.MAX_VALUE); } @Override public INDArray thresholdEncode(INDArray input, double threshold, Integer boundary) { - throw new UnsupportedOperationException("Not yet implemented"); + val result = Nd4j.exec(new EncodeThreshold(input, (float) threshold, boundary))[1]; + + return result.getInt(0) > 0 ? result : null; } @Override public INDArray thresholdDecode(INDArray encoded, INDArray target) { - throw new UnsupportedOperationException("Not yet implemented"); + Nd4j.exec(new DecodeThreshold(encoded, target)); + return target; } @Override public long bitmapEncode(INDArray indArray, INDArray target, double threshold) { - throw new UnsupportedOperationException("Not yet implemented"); + val results = Nd4j.exec(new EncodeBitmap(indArray, target, Nd4j.scalar(0), (float) threshold)); + + // return number of elements taht were compressed + return results[2].getInt(0); } @Override public INDArray bitmapEncode(INDArray indArray, double threshold) { - DataBuffer buffer = Nd4j.getDataBufferFactory().createInt(indArray.length() / 16 + 5); - - INDArray ret = Nd4j.createArrayFromShapeBuffer(buffer, indArray.shapeInfoDataBuffer()); - - bitmapEncode(indArray, ret, threshold); - - return ret; + val array = Nd4j.create(DataType.INT32, indArray.length() / 16 + 5); + bitmapEncode(indArray, array, threshold); + return array; } @Override public INDArray bitmapDecode(INDArray encoded, INDArray target) { - throw new UnsupportedOperationException("Not yet implemented"); + Nd4j.exec(new DecodeBitmap(encoded, target)); + return target; } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/dataset/ExistingMiniBatchDataSetIterator.java 
b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/dataset/ExistingMiniBatchDataSetIterator.java index c797fecc0..cab15f770 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/dataset/ExistingMiniBatchDataSetIterator.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/dataset/ExistingMiniBatchDataSetIterator.java @@ -1,5 +1,6 @@ /******************************************************************************* * Copyright (c) 2015-2018 Skymind, Inc. + * Copyright (c) 2020 Konduit K.K. * * This program and the accompanying materials are made available under the * terms of the Apache License, Version 2.0 which is available at @@ -16,6 +17,7 @@ package org.nd4j.linalg.dataset; +import lombok.NonNull; import org.nd4j.linalg.dataset.api.DataSetPreProcessor; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; @@ -43,7 +45,7 @@ public class ExistingMiniBatchDataSetIterator implements DataSetIterator { * Create with the given root directory, using the default filename pattern {@link #DEFAULT_PATTERN} * @param rootDir the root directory to use */ - public ExistingMiniBatchDataSetIterator(File rootDir) { + public ExistingMiniBatchDataSetIterator(@NonNull File rootDir) { this(rootDir, DEFAULT_PATTERN); } @@ -53,7 +55,7 @@ public class ExistingMiniBatchDataSetIterator implements DataSetIterator { * @param pattern The filename pattern to use. Used with {@code String.format(pattern,idx)}, where idx is an * integer, starting at 0. 
*/ - public ExistingMiniBatchDataSetIterator(File rootDir, String pattern) { + public ExistingMiniBatchDataSetIterator(@NonNull File rootDir, String pattern) { this.rootDir = rootDir; totalBatches = rootDir.list().length; this.pattern = pattern; diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/lossfunctions/SameDiffLoss.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/lossfunctions/SameDiffLoss.java index 8b3c6a3bc..e78376b5f 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/lossfunctions/SameDiffLoss.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/lossfunctions/SameDiffLoss.java @@ -38,7 +38,7 @@ import java.util.Map; */ public abstract class SameDiffLoss implements ILossFunction { protected transient SameDiff sd; - protected transient SDVariable scoreVariable; + protected transient SDVariable scorePerExampleVariable; protected SameDiffLoss() { @@ -60,7 +60,8 @@ public abstract class SameDiffLoss implements ILossFunction { sd = SameDiff.create(); SDVariable layerInput = sd.placeHolder("layerInput", dataType, -1); SDVariable labels = sd.placeHolder("labels", dataType, -1); - scoreVariable = this.defineLoss(sd, layerInput, labels); + scorePerExampleVariable = this.defineLoss(sd, layerInput, labels); + scorePerExampleVariable.markAsLoss(); sd.createGradFunction("layerInput"); } @@ -112,7 +113,7 @@ public abstract class SameDiffLoss implements ILossFunction { m.put("labels", labels); m.put("layerInput", output); - INDArray scoreArr = sd.outputSingle(m,scoreVariable.name()); + INDArray scoreArr = sd.outputSingle(m, scorePerExampleVariable.name()); if (mask != null) { LossUtil.applyMask(scoreArr, mask); diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java index 
a82d1b756..c7789d7dc 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java @@ -1004,20 +1004,6 @@ public interface NativeOps { @Cast("Nd4jLong *") LongPointer tadShapeInfo, @Cast("Nd4jLong *") LongPointer tadOffsets); - - long encodeBitmap(PointerPointer extraPointers, Pointer dx, LongPointer xShapeInfo, long N, IntPointer dz, float threshold); - - void decodeBitmap(PointerPointer extraPointers, Pointer dx, long N, Pointer dz, LongPointer zShapeInfo); - - - void encodeThresholdP1(PointerPointer extraPointers, Pointer dx, LongPointer xShapeInfo, long N, IntPointer dz, float threshold); - - void encodeThresholdP2Int(PointerPointer extraPointers, IntPointer dx, long N, IntPointer dz); - - void encodeThresholdP3(PointerPointer extraPointers, Pointer dx, LongPointer xShapeInfo, IntPointer offsets, long N, IntPointer dz); - - void decodeThreshold(PointerPointer extraPointers, Pointer dx, long N, Pointer dz, LongPointer zShapeInfo); - void sort(PointerPointer extraPointers, Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo, Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo, diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/compression/CudaFlexibleThreshold.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/compression/CudaFlexibleThreshold.java deleted file mode 100644 index c59bc10ad..000000000 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/compression/CudaFlexibleThreshold.java +++ /dev/null @@ -1,100 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. 
- * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -package org.nd4j.linalg.jcublas.compression; - -import org.bytedeco.javacpp.IntPointer; -import org.nd4j.linalg.api.buffer.DataBuffer; -import org.nd4j.linalg.api.buffer.DataTypeEx; -import org.nd4j.linalg.api.concurrency.AffinityManager; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.compression.CompressedDataBuffer; -import org.nd4j.linalg.compression.CompressionDescriptor; -import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.linalg.indexing.conditions.Conditions; - -/** - * This compression is very special case, and shouldn't be ever used outside of ParallelWrapper/ParameterServer implementation. - * It encodes data as delta between zero and abs threshold. - * - * Unlike CudaThreshold codec, CudaFlexibleThreshold tries to target specified sparsity/density updates ratio via topN approach - * - * PLEASE NOTE: DO NOT USE THIS COMPRESSOR UNLESS YOU'RE 100% SURE WHAT YOU DO! - * - * @author raver119@gmail.com - */ -public class CudaFlexibleThreshold extends CudaThreshold { - - public CudaFlexibleThreshold() { - super(); - this.threshold = 0.1f; - } - - /** - * This method returns compression descriptor. 
It should be unique for any compressor implementation - * - * @return - */ - @Override - public String getDescriptor() { - return "FTHRESHOLD"; - } - - /** - * This method allows you to configure desired sparsity/density ratio for updates. Pass it as float/double value - * - * Default value: 0.1 - * @param vars - */ - @Override - public void configure(Object... vars) { - super.configure(vars); - } - - - @Override - public DataBuffer compress(DataBuffer buffer) { - INDArray temp = Nd4j.createArrayFromShapeBuffer(buffer, Nd4j.getShapeInfoProvider().createShapeInformation(new long[]{1, buffer.length()}, buffer.dataType())); - double max = temp.amaxNumber().doubleValue(); - - int cntAbs = temp.scan(Conditions.absGreaterThanOrEqual(max - (max * threshold))).intValue(); - - long originalLength = buffer.length() * Nd4j.sizeOfDataType(buffer.dataType()); - int compressedLength = cntAbs + 3; - // first 3 elements contain header - IntPointer pointer = new IntPointer(compressedLength); - pointer.put(0, cntAbs); - pointer.put(1, (int) buffer.length()); - pointer.put(2, Float.floatToIntBits(threshold)); // please note, this value will be ovewritten anyway - - CompressionDescriptor descriptor = new CompressionDescriptor(); - descriptor.setCompressedLength(compressedLength * 4); // sizeOf(INT) - descriptor.setOriginalLength(originalLength); - descriptor.setOriginalElementSize(Nd4j.sizeOfDataType(buffer.dataType())); - descriptor.setNumberOfElements(buffer.length()); - - descriptor.setCompressionAlgorithm(getDescriptor()); - descriptor.setCompressionType(getCompressionType()); - - CompressedDataBuffer cbuff = new CompressedDataBuffer(pointer, descriptor); - - Nd4j.getNDArrayFactory().convertDataEx(getBufferTypeEx(buffer), buffer.addressPointer(), DataTypeEx.FTHRESHOLD, pointer, buffer.length()); - - Nd4j.getAffinityManager().tagLocation(buffer, AffinityManager.Location.HOST); - - return cbuff; - } -} diff --git 
a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/compression/CudaThreshold.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/compression/CudaThreshold.java deleted file mode 100644 index f9cbb1794..000000000 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/compression/CudaThreshold.java +++ /dev/null @@ -1,271 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2015-2018 Skymind, Inc. - * - * This program and the accompanying materials are made available under the - * terms of the Apache License, Version 2.0 which is available at - * https://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ******************************************************************************/ - -package org.nd4j.linalg.jcublas.compression; - -import lombok.Getter; -import lombok.Setter; -import lombok.extern.slf4j.Slf4j; -import lombok.val; -import org.apache.commons.math3.util.FastMath; -import org.bytedeco.javacpp.*; -import org.nd4j.compression.impl.AbstractCompressor; -import org.nd4j.jita.allocator.impl.AtomicAllocator; -import org.nd4j.linalg.api.buffer.DataBuffer; -import org.nd4j.linalg.api.buffer.DataType; -import org.nd4j.linalg.api.buffer.DataTypeEx; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.compression.CompressedDataBuffer; -import org.nd4j.linalg.compression.CompressionType; -import org.nd4j.linalg.exception.ND4JIllegalStateException; -import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.linalg.jcublas.context.CudaContext; -import org.nd4j.nativeblas.NativeOpsHolder; - -import java.util.ArrayList; -import java.util.List; - -/** - * This compression is very special case, and shouldn't be ever used outside of ParallelWrapper/ParameterServer implementation. - * It encodes data as delta between zero and abs threshold. - * - * PLEASE NOTE: DO NOT USE THIS COMPRESSOR UNLESS YOU'RE 100% SURE WHAT YOU DO! - * - * @author raver119@gmail.com - */ -@Slf4j -public class CudaThreshold extends AbstractCompressor { - @Getter @Setter protected float threshold = 1e-3f; - - /** - * This method returns compression descriptor. It should be unique for any compressor implementation - * - * @return - */ - @Override - public String getDescriptor() { - return "THRESHOLD"; - } - - /** - * This method allows you to configure threshold for delta extraction. Pass it as float/double value - * - * Default value: 1e-3 - * @param vars - */ - @Override - public void configure(Object... 
vars) { - if (vars[0] instanceof Number) { - Number t = (Number) vars[0]; - threshold = FastMath.abs(t.floatValue()); - log.info("Setting threshold to [{}]", threshold); - } else { - throw new ND4JIllegalStateException("Threshold value should be Number"); - } - } - - @Override - public INDArray compress(INDArray array) { - //logger.info("Threshold [{}] compression", threshold); - - Nd4j.getExecutioner().commit(); - //Nd4j.getAffinityManager().ensureLocation(array, AffinityManager.Location.HOST); - - DataBuffer buffer = compress(array.data()); - if (buffer == null) - return null; - - INDArray dup = Nd4j.createArrayFromShapeBuffer(buffer, array.shapeInfoDataBuffer()); - dup.markAsCompressed(true); - - return dup; - } - - @Override - public CompressionType getCompressionType() { - return CompressionType.LOSSLESS; - } - - @Override - public DataBuffer decompress(DataBuffer buffer, DataType type) { - if (buffer.dataType() != DataType.INT) - throw new UnsupportedOperationException(); - - long compressedLength = buffer.getInt(0); - long originalLength = buffer.getInt(1); - - DataBuffer result = Nd4j.createBuffer(type, originalLength, false); - - val context = AtomicAllocator.getInstance().getDeviceContext(); - - PointerPointer extras = new PointerPointer(32).put(1, context.getOldStream()); - - //log.info("DEC Source length: {}", buffer.length()); - //log.info("DEC Source: {}", Arrays.toString(buffer.asInt())); - - //NativeOpsHolder.getInstance().getDeviceNativeOps().decodeThresholdFloat(extras, AtomicAllocator.getInstance().getPointer(buffer), compressedLength, (FloatPointer) AtomicAllocator.getInstance().getPointer(result)); - AtomicAllocator.getInstance().getAllocationPoint(result).tickDeviceWrite(); - - //DataBuffer result = Nd4j.getNDArrayFactory().convertDataEx(DataTypeEx.THRESHOLD, buffer, getGlobalTypeEx()); - - return result; - } - - @Override - public DataBuffer compress(DataBuffer buffer) { - - int numThreads = 1024; - int numBlocks = (int) (buffer.length() / 
numThreads + (buffer.length() % numThreads == 0 ? 0 : 1)); - - val context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext(); - - DataBuffer blocksBuffer = Nd4j.getMemoryManager().getCurrentWorkspace() == null ? Nd4j.getDataBufferFactory().createInt(numBlocks+1, true) : Nd4j.getDataBufferFactory().createInt(numBlocks+1, true, Nd4j.getMemoryManager().getCurrentWorkspace()); - PointerPointer extras = new PointerPointer(32).put(1, context.getOldStream()); - - - //NativeOpsHolder.getInstance().getDeviceNativeOps().encodeThresholdP1(extras, (FloatPointer) AtomicAllocator.getInstance().getPointer(buffer), buffer.length(), (IntPointer) AtomicAllocator.getInstance().getPointer(blocksBuffer), threshold); - AtomicAllocator.getInstance().getAllocationPoint(blocksBuffer).tickDeviceWrite(); - - - int numMatches = blocksBuffer.getInt(0); - - //log.info("Totals: {}", numMatches); -/* - - log.info("Number of blocks for compression: {}", numBlocks); - log.info("BlocksCounts: {}", Arrays.toString(blocksBuffer.asInt())); -*/ - DataBuffer encodedBuffer = Nd4j.getMemoryManager().getCurrentWorkspace() == null ? Nd4j.getDataBufferFactory().createInt(3+numMatches, false) : Nd4j.getDataBufferFactory().createInt(3+numMatches, false, Nd4j.getMemoryManager().getCurrentWorkspace()); - encodedBuffer.put(0, numMatches); - encodedBuffer.put(1, (int) buffer.length()); - encodedBuffer.put(2, Float.floatToIntBits(threshold)); - AtomicAllocator.getInstance().getAllocationPoint(encodedBuffer).tickHostWrite(); - - // FIXME: make it parallel via some kernel, because it can be pretty big array here, i.e. 
for 150m original array, offsets can - /* - int prevSum = 0; - for (int e = 0; e < numBlocks; e++) { - int prevVal = offsetsBuffer.getInt(e + 1); - offsetsBuffer.put(e + 1, prevSum); - prevSum += prevVal; - } - */ - - int prefixThreads = 512; - int numElts = numBlocks; - int level = 0; - List buffers = new ArrayList<>(); - - // here we just calculate number of sumBlock arrays - do { - int numPrefixBlocks = Math.max(1, (int)Math.ceil((float)numElts / (2.0f * prefixThreads))); - if (numBlocks > 1) { - level++; - } - numElts = numPrefixBlocks; - } while (numElts > 1); - - long[] pointers = new long[level]; - - level = 0; - numElts = numBlocks; - - // allocating temp buffers for prefux sum - DataBuffer tempX = Nd4j.getMemoryManager().getCurrentWorkspace() == null ? Nd4j.getDataBufferFactory().createDouble(pointers.length, false) : Nd4j.getDataBufferFactory().createDouble(pointers.length, false, Nd4j.getMemoryManager().getCurrentWorkspace()); - - do { - int numPrefixBlocks = Math.max(1, (int)Math.ceil((float)numElts / (2.0f * prefixThreads))); - if (numPrefixBlocks > 1) { - DataBuffer bf = Nd4j.getMemoryManager().getCurrentWorkspace() == null ? Nd4j.getDataBufferFactory().createInt(numPrefixBlocks, false) : Nd4j.getDataBufferFactory().createInt(numPrefixBlocks, false, Nd4j.getMemoryManager().getCurrentWorkspace()); - - buffers.add(bf); - - pointers[level++] = AtomicAllocator.getInstance().getPointer(bf).address(); - } - numElts = numPrefixBlocks; - } while (numElts > 1); - - - AtomicAllocator.getInstance().memcpyBlocking(tempX, new LongPointer(pointers), pointers.length * 8, 0); - - extras.put(2, AtomicAllocator.getInstance().getPointer(tempX)); - - DataBuffer offsetsBuffer = Nd4j.getMemoryManager().getCurrentWorkspace() == null ? 
Nd4j.getDataBufferFactory().createInt(numBlocks, true) : Nd4j.getDataBufferFactory().createInt(numBlocks, true, Nd4j.getMemoryManager().getCurrentWorkspace()); - - NativeOpsHolder.getInstance().getDeviceNativeOps().encodeThresholdP2Int(extras, (IntPointer) AtomicAllocator.getInstance().getPointer(blocksBuffer), numBlocks, (IntPointer) AtomicAllocator.getInstance().getPointer(offsetsBuffer) ); - AtomicAllocator.getInstance().getAllocationPoint(offsetsBuffer).tickDeviceWrite(); - - //log.info("Offsets: {}", Arrays.toString(offsetsBuffer.asInt())); - //log.info("Target: {}", Arrays.toString(encodedBuffer.asInt())); - - - - //NativeOpsHolder.getInstance().getDeviceNativeOps().encodeThresholdP3Float(extras, (FloatPointer) AtomicAllocator.getInstance().getPointer(buffer), (IntPointer) AtomicAllocator.getInstance().getPointer(offsetsBuffer), buffer.length(), (IntPointer) AtomicAllocator.getInstance().getPointer(encodedBuffer)); - AtomicAllocator.getInstance().getAllocationPoint(encodedBuffer).tickDeviceWrite(); - AtomicAllocator.getInstance().getAllocationPoint(buffer).tickDeviceWrite(); - - //log.info("Encoded: {}", Arrays.toString(encodedBuffer.asInt())); - - extras.address(); - tempX.address(); - - return encodedBuffer; - - /* - INDArray temp = Nd4j.createArrayFromShapeBuffer(buffer, Nd4j.getShapeInfoProvider().createShapeInformation(new int[]{1, (int) buffer.length()})); - MatchCondition condition = new MatchCondition(temp, Conditions.absGreaterThanOrEqual(threshold)); - int cntAbs = Nd4j.getExecutioner().exec(condition, Integer.MAX_VALUE).getInt(0); - - - //log.info("density ratio: {}", String.format("%.2f", cntAbs * 100.0f / buffer.length())); - - if (cntAbs == 0) - return null; - - long originalLength = buffer.length() * Nd4j.sizeOfDataType(buffer.dataType()); - int compressedLength = cntAbs + 3; - // first 3 elements contain header - IntPointer pointer = new IntPointer(compressedLength); - pointer.put(0, cntAbs); - pointer.put(1, (int) buffer.length()); - 
pointer.put(2, Float.floatToIntBits(threshold)); - - CompressionDescriptor descriptor = new CompressionDescriptor(); - descriptor.setCompressedLength(compressedLength * 4); // sizeOf(INT) - descriptor.setOriginalLength(originalLength); - descriptor.setOriginalElementSize(Nd4j.sizeOfDataType(buffer.dataType())); - descriptor.setNumberOfElements(buffer.length()); - - descriptor.setCompressionAlgorithm(getDescriptor()); - descriptor.setCompressionType(getCompressionType()); - - - - CompressedDataBuffer cbuff = new CompressedDataBuffer(pointer, descriptor); - - Nd4j.getNDArrayFactory().convertDataEx(getBufferTypeEx(buffer), buffer.addressPointer(), DataTypeEx.THRESHOLD, pointer, buffer.length()); - - Nd4j.getAffinityManager().tagLocation(buffer, AffinityManager.Location.HOST); - - return cbuff; - */ - } - - @Override - protected CompressedDataBuffer compressPointer(DataTypeEx srcType, Pointer srcPointer, int length, int elementSize) { - throw new UnsupportedOperationException(); - } -} diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java index 76430a50e..afca1daa5 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java @@ -31,6 +31,7 @@ import org.nd4j.jita.allocator.tad.DeviceTADManager; import org.nd4j.jita.conf.CudaEnvironment; import org.nd4j.linalg.api.buffer.DataBuffer; import org.nd4j.linalg.api.buffer.DataType; +import org.nd4j.linalg.api.concurrency.AffinityManager; import org.nd4j.linalg.api.environment.Nd4jEnvironment; import org.nd4j.linalg.api.memory.pointers.PagedPointer; import org.nd4j.linalg.api.ndarray.INDArray; @@ -1674,224 +1675,6 @@ public 
class CudaExecutioner extends DefaultOpExecutioner { ctx.syncSpecialStream(); } - @Override - public INDArray thresholdEncode(INDArray input, double threshold, Integer boundary) { - DataBuffer buffer = input.data(); - - int numThreads = 1024; - int numBlocks = (int) (buffer.length() / numThreads + (buffer.length() % numThreads == 0 ? 0 : 1)); - - val context = AtomicAllocator.getInstance().getDeviceContext(); - - DataBuffer blocksBuffer = Nd4j.getMemoryManager().getCurrentWorkspace() == null ? Nd4j.getDataBufferFactory().createInt(numBlocks+1, true) : Nd4j.getDataBufferFactory().createInt(numBlocks+1, true, Nd4j.getMemoryManager().getCurrentWorkspace()); - - if (extraz.get() == null) - extraz.set(new PointerPointer(32)); - - val extras = extraz.get().put(1, context.getOldStream()); - - ((BaseCudaDataBuffer) buffer).getOpaqueDataBuffer().syncToSpecial(); - - - NativeOpsHolder.getInstance().getDeviceNativeOps().encodeThresholdP1(extras, - AtomicAllocator.getInstance().getPointer(buffer), - (LongPointer) AtomicAllocator.getInstance().getHostPointer(input.shapeInfoDataBuffer()), - buffer.length(), - (IntPointer) AtomicAllocator.getInstance().getPointer(blocksBuffer), - (float) threshold); - - AtomicAllocator.getInstance().getAllocationPoint(blocksBuffer).tickDeviceWrite(); - - - int numMatches = blocksBuffer.getInt(0); - - // special case here, nothing to update - if (numMatches < 2) - return null; - - if (boundary != null && numMatches > boundary) { - numMatches = boundary; - blocksBuffer.put(0, numMatches); - } - - DataBuffer encodedBuffer = Nd4j.getMemoryManager().getCurrentWorkspace() == null ? 
Nd4j.getDataBufferFactory().createInt(4+numMatches, false) : Nd4j.getDataBufferFactory().createInt(4+numMatches, false, Nd4j.getMemoryManager().getCurrentWorkspace()); - - encodedBuffer.put(0, numMatches); - encodedBuffer.put(1, (int) buffer.length()); - encodedBuffer.put(2, Float.floatToIntBits((float) threshold)); - - encodedBuffer.put(3, ThresholdCompression.FLEXIBLE_ENCODING); - - ((BaseCudaDataBuffer) encodedBuffer).getOpaqueDataBuffer().syncToSpecial(); - - - int prefixThreads = 512; - int numElts = numBlocks; - int level = 0; - List buffers = new ArrayList<>(); - - // here we just calculate number of sumBlock arrays - do { - int numPrefixBlocks = Math.max(1, (int)Math.ceil((float)numElts / (2.0f * prefixThreads))); - if (numBlocks > 1) { - level++; - } - numElts = numPrefixBlocks; - } while (numElts > 1); - - long[] pointers = new long[level]; - - level = 0; - numElts = numBlocks; - - // allocating temp buffers for prefux sum - DataBuffer tempX = Nd4j.getMemoryManager().getCurrentWorkspace() == null ? Nd4j.getDataBufferFactory().createDouble(pointers.length, false) : Nd4j.getDataBufferFactory().createDouble(pointers.length, false, Nd4j.getMemoryManager().getCurrentWorkspace()); - - do { - int numPrefixBlocks = Math.max(1, (int)Math.ceil((float)numElts / (2.0f * prefixThreads))); - if (numPrefixBlocks > 1) { - DataBuffer bf = Nd4j.getMemoryManager().getCurrentWorkspace() == null ? 
Nd4j.getDataBufferFactory().createInt(numPrefixBlocks, false) : Nd4j.getDataBufferFactory().createInt(numPrefixBlocks, false, Nd4j.getMemoryManager().getCurrentWorkspace()); - - buffers.add(bf); - - pointers[level++] = AtomicAllocator.getInstance().getPointer(bf).address(); - } - numElts = numPrefixBlocks; - } while (numElts > 1); - - - AtomicAllocator.getInstance().memcpyBlocking(tempX, new LongPointer(pointers), pointers.length * 8, 0); - - extras.put(2, AtomicAllocator.getInstance().getPointer(tempX)); - - DataBuffer offsetsBuffer = Nd4j.getMemoryManager().getCurrentWorkspace() == null ? Nd4j.getDataBufferFactory().createInt(numBlocks, true) : Nd4j.getDataBufferFactory().createInt(numBlocks, true, Nd4j.getMemoryManager().getCurrentWorkspace()); - - NativeOpsHolder.getInstance().getDeviceNativeOps().encodeThresholdP2Int(extras, (IntPointer) AtomicAllocator.getInstance().getPointer(blocksBuffer), numBlocks, (IntPointer) AtomicAllocator.getInstance().getPointer(offsetsBuffer) ); - AtomicAllocator.getInstance().getAllocationPoint(offsetsBuffer).tickDeviceWrite(); - - - NativeOpsHolder.getInstance().getDeviceNativeOps().encodeThresholdP3(extras, AtomicAllocator.getInstance().getPointer(buffer), (LongPointer) AtomicAllocator.getInstance().getHostPointer(input.shapeInfoDataBuffer()), (IntPointer) AtomicAllocator.getInstance().getPointer(offsetsBuffer), buffer.length(), (IntPointer) AtomicAllocator.getInstance().getPointer(encodedBuffer)); - - AtomicAllocator.getInstance().getAllocationPoint(encodedBuffer).tickDeviceWrite(); - AtomicAllocator.getInstance().getAllocationPoint(buffer).tickDeviceWrite(); - - return Nd4j.createArrayFromShapeBuffer(encodedBuffer, input.shapeInfoDataBuffer()); - } - - - @Override - public INDArray thresholdEncode(INDArray input, double threshold) { - return thresholdEncode(input, threshold, null); - } - - @Override - public INDArray thresholdDecode(INDArray encoded, INDArray target) { - DataBuffer buffer = encoded.data(); - - if 
(buffer.dataType() != DataType.INT) - throw new UnsupportedOperationException(); - - long compressedLength = buffer.getInt(0); - long originalLength = buffer.getInt(1); - - if (target.length() != originalLength) - throw new ND4JIllegalStateException("originalLength ["+ originalLength+"] stored in encoded array doesn't match target length ["+ target.length()+"]"); - - DataBuffer result = target.data(); - - val context = AtomicAllocator.getInstance().getDeviceContext(); - - if (extraz.get() == null) - extraz.set(new PointerPointer(32)); - - PointerPointer extras = extraz.get().put(1, context.getOldStream()); - - nativeOps.decodeThreshold(extras, AtomicAllocator.getInstance().getPointer(buffer), compressedLength, AtomicAllocator.getInstance().getPointer(result), (LongPointer) AtomicAllocator.getInstance().getHostPointer(target.shapeInfoDataBuffer())); - - if (nativeOps.lastErrorCode() != 0) - throw new RuntimeException(nativeOps.lastErrorMessage()); - - AtomicAllocator.getInstance().getAllocationPoint(result).tickDeviceWrite(); - - return target; - } - - - @Override - public long bitmapEncode(INDArray indArray, INDArray target, double threshold) { - long length = indArray.length(); - long tLen = target.data().length(); - - if (tLen != (length / 16 + 5)) - throw new ND4JIllegalStateException("Length of target array should be " + (length / 16 + 5)); - - if (target.data().dataType() != DataType.INT) - throw new ND4JIllegalStateException("Target array should have INT dataType"); - - DataBuffer buffer = target.data(); - buffer.put(0, (int) length); - buffer.put(1, (int) length); - buffer.put(2, Float.floatToIntBits((float) threshold)); - - // format id - buffer.put(3, ThresholdCompression.BITMAP_ENCODING); - - val context = AtomicAllocator.getInstance().getDeviceContext(); - - if (extraz.get() == null) - extraz.set(new PointerPointer(32)); - - - PointerPointer extras = extraz.get().put( - AtomicAllocator.getInstance().getHostPointer(indArray), - context.getOldStream(), - 
context.getBufferScalar(), - context.getBufferReduction() - ); - - - val src = AtomicAllocator.getInstance().getPointer(indArray, context); - val dst = (IntPointer) AtomicAllocator.getInstance().getPointer(buffer, context); - ((BaseCudaDataBuffer) buffer).getOpaqueDataBuffer().syncToSpecial(); - - long val = nativeOps.encodeBitmap(extras, - src, (LongPointer) AtomicAllocator.getInstance().getHostPointer(indArray.shapeInfoDataBuffer()), - length, - dst, - (float) threshold); - - if (nativeOps.lastErrorCode() != 0) - throw new RuntimeException(nativeOps.lastErrorMessage()); - - AtomicAllocator.getInstance().getAllocationPoint(buffer).tickDeviceWrite(); - - return val; - } - - @Override - public INDArray bitmapDecode(INDArray encoded, INDArray target) { - - val context = AtomicAllocator.getInstance().getDeviceContext(); - - if (extraz.get() == null) - extraz.set(new PointerPointer(32)); - - - PointerPointer extras = extraz.get().put( - AtomicAllocator.getInstance().getHostPointer(target), - context.getOldStream(), - context.getBufferScalar(), - context.getBufferReduction()); - - nativeOps.decodeBitmap(extras, AtomicAllocator.getInstance().getPointer(encoded.data(), context), target.length(), AtomicAllocator.getInstance().getPointer(target, context), (LongPointer) AtomicAllocator.getInstance().getHostPointer(target.shapeInfoDataBuffer())); - - if (nativeOps.lastErrorCode() != 0) - throw new RuntimeException(nativeOps.lastErrorMessage()); - - return target; - } - - @Override public synchronized Map getCustomOperations() { if(customOps == null) { @@ -1974,6 +1757,11 @@ public class CudaExecutioner extends DefaultOpExecutioner { val inputArgs = opContext != null ? 
opContext.getInputArrays() : op.inputArguments(); int cnt= 0; for (val in: inputArgs) { + // TODO: once we implement Context-based shape function call this method should be removed + val loc = Nd4j.getAffinityManager().getActiveLocation(in); + if (loc != AffinityManager.Location.DEVICE && loc != AffinityManager.Location.EVERYWHERE) + Nd4j.getAffinityManager().ensureLocation(in, AffinityManager.Location.DEVICE); + // NOT A TYPO: shape functions work on host side only if (!in.isEmpty()) { inputBuffers.put(cnt, in.data().addressPointer()); diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaOpContext.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaOpContext.java index faf24a4e6..23e96ee64 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaOpContext.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaOpContext.java @@ -106,7 +106,7 @@ public class CudaOpContext extends BaseOpContext implements OpContext, Deallocat @Override public void setInputArray(int index, @NonNull INDArray array) { - val ctx = AtomicAllocator.getInstance().getFlowController().prepareAction(null, array); + //val ctx = AtomicAllocator.getInstance().getFlowController().prepareAction(null, array); nativeOps.setGraphContextInputBuffer(context, index, array.isEmpty() ? 
null : ((BaseCudaDataBuffer) array.data()).getOpaqueDataBuffer(), array.shapeInfoDataBuffer().addressPointer(), AtomicAllocator.getInstance().getPointer(array.shapeInfoDataBuffer())); super.setInputArray(index, array); @@ -114,7 +114,7 @@ public class CudaOpContext extends BaseOpContext implements OpContext, Deallocat @Override public void setOutputArray(int index, @NonNull INDArray array) { - val ctx = AtomicAllocator.getInstance().getFlowController().prepareAction(array, null); + //val ctx = AtomicAllocator.getInstance().getFlowController().prepareAction(array, null); nativeOps.setGraphContextOutputBuffer(context, index, array.isEmpty() ? null : ((BaseCudaDataBuffer) array.data()).getOpaqueDataBuffer(), array.shapeInfoDataBuffer().addressPointer(), AtomicAllocator.getInstance().getPointer(array.shapeInfoDataBuffer())); super.setOutputArray(index, array); diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java index 1bf3ec2d6..1307ab0ae 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java @@ -511,12 +511,12 @@ public class Nd4jCuda extends org.nd4j.nativeblas.Nd4jCudaHelper { private native void allocate(DoubleBuffer values, int length); public ConstantDescriptor(double[] values, int length) { super((Pointer)null); allocate(values, length); } private native void allocate(double[] values, int length); - public ConstantDescriptor(@Cast("Nd4jLong*") LongPointer values, int length) { super((Pointer)null); allocate(values, length); } - private native void allocate(@Cast("Nd4jLong*") LongPointer values, int length); - public ConstantDescriptor(@Cast("Nd4jLong*") LongBuffer values, int length) { super((Pointer)null); allocate(values, length); } - private native void 
allocate(@Cast("Nd4jLong*") LongBuffer values, int length); - public ConstantDescriptor(@Cast("Nd4jLong*") long[] values, int length) { super((Pointer)null); allocate(values, length); } - private native void allocate(@Cast("Nd4jLong*") long[] values, int length); + public ConstantDescriptor(@Cast("const Nd4jLong*") LongPointer values, int length) { super((Pointer)null); allocate(values, length); } + private native void allocate(@Cast("const Nd4jLong*") LongPointer values, int length); + public ConstantDescriptor(@Cast("const Nd4jLong*") LongBuffer values, int length) { super((Pointer)null); allocate(values, length); } + private native void allocate(@Cast("const Nd4jLong*") LongBuffer values, int length); + public ConstantDescriptor(@Cast("const Nd4jLong*") long[] values, int length) { super((Pointer)null); allocate(values, length); } + private native void allocate(@Cast("const Nd4jLong*") long[] values, int length); public ConstantDescriptor(@Cast("Nd4jLong*") @StdVector LongPointer values) { super((Pointer)null); allocate(values); } private native void allocate(@Cast("Nd4jLong*") @StdVector LongPointer values); @@ -654,11 +654,11 @@ public class Nd4jCuda extends org.nd4j.nativeblas.Nd4jCudaHelper { public TadPack() { super((Pointer)null); allocate(); } private native void allocate(); - public native @Cast("Nd4jLong*") LongPointer primaryShapeInfo(); - public native @Cast("Nd4jLong*") LongPointer primaryOffsets(); + public native @Cast("const Nd4jLong*") LongPointer primaryShapeInfo(); + public native @Cast("const Nd4jLong*") LongPointer primaryOffsets(); - public native @Cast("Nd4jLong*") LongPointer specialShapeInfo(); - public native @Cast("Nd4jLong*") LongPointer specialOffsets(); + public native @Cast("const Nd4jLong*") LongPointer specialShapeInfo(); + public native @Cast("const Nd4jLong*") LongPointer specialOffsets(); public native @Cast("Nd4jLong") long numberOfTads(); public native int shapeInfoLength(); @@ -667,8 +667,8 @@ public class Nd4jCuda extends 
org.nd4j.nativeblas.Nd4jCudaHelper { * These methods return either primary or special pointers depending on platform binaries were compiled for * @return */ - public native @Cast("Nd4jLong*") LongPointer platformShapeInfo(); - public native @Cast("Nd4jLong*") LongPointer platformOffsets(); + public native @Cast("const Nd4jLong*") LongPointer platformShapeInfo(); + public native @Cast("const Nd4jLong*") LongPointer platformOffsets(); } @@ -1120,19 +1120,19 @@ public native void setTADThreshold(int num); */ public native void execIndexReduceScalar(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo); public native void execIndexReduceScalar(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo); public native void execIndexReduceScalar(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const 
Nd4jLong*") long[] dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo); /** * @@ -1147,22 +1147,22 @@ public native void execIndexReduceScalar(@Cast("Nd4jPointer*") PointerPointer ex */ public native void execIndexReduce(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongPointer hDimensionShape, @Cast("Nd4jLong*") LongPointer dDimensionShape); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongPointer hDimensionShape, @Cast("const Nd4jLong*") LongPointer dDimensionShape); public native void execIndexReduce(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongBuffer hDimensionShape, @Cast("Nd4jLong*") LongBuffer dDimensionShape); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbDimension, 
@Cast("const Nd4jLong*") LongBuffer hDimensionShape, @Cast("const Nd4jLong*") LongBuffer dDimensionShape); public native void execIndexReduce(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") long[] hDimensionShape, @Cast("Nd4jLong*") long[] dDimensionShape); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") long[] hDimensionShape, @Cast("const Nd4jLong*") long[] dDimensionShape); /** * @@ -1179,50 +1179,50 @@ public native void execIndexReduce(@Cast("Nd4jPointer*") PointerPointer extraPoi public native void execBroadcast( @Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongPointer hYShapeInfo, @Cast("Nd4jLong*") LongPointer dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongPointer hDimensionShape, @Cast("Nd4jLong*") LongPointer dDimensionShape); + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") LongPointer hYShapeInfo, @Cast("const Nd4jLong*") LongPointer dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const 
Nd4jLong*") LongPointer hDimensionShape, @Cast("const Nd4jLong*") LongPointer dDimensionShape); public native void execBroadcast( @Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongBuffer hYShapeInfo, @Cast("Nd4jLong*") LongBuffer dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongBuffer hDimensionShape, @Cast("Nd4jLong*") LongBuffer dDimensionShape); + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") LongBuffer hYShapeInfo, @Cast("const Nd4jLong*") LongBuffer dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongBuffer hDimensionShape, @Cast("const Nd4jLong*") LongBuffer dDimensionShape); public native void execBroadcast( @Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") long[] hYShapeInfo, @Cast("Nd4jLong*") long[] dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") long[] hDimensionShape, @Cast("Nd4jLong*") long[] dDimensionShape); + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") long[] hYShapeInfo, @Cast("const Nd4jLong*") long[] dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") 
long[] dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") long[] hDimensionShape, @Cast("const Nd4jLong*") long[] dDimensionShape); public native void execBroadcastBool( @Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongPointer hYShapeInfo, @Cast("Nd4jLong*") LongPointer dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") LongPointer hYShapeInfo, @Cast("const Nd4jLong*") LongPointer dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongPointer hDimensionShape, @Cast("Nd4jLong*") LongPointer dDimensionShape); + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongPointer hDimensionShape, @Cast("const Nd4jLong*") LongPointer dDimensionShape); public native void execBroadcastBool( @Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongBuffer hYShapeInfo, @Cast("Nd4jLong*") LongBuffer dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") LongBuffer hYShapeInfo, @Cast("const Nd4jLong*") LongBuffer dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, 
@Cast("const Nd4jLong*") LongBuffer dZShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongBuffer hDimensionShape, @Cast("Nd4jLong*") LongBuffer dDimensionShape); + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongBuffer hDimensionShape, @Cast("const Nd4jLong*") LongBuffer dDimensionShape); public native void execBroadcastBool( @Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") long[] hYShapeInfo, @Cast("Nd4jLong*") long[] dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") long[] hYShapeInfo, @Cast("const Nd4jLong*") long[] dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") long[] hDimensionShape, @Cast("Nd4jLong*") long[] dDimensionShape); + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") long[] hDimensionShape, @Cast("const Nd4jLong*") long[] dDimensionShape); /** * @@ -1239,45 +1239,45 @@ public native void execBroadcastBool( public native void execPairwiseTransform( @Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongPointer hYShapeInfo, @Cast("Nd4jLong*") LongPointer dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, + 
OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") LongPointer hYShapeInfo, @Cast("const Nd4jLong*") LongPointer dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, Pointer extraParams); public native void execPairwiseTransform( @Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongBuffer hYShapeInfo, @Cast("Nd4jLong*") LongBuffer dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") LongBuffer hYShapeInfo, @Cast("const Nd4jLong*") LongBuffer dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, Pointer extraParams); public native void execPairwiseTransform( @Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") long[] hYShapeInfo, @Cast("Nd4jLong*") long[] dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") long[] hYShapeInfo, @Cast("const Nd4jLong*") long[] dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, Pointer extraParams); public native void execPairwiseTransformBool( @Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, 
@Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongPointer hYShapeInfo, @Cast("Nd4jLong*") LongPointer dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") LongPointer hYShapeInfo, @Cast("const Nd4jLong*") LongPointer dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, Pointer extraParams); public native void execPairwiseTransformBool( @Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongBuffer hYShapeInfo, @Cast("Nd4jLong*") LongBuffer dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") LongBuffer hYShapeInfo, @Cast("const Nd4jLong*") LongBuffer dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, Pointer extraParams); public native void execPairwiseTransformBool( @Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") long[] hYShapeInfo, @Cast("Nd4jLong*") long[] dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") 
long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") long[] hYShapeInfo, @Cast("const Nd4jLong*") long[] dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, Pointer extraParams); /** @@ -1291,68 +1291,68 @@ public native void execPairwiseTransformBool( */ public native void execReduceFloat(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo); public native void execReduceFloat(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo); public native void execReduceFloat(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] 
dZShapeInfo); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo); public native void execReduceSame(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo); public native void execReduceSame(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo); public native void execReduceSame(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo); public native void execReduceBool(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") 
LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo); public native void execReduceBool(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo); public native void execReduceBool(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo); public native void execReduceLong(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer 
hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo); public native void execReduceLong(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo); public native void execReduceLong(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo); /** * @@ -1365,82 +1365,82 @@ public native void execReduceLong(@Cast("Nd4jPointer*") PointerPointer extraPoin */ public native void execReduceFloat2(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongPointer hDimensionShape, @Cast("Nd4jLong*") 
LongPointer dDimensionShape); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongPointer hDimensionShape, @Cast("const Nd4jLong*") LongPointer dDimensionShape); public native void execReduceFloat2(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongBuffer hDimensionShape, @Cast("Nd4jLong*") LongBuffer dDimensionShape); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongBuffer hDimensionShape, @Cast("const Nd4jLong*") LongBuffer dDimensionShape); public native void execReduceFloat2(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") long[] hDimensionShape, @Cast("Nd4jLong*") long[] dDimensionShape); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") long[] hDimensionShape, @Cast("const Nd4jLong*") long[] dDimensionShape); public native void 
execReduceSame2(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongPointer hDimensionShape, @Cast("Nd4jLong*") LongPointer dDimensionShape); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongPointer hDimensionShape, @Cast("const Nd4jLong*") LongPointer dDimensionShape); public native void execReduceSame2(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongBuffer hDimensionShape, @Cast("Nd4jLong*") LongBuffer dDimensionShape); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongBuffer hDimensionShape, @Cast("const Nd4jLong*") LongBuffer dDimensionShape); public native void execReduceSame2(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] 
dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") long[] hDimensionShape, @Cast("Nd4jLong*") long[] dDimensionShape); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") long[] hDimensionShape, @Cast("const Nd4jLong*") long[] dDimensionShape); public native void execReduceBool2(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongPointer hDimensionShape, @Cast("Nd4jLong*") LongPointer dDimensionShape); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongPointer hDimensionShape, @Cast("const Nd4jLong*") LongPointer dDimensionShape); public native void execReduceBool2(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongBuffer hDimensionShape, @Cast("Nd4jLong*") LongBuffer dDimensionShape); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") 
LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongBuffer hDimensionShape, @Cast("const Nd4jLong*") LongBuffer dDimensionShape); public native void execReduceBool2(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") long[] hDimensionShape, @Cast("Nd4jLong*") long[] dDimensionShape); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") long[] hDimensionShape, @Cast("const Nd4jLong*") long[] dDimensionShape); public native void execReduceLong2(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongPointer hDimensionShape, @Cast("Nd4jLong*") LongPointer dDimensionShape); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongPointer hDimensionShape, @Cast("const Nd4jLong*") LongPointer dDimensionShape); public native void execReduceLong2(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, 
@Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongBuffer hDimensionShape, @Cast("Nd4jLong*") LongBuffer dDimensionShape); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongBuffer hDimensionShape, @Cast("const Nd4jLong*") LongBuffer dDimensionShape); public native void execReduceLong2(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") long[] hDimensionShape, @Cast("Nd4jLong*") long[] dDimensionShape); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") long[] hDimensionShape, @Cast("const Nd4jLong*") long[] dDimensionShape); /** * @@ -1455,22 +1455,22 @@ public native void execReduceLong2(@Cast("Nd4jPointer*") PointerPointer extraPoi */ public native void execReduce3(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, Pointer extraParamsVals, - 
OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongPointer hYShapeInfo, @Cast("Nd4jLong*") LongPointer dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo); + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") LongPointer hYShapeInfo, @Cast("const Nd4jLong*") LongPointer dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo); public native void execReduce3(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, Pointer extraParamsVals, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongBuffer hYShapeInfo, @Cast("Nd4jLong*") LongBuffer dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo); + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") LongBuffer hYShapeInfo, @Cast("const Nd4jLong*") LongBuffer dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo); public native void execReduce3(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, Pointer extraParamsVals, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") long[] hYShapeInfo, @Cast("Nd4jLong*") long[] dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo); + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") long[] hYShapeInfo, @Cast("const Nd4jLong*") long[] dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] 
hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo); /** * @@ -1483,22 +1483,22 @@ public native void execReduce3(@Cast("Nd4jPointer*") PointerPointer extraPointer */ public native void execReduce3Scalar(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, Pointer extraParamsVals, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongPointer hYShapeInfo, @Cast("Nd4jLong*") LongPointer dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo); + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") LongPointer hYShapeInfo, @Cast("const Nd4jLong*") LongPointer dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo); public native void execReduce3Scalar(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, Pointer extraParamsVals, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongBuffer hYShapeInfo, @Cast("Nd4jLong*") LongBuffer dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo); + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") LongBuffer hYShapeInfo, @Cast("const Nd4jLong*") LongBuffer dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo); public native void execReduce3Scalar(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, 
@Cast("Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, Pointer extraParamsVals, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") long[] hYShapeInfo, @Cast("Nd4jLong*") long[] dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo); + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") long[] hYShapeInfo, @Cast("const Nd4jLong*") long[] dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo); /** * * @param opNum @@ -1514,60 +1514,60 @@ public native void execReduce3Scalar(@Cast("Nd4jPointer*") PointerPointer extraP */ public native void execReduce3Tad(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, Pointer extraParamsVals, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongPointer hYShapeInfo, @Cast("Nd4jLong*") LongPointer dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongPointer hDimensionShape, @Cast("Nd4jLong*") LongPointer dDimensionShape, - @Cast("Nd4jLong*") LongPointer tadOnlyShapeInfo, @Cast("Nd4jLong*") LongPointer tadOffsets, - @Cast("Nd4jLong*") LongPointer yTadOnlyShapeInfo, @Cast("Nd4jLong*") LongPointer yTadOffsets); + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") LongPointer hYShapeInfo, @Cast("const Nd4jLong*") LongPointer dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongPointer hDimensionShape, @Cast("const 
Nd4jLong*") LongPointer dDimensionShape, + @Cast("const Nd4jLong*") LongPointer tadOnlyShapeInfo, @Cast("const Nd4jLong*") LongPointer tadOffsets, + @Cast("const Nd4jLong*") LongPointer yTadOnlyShapeInfo, @Cast("const Nd4jLong*") LongPointer yTadOffsets); public native void execReduce3Tad(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, Pointer extraParamsVals, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongBuffer hYShapeInfo, @Cast("Nd4jLong*") LongBuffer dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongBuffer hDimensionShape, @Cast("Nd4jLong*") LongBuffer dDimensionShape, - @Cast("Nd4jLong*") LongBuffer tadOnlyShapeInfo, @Cast("Nd4jLong*") LongBuffer tadOffsets, - @Cast("Nd4jLong*") LongBuffer yTadOnlyShapeInfo, @Cast("Nd4jLong*") LongBuffer yTadOffsets); + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") LongBuffer hYShapeInfo, @Cast("const Nd4jLong*") LongBuffer dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongBuffer hDimensionShape, @Cast("const Nd4jLong*") LongBuffer dDimensionShape, + @Cast("const Nd4jLong*") LongBuffer tadOnlyShapeInfo, @Cast("const Nd4jLong*") LongBuffer tadOffsets, + @Cast("const Nd4jLong*") LongBuffer yTadOnlyShapeInfo, @Cast("const Nd4jLong*") LongBuffer yTadOffsets); public native void execReduce3Tad(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] 
hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, Pointer extraParamsVals, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") long[] hYShapeInfo, @Cast("Nd4jLong*") long[] dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") long[] hDimensionShape, @Cast("Nd4jLong*") long[] dDimensionShape, - @Cast("Nd4jLong*") long[] tadOnlyShapeInfo, @Cast("Nd4jLong*") long[] tadOffsets, - @Cast("Nd4jLong*") long[] yTadOnlyShapeInfo, @Cast("Nd4jLong*") long[] yTadOffsets); + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") long[] hYShapeInfo, @Cast("const Nd4jLong*") long[] dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") long[] hDimensionShape, @Cast("const Nd4jLong*") long[] dDimensionShape, + @Cast("const Nd4jLong*") long[] tadOnlyShapeInfo, @Cast("const Nd4jLong*") long[] tadOffsets, + @Cast("const Nd4jLong*") long[] yTadOnlyShapeInfo, @Cast("const Nd4jLong*") long[] yTadOffsets); public native void execReduce3All(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, Pointer extraParamsVals, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongPointer hYShapeInfo, @Cast("Nd4jLong*") LongPointer dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongPointer hDimensionShape, @Cast("Nd4jLong*") LongPointer dDimensionShape, - @Cast("Nd4jLong*") LongPointer xTadShapeInfo, @Cast("Nd4jLong*") LongPointer xOffsets, - @Cast("Nd4jLong*") LongPointer yTadShapeInfo, @Cast("Nd4jLong*") 
LongPointer yOffsets); + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") LongPointer hYShapeInfo, @Cast("const Nd4jLong*") LongPointer dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongPointer hDimensionShape, @Cast("const Nd4jLong*") LongPointer dDimensionShape, + @Cast("const Nd4jLong*") LongPointer xTadShapeInfo, @Cast("const Nd4jLong*") LongPointer xOffsets, + @Cast("const Nd4jLong*") LongPointer yTadShapeInfo, @Cast("const Nd4jLong*") LongPointer yOffsets); public native void execReduce3All(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, Pointer extraParamsVals, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongBuffer hYShapeInfo, @Cast("Nd4jLong*") LongBuffer dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongBuffer hDimensionShape, @Cast("Nd4jLong*") LongBuffer dDimensionShape, - @Cast("Nd4jLong*") LongBuffer xTadShapeInfo, @Cast("Nd4jLong*") LongBuffer xOffsets, - @Cast("Nd4jLong*") LongBuffer yTadShapeInfo, @Cast("Nd4jLong*") LongBuffer yOffsets); + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") LongBuffer hYShapeInfo, @Cast("const Nd4jLong*") LongBuffer dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongBuffer hDimensionShape, @Cast("const Nd4jLong*") LongBuffer dDimensionShape, + @Cast("const Nd4jLong*") LongBuffer xTadShapeInfo, @Cast("const Nd4jLong*") LongBuffer xOffsets, + @Cast("const Nd4jLong*") 
LongBuffer yTadShapeInfo, @Cast("const Nd4jLong*") LongBuffer yOffsets); public native void execReduce3All(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, Pointer extraParamsVals, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") long[] hYShapeInfo, @Cast("Nd4jLong*") long[] dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") long[] hDimensionShape, @Cast("Nd4jLong*") long[] dDimensionShape, - @Cast("Nd4jLong*") long[] xTadShapeInfo, @Cast("Nd4jLong*") long[] xOffsets, - @Cast("Nd4jLong*") long[] yTadShapeInfo, @Cast("Nd4jLong*") long[] yOffsets); + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") long[] hYShapeInfo, @Cast("const Nd4jLong*") long[] dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") long[] hDimensionShape, @Cast("const Nd4jLong*") long[] dDimensionShape, + @Cast("const Nd4jLong*") long[] xTadShapeInfo, @Cast("const Nd4jLong*") long[] xOffsets, + @Cast("const Nd4jLong*") long[] yTadShapeInfo, @Cast("const Nd4jLong*") long[] yOffsets); /** * @@ -1582,40 +1582,40 @@ public native void execReduce3All(@Cast("Nd4jPointer*") PointerPointer extraPoin */ public native void execScalar(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, - OpaqueDataBuffer dbScalar, @Cast("Nd4jLong*") LongPointer hSscalarShapeInfo, @Cast("Nd4jLong*") LongPointer dSscalarShapeInfo, + 
OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbScalar, @Cast("const Nd4jLong*") LongPointer hSscalarShapeInfo, @Cast("const Nd4jLong*") LongPointer dSscalarShapeInfo, Pointer extraParams); public native void execScalar(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, - OpaqueDataBuffer dbScalar, @Cast("Nd4jLong*") LongBuffer hSscalarShapeInfo, @Cast("Nd4jLong*") LongBuffer dSscalarShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbScalar, @Cast("const Nd4jLong*") LongBuffer hSscalarShapeInfo, @Cast("const Nd4jLong*") LongBuffer dSscalarShapeInfo, Pointer extraParams); public native void execScalar(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, - OpaqueDataBuffer dbScalar, @Cast("Nd4jLong*") long[] hSscalarShapeInfo, @Cast("Nd4jLong*") long[] dSscalarShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbScalar, @Cast("const Nd4jLong*") long[] hSscalarShapeInfo, @Cast("const 
Nd4jLong*") long[] dSscalarShapeInfo, Pointer extraParams); public native void execScalarBool(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, - OpaqueDataBuffer dbScalar, @Cast("Nd4jLong*") LongPointer hSscalarShapeInfo, @Cast("Nd4jLong*") LongPointer dSscalarShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbScalar, @Cast("const Nd4jLong*") LongPointer hSscalarShapeInfo, @Cast("const Nd4jLong*") LongPointer dSscalarShapeInfo, Pointer extraParams); public native void execScalarBool(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, - OpaqueDataBuffer dbScalar, @Cast("Nd4jLong*") LongBuffer hSscalarShapeInfo, @Cast("Nd4jLong*") LongBuffer dSscalarShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbScalar, @Cast("const Nd4jLong*") LongBuffer hSscalarShapeInfo, @Cast("const Nd4jLong*") LongBuffer dSscalarShapeInfo, Pointer extraParams); public native void execScalarBool(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, - OpaqueDataBuffer dbZ, 
@Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, - OpaqueDataBuffer dbScalar, @Cast("Nd4jLong*") long[] hSscalarShapeInfo, @Cast("Nd4jLong*") long[] dSscalarShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbScalar, @Cast("const Nd4jLong*") long[] hSscalarShapeInfo, @Cast("const Nd4jLong*") long[] dSscalarShapeInfo, Pointer extraParams); /** @@ -1627,21 +1627,21 @@ public native void execScalarBool(@Cast("Nd4jPointer*") PointerPointer extraPoin */ public native void execSummaryStatsScalar(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, @Cast("bool") boolean biasCorrected); public native void execSummaryStatsScalar(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, @Cast("bool") boolean biasCorrected); public native void 
execSummaryStatsScalar(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, @Cast("bool") boolean biasCorrected); /** * @@ -1654,21 +1654,21 @@ public native void execSummaryStatsScalar(@Cast("Nd4jPointer*") PointerPointer e */ public native void execSummaryStats(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, @Cast("bool") boolean biasCorrected); public native void execSummaryStats(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, @Cast("bool") boolean biasCorrected); public native void 
execSummaryStats(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, @Cast("bool") boolean biasCorrected); /** * @@ -1683,28 +1683,28 @@ public native void execSummaryStats(@Cast("Nd4jPointer*") PointerPointer extraPo */ public native void execSummaryStatsTad(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongPointer hDimensionShape, @Cast("Nd4jLong*") LongPointer dDimensionShape, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongPointer hDimensionShape, @Cast("const Nd4jLong*") LongPointer dDimensionShape, @Cast("bool") boolean biasCorrected, - @Cast("Nd4jLong*") LongPointer tadShapeInfo, @Cast("Nd4jLong*") LongPointer tadOffsets); + @Cast("const Nd4jLong*") LongPointer tadShapeInfo, @Cast("const Nd4jLong*") LongPointer tadOffsets); public native void execSummaryStatsTad(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, + 
OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongBuffer hDimensionShape, @Cast("Nd4jLong*") LongBuffer dDimensionShape, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongBuffer hDimensionShape, @Cast("const Nd4jLong*") LongBuffer dDimensionShape, @Cast("bool") boolean biasCorrected, - @Cast("Nd4jLong*") LongBuffer tadShapeInfo, @Cast("Nd4jLong*") LongBuffer tadOffsets); + @Cast("const Nd4jLong*") LongBuffer tadShapeInfo, @Cast("const Nd4jLong*") LongBuffer tadOffsets); public native void execSummaryStatsTad(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") long[] hDimensionShape, @Cast("Nd4jLong*") long[] dDimensionShape, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") long[] hDimensionShape, @Cast("const Nd4jLong*") long[] dDimensionShape, @Cast("bool") boolean biasCorrected, - @Cast("Nd4jLong*") long[] tadShapeInfo, @Cast("Nd4jLong*") long[] tadOffsets); + @Cast("const Nd4jLong*") long[] tadShapeInfo, @Cast("const Nd4jLong*") long[] tadOffsets); /** * @@ -1718,82 +1718,82 @@ public native void execSummaryStatsTad(@Cast("Nd4jPointer*") PointerPointer extr */ public 
native void execTransformFloat(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, Pointer extraParams); public native void execTransformFloat(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, Pointer extraParams); public native void execTransformFloat(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, Pointer extraParams); public native void execTransformSame(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer 
hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, Pointer extraParams); public native void execTransformSame(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, Pointer extraParams); public native void execTransformSame(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, Pointer extraParams); public native void execTransformBool(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, 
@Cast("const Nd4jLong*") LongPointer dZShapeInfo, Pointer extraParams); public native void execTransformBool(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, Pointer extraParams); public native void execTransformBool(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, Pointer extraParams); public native void execTransformAny(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, Pointer extraParams); public native void execTransformAny(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") 
LongBuffer dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, Pointer extraParams); public native void execTransformAny(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, Pointer extraParams); public native void execTransformStrict(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, Pointer extraParams); public native void execTransformStrict(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, + 
OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, Pointer extraParams); public native void execTransformStrict(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, Pointer extraParams); /** @@ -1811,59 +1811,59 @@ public native void execTransformStrict(@Cast("Nd4jPointer*") PointerPointer extr */ public native void execScalarTad(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, - OpaqueDataBuffer dbScalars, @Cast("Nd4jLong*") LongPointer hScalarShapeInfo, @Cast("Nd4jLong*") LongPointer dScalarShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbScalars, @Cast("const Nd4jLong*") LongPointer hScalarShapeInfo, @Cast("const Nd4jLong*") LongPointer dScalarShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongPointer hDimensionShape, @Cast("Nd4jLong*") LongPointer dDimensionShape, - @Cast("Nd4jLong*") LongPointer tadShapeInfo, @Cast("Nd4jLong*") LongPointer tadOffsets, - @Cast("Nd4jLong*") LongPointer tadShapeInfoZ, @Cast("Nd4jLong*") LongPointer tadOffsetsZ); + OpaqueDataBuffer 
dbDimension, @Cast("const Nd4jLong*") LongPointer hDimensionShape, @Cast("const Nd4jLong*") LongPointer dDimensionShape, + @Cast("const Nd4jLong*") LongPointer tadShapeInfo, @Cast("const Nd4jLong*") LongPointer tadOffsets, + @Cast("const Nd4jLong*") LongPointer tadShapeInfoZ, @Cast("const Nd4jLong*") LongPointer tadOffsetsZ); public native void execScalarTad(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, - OpaqueDataBuffer dbScalars, @Cast("Nd4jLong*") LongBuffer hScalarShapeInfo, @Cast("Nd4jLong*") LongBuffer dScalarShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbScalars, @Cast("const Nd4jLong*") LongBuffer hScalarShapeInfo, @Cast("const Nd4jLong*") LongBuffer dScalarShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongBuffer hDimensionShape, @Cast("Nd4jLong*") LongBuffer dDimensionShape, - @Cast("Nd4jLong*") LongBuffer tadShapeInfo, @Cast("Nd4jLong*") LongBuffer tadOffsets, - @Cast("Nd4jLong*") LongBuffer tadShapeInfoZ, @Cast("Nd4jLong*") LongBuffer tadOffsetsZ); + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongBuffer hDimensionShape, @Cast("const Nd4jLong*") LongBuffer dDimensionShape, + @Cast("const Nd4jLong*") LongBuffer tadShapeInfo, @Cast("const Nd4jLong*") LongBuffer tadOffsets, + @Cast("const Nd4jLong*") LongBuffer tadShapeInfoZ, @Cast("const Nd4jLong*") LongBuffer tadOffsetsZ); public native void execScalarTad(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") 
long[] dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, - OpaqueDataBuffer dbScalars, @Cast("Nd4jLong*") long[] hScalarShapeInfo, @Cast("Nd4jLong*") long[] dScalarShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbScalars, @Cast("const Nd4jLong*") long[] hScalarShapeInfo, @Cast("const Nd4jLong*") long[] dScalarShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") long[] hDimensionShape, @Cast("Nd4jLong*") long[] dDimensionShape, - @Cast("Nd4jLong*") long[] tadShapeInfo, @Cast("Nd4jLong*") long[] tadOffsets, - @Cast("Nd4jLong*") long[] tadShapeInfoZ, @Cast("Nd4jLong*") long[] tadOffsetsZ); + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") long[] hDimensionShape, @Cast("const Nd4jLong*") long[] dDimensionShape, + @Cast("const Nd4jLong*") long[] tadShapeInfo, @Cast("const Nd4jLong*") long[] tadOffsets, + @Cast("const Nd4jLong*") long[] tadShapeInfoZ, @Cast("const Nd4jLong*") long[] tadOffsetsZ); public native void execScalarBoolTad(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, - OpaqueDataBuffer dbScalars, @Cast("Nd4jLong*") LongPointer hScalarShapeInfo, @Cast("Nd4jLong*") LongPointer dScalarShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbScalars, @Cast("const Nd4jLong*") LongPointer 
hScalarShapeInfo, @Cast("const Nd4jLong*") LongPointer dScalarShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongPointer hDimensionShape, @Cast("Nd4jLong*") LongPointer dDimensionShape, - @Cast("Nd4jLong*") LongPointer tadShapeInfo, @Cast("Nd4jLong*") LongPointer tadOffsets, - @Cast("Nd4jLong*") LongPointer tadShapeInfoZ, @Cast("Nd4jLong*") LongPointer tadOffsetsZ); + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongPointer hDimensionShape, @Cast("const Nd4jLong*") LongPointer dDimensionShape, + @Cast("const Nd4jLong*") LongPointer tadShapeInfo, @Cast("const Nd4jLong*") LongPointer tadOffsets, + @Cast("const Nd4jLong*") LongPointer tadShapeInfoZ, @Cast("const Nd4jLong*") LongPointer tadOffsetsZ); public native void execScalarBoolTad(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, - OpaqueDataBuffer dbScalars, @Cast("Nd4jLong*") LongBuffer hScalarShapeInfo, @Cast("Nd4jLong*") LongBuffer dScalarShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbScalars, @Cast("const Nd4jLong*") LongBuffer hScalarShapeInfo, @Cast("const Nd4jLong*") LongBuffer dScalarShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongBuffer hDimensionShape, @Cast("Nd4jLong*") LongBuffer dDimensionShape, - @Cast("Nd4jLong*") LongBuffer tadShapeInfo, @Cast("Nd4jLong*") LongBuffer tadOffsets, - @Cast("Nd4jLong*") LongBuffer tadShapeInfoZ, @Cast("Nd4jLong*") LongBuffer tadOffsetsZ); + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongBuffer hDimensionShape, 
@Cast("const Nd4jLong*") LongBuffer dDimensionShape, + @Cast("const Nd4jLong*") LongBuffer tadShapeInfo, @Cast("const Nd4jLong*") LongBuffer tadOffsets, + @Cast("const Nd4jLong*") LongBuffer tadShapeInfoZ, @Cast("const Nd4jLong*") LongBuffer tadOffsetsZ); public native void execScalarBoolTad(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, - OpaqueDataBuffer dbScalars, @Cast("Nd4jLong*") long[] hScalarShapeInfo, @Cast("Nd4jLong*") long[] dScalarShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbScalars, @Cast("const Nd4jLong*") long[] hScalarShapeInfo, @Cast("const Nd4jLong*") long[] dScalarShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") long[] hDimensionShape, @Cast("Nd4jLong*") long[] dDimensionShape, - @Cast("Nd4jLong*") long[] tadShapeInfo, @Cast("Nd4jLong*") long[] tadOffsets, - @Cast("Nd4jLong*") long[] tadShapeInfoZ, @Cast("Nd4jLong*") long[] tadOffsetsZ); + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") long[] hDimensionShape, @Cast("const Nd4jLong*") long[] dDimensionShape, + @Cast("const Nd4jLong*") long[] tadShapeInfo, @Cast("const Nd4jLong*") long[] tadOffsets, + @Cast("const Nd4jLong*") long[] tadShapeInfoZ, @Cast("const Nd4jLong*") long[] tadOffsetsZ); public native void specialConcat( @Cast("Nd4jPointer*") PointerPointer extraPointers, @@ -1872,7 +1872,7 @@ public native void specialConcat( @Cast("Nd4jPointer*") PointerPointer data, @Cast("Nd4jPointer*") PointerPointer inputShapeInfo, Pointer result, - @Cast("Nd4jLong*") LongPointer resultShapeInfo, + @Cast("const Nd4jLong*") LongPointer 
resultShapeInfo, @Cast("Nd4jPointer*") PointerPointer tadPointers, @Cast("Nd4jPointer*") PointerPointer offsetPointers); public native void specialConcat( @@ -1882,7 +1882,7 @@ public native void specialConcat( @Cast("Nd4jPointer*") PointerPointer data, @Cast("Nd4jPointer*") PointerPointer inputShapeInfo, Pointer result, - @Cast("Nd4jLong*") LongBuffer resultShapeInfo, + @Cast("const Nd4jLong*") LongBuffer resultShapeInfo, @Cast("Nd4jPointer*") PointerPointer tadPointers, @Cast("Nd4jPointer*") PointerPointer offsetPointers); public native void specialConcat( @@ -1892,7 +1892,7 @@ public native void specialConcat( @Cast("Nd4jPointer*") PointerPointer data, @Cast("Nd4jPointer*") PointerPointer inputShapeInfo, Pointer result, - @Cast("Nd4jLong*") long[] resultShapeInfo, + @Cast("const Nd4jLong*") long[] resultShapeInfo, @Cast("Nd4jPointer*") PointerPointer tadPointers, @Cast("Nd4jPointer*") PointerPointer offsetPointers); @@ -2186,20 +2186,20 @@ public native void setGridLimit(int gridSize); * @param targetBuffer * @param offsetsBuffer */ -public native OpaqueTadPack tadOnlyShapeInfo(@Cast("Nd4jLong*") LongPointer xShapeInfo, +public native OpaqueTadPack tadOnlyShapeInfo(@Cast("const Nd4jLong*") LongPointer xShapeInfo, IntPointer dimension, int dimensionLength); -public native OpaqueTadPack tadOnlyShapeInfo(@Cast("Nd4jLong*") LongBuffer xShapeInfo, +public native OpaqueTadPack tadOnlyShapeInfo(@Cast("const Nd4jLong*") LongBuffer xShapeInfo, IntBuffer dimension, int dimensionLength); -public native OpaqueTadPack tadOnlyShapeInfo(@Cast("Nd4jLong*") long[] xShapeInfo, +public native OpaqueTadPack tadOnlyShapeInfo(@Cast("const Nd4jLong*") long[] xShapeInfo, int[] dimension, int dimensionLength); -public native @Cast("Nd4jLong*") LongPointer getPrimaryShapeInfo(OpaqueTadPack pack); -public native @Cast("Nd4jLong*") LongPointer getPrimaryOffsets(OpaqueTadPack pack); -public native @Cast("Nd4jLong*") LongPointer getSpecialShapeInfo(OpaqueTadPack pack); -public native 
@Cast("Nd4jLong*") LongPointer getSpecialOffsets(OpaqueTadPack pack); +public native @Cast("const Nd4jLong*") LongPointer getPrimaryShapeInfo(OpaqueTadPack pack); +public native @Cast("const Nd4jLong*") LongPointer getPrimaryOffsets(OpaqueTadPack pack); +public native @Cast("const Nd4jLong*") LongPointer getSpecialShapeInfo(OpaqueTadPack pack); +public native @Cast("const Nd4jLong*") LongPointer getSpecialOffsets(OpaqueTadPack pack); public native @Cast("Nd4jLong") long getNumberOfTads(OpaqueTadPack pack); public native int getShapeInfoLength(OpaqueTadPack pack); @@ -2224,32 +2224,32 @@ public native void deleteTadPack(OpaqueTadPack ptr); * @param zTadOffsets */ public native void pullRows(@Cast("Nd4jPointer*") PointerPointer extraPointers, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer xShapeInfo, @Cast("Nd4jLong*") LongPointer dxShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer zShapeInfo, @Cast("Nd4jLong*") LongPointer dzShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer xShapeInfo, @Cast("const Nd4jLong*") LongPointer dxShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer zShapeInfo, @Cast("const Nd4jLong*") LongPointer dzShapeInfo, @Cast("Nd4jLong") long n, @Cast("Nd4jLong*") LongPointer indexes, - @Cast("Nd4jLong*") LongPointer tadShapeInfo, - @Cast("Nd4jLong*") LongPointer tadOffsets, - @Cast("Nd4jLong*") LongPointer zTadShapeInfo, - @Cast("Nd4jLong*") LongPointer zTadOffsets); + @Cast("const Nd4jLong*") LongPointer tadShapeInfo, + @Cast("const Nd4jLong*") LongPointer tadOffsets, + @Cast("const Nd4jLong*") LongPointer zTadShapeInfo, + @Cast("const Nd4jLong*") LongPointer zTadOffsets); public native void pullRows(@Cast("Nd4jPointer*") PointerPointer extraPointers, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer xShapeInfo, @Cast("Nd4jLong*") LongBuffer dxShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer zShapeInfo, @Cast("Nd4jLong*") LongBuffer dzShapeInfo, + OpaqueDataBuffer 
dbX, @Cast("const Nd4jLong*") LongBuffer xShapeInfo, @Cast("const Nd4jLong*") LongBuffer dxShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer zShapeInfo, @Cast("const Nd4jLong*") LongBuffer dzShapeInfo, @Cast("Nd4jLong") long n, @Cast("Nd4jLong*") LongBuffer indexes, - @Cast("Nd4jLong*") LongBuffer tadShapeInfo, - @Cast("Nd4jLong*") LongBuffer tadOffsets, - @Cast("Nd4jLong*") LongBuffer zTadShapeInfo, - @Cast("Nd4jLong*") LongBuffer zTadOffsets); + @Cast("const Nd4jLong*") LongBuffer tadShapeInfo, + @Cast("const Nd4jLong*") LongBuffer tadOffsets, + @Cast("const Nd4jLong*") LongBuffer zTadShapeInfo, + @Cast("const Nd4jLong*") LongBuffer zTadOffsets); public native void pullRows(@Cast("Nd4jPointer*") PointerPointer extraPointers, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] xShapeInfo, @Cast("Nd4jLong*") long[] dxShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] zShapeInfo, @Cast("Nd4jLong*") long[] dzShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] xShapeInfo, @Cast("const Nd4jLong*") long[] dxShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] zShapeInfo, @Cast("const Nd4jLong*") long[] dzShapeInfo, @Cast("Nd4jLong") long n, @Cast("Nd4jLong*") long[] indexes, - @Cast("Nd4jLong*") long[] tadShapeInfo, - @Cast("Nd4jLong*") long[] tadOffsets, - @Cast("Nd4jLong*") long[] zTadShapeInfo, - @Cast("Nd4jLong*") long[] zTadOffsets); + @Cast("const Nd4jLong*") long[] tadShapeInfo, + @Cast("const Nd4jLong*") long[] tadOffsets, + @Cast("const Nd4jLong*") long[] zTadShapeInfo, + @Cast("const Nd4jLong*") long[] zTadOffsets); /** * @@ -2261,50 +2261,50 @@ public native void pullRows(@Cast("Nd4jPointer*") PointerPointer extraPointers, * @param propagate */ public native void average(@Cast("Nd4jPointer*") PointerPointer extras, - @Cast("Nd4jPointer*") PointerPointer x, @Cast("Nd4jLong*") LongPointer xShapeInfo, - @Cast("Nd4jPointer*") PointerPointer dx, @Cast("Nd4jLong*") LongPointer dxShapeInfo, - Pointer z, 
@Cast("Nd4jLong*") LongPointer zShapeInfo, - Pointer dz, @Cast("Nd4jLong*") LongPointer dzShapeInfo, + @Cast("Nd4jPointer*") PointerPointer x, @Cast("const Nd4jLong*") LongPointer xShapeInfo, + @Cast("Nd4jPointer*") PointerPointer dx, @Cast("const Nd4jLong*") LongPointer dxShapeInfo, + Pointer z, @Cast("const Nd4jLong*") LongPointer zShapeInfo, + Pointer dz, @Cast("const Nd4jLong*") LongPointer dzShapeInfo, int n, @Cast("Nd4jLong") long length, @Cast("bool") boolean propagate); public native void average(@Cast("Nd4jPointer*") PointerPointer extras, - @Cast("Nd4jPointer*") PointerPointer x, @Cast("Nd4jLong*") LongBuffer xShapeInfo, - @Cast("Nd4jPointer*") PointerPointer dx, @Cast("Nd4jLong*") LongBuffer dxShapeInfo, - Pointer z, @Cast("Nd4jLong*") LongBuffer zShapeInfo, - Pointer dz, @Cast("Nd4jLong*") LongBuffer dzShapeInfo, + @Cast("Nd4jPointer*") PointerPointer x, @Cast("const Nd4jLong*") LongBuffer xShapeInfo, + @Cast("Nd4jPointer*") PointerPointer dx, @Cast("const Nd4jLong*") LongBuffer dxShapeInfo, + Pointer z, @Cast("const Nd4jLong*") LongBuffer zShapeInfo, + Pointer dz, @Cast("const Nd4jLong*") LongBuffer dzShapeInfo, int n, @Cast("Nd4jLong") long length, @Cast("bool") boolean propagate); public native void average(@Cast("Nd4jPointer*") PointerPointer extras, - @Cast("Nd4jPointer*") PointerPointer x, @Cast("Nd4jLong*") long[] xShapeInfo, - @Cast("Nd4jPointer*") PointerPointer dx, @Cast("Nd4jLong*") long[] dxShapeInfo, - Pointer z, @Cast("Nd4jLong*") long[] zShapeInfo, - Pointer dz, @Cast("Nd4jLong*") long[] dzShapeInfo, + @Cast("Nd4jPointer*") PointerPointer x, @Cast("const Nd4jLong*") long[] xShapeInfo, + @Cast("Nd4jPointer*") PointerPointer dx, @Cast("const Nd4jLong*") long[] dxShapeInfo, + Pointer z, @Cast("const Nd4jLong*") long[] zShapeInfo, + Pointer dz, @Cast("const Nd4jLong*") long[] dzShapeInfo, int n, @Cast("Nd4jLong") long length, @Cast("bool") boolean propagate); public native void accumulate(@Cast("Nd4jPointer*") PointerPointer extras, - 
@Cast("Nd4jPointer*") PointerPointer x, @Cast("Nd4jLong*") LongPointer xShapeInfo, - @Cast("Nd4jPointer*") PointerPointer dx, @Cast("Nd4jLong*") LongPointer dxShapeInfo, - Pointer z, @Cast("Nd4jLong*") LongPointer zShapeInfo, - Pointer dz, @Cast("Nd4jLong*") LongPointer dzShapeInfo, + @Cast("Nd4jPointer*") PointerPointer x, @Cast("const Nd4jLong*") LongPointer xShapeInfo, + @Cast("Nd4jPointer*") PointerPointer dx, @Cast("const Nd4jLong*") LongPointer dxShapeInfo, + Pointer z, @Cast("const Nd4jLong*") LongPointer zShapeInfo, + Pointer dz, @Cast("const Nd4jLong*") LongPointer dzShapeInfo, int n, @Cast("Nd4jLong") long length); public native void accumulate(@Cast("Nd4jPointer*") PointerPointer extras, - @Cast("Nd4jPointer*") PointerPointer x, @Cast("Nd4jLong*") LongBuffer xShapeInfo, - @Cast("Nd4jPointer*") PointerPointer dx, @Cast("Nd4jLong*") LongBuffer dxShapeInfo, - Pointer z, @Cast("Nd4jLong*") LongBuffer zShapeInfo, - Pointer dz, @Cast("Nd4jLong*") LongBuffer dzShapeInfo, + @Cast("Nd4jPointer*") PointerPointer x, @Cast("const Nd4jLong*") LongBuffer xShapeInfo, + @Cast("Nd4jPointer*") PointerPointer dx, @Cast("const Nd4jLong*") LongBuffer dxShapeInfo, + Pointer z, @Cast("const Nd4jLong*") LongBuffer zShapeInfo, + Pointer dz, @Cast("const Nd4jLong*") LongBuffer dzShapeInfo, int n, @Cast("Nd4jLong") long length); public native void accumulate(@Cast("Nd4jPointer*") PointerPointer extras, - @Cast("Nd4jPointer*") PointerPointer x, @Cast("Nd4jLong*") long[] xShapeInfo, - @Cast("Nd4jPointer*") PointerPointer dx, @Cast("Nd4jLong*") long[] dxShapeInfo, - Pointer z, @Cast("Nd4jLong*") long[] zShapeInfo, - Pointer dz, @Cast("Nd4jLong*") long[] dzShapeInfo, + @Cast("Nd4jPointer*") PointerPointer x, @Cast("const Nd4jLong*") long[] xShapeInfo, + @Cast("Nd4jPointer*") PointerPointer dx, @Cast("const Nd4jLong*") long[] dxShapeInfo, + Pointer z, @Cast("const Nd4jLong*") long[] zShapeInfo, + Pointer dz, @Cast("const Nd4jLong*") long[] dzShapeInfo, int n, @Cast("Nd4jLong") long 
length); @@ -2509,17 +2509,17 @@ public native void execAggregateBatch(@Cast("Nd4jPointer*") PointerPointer extra public native void execRandom(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, @Cast("Nd4jPointer") Pointer state, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeBuffer, @Cast("Nd4jLong*") LongPointer dZShapeBuffer, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeBuffer, @Cast("const Nd4jLong*") LongPointer dZShapeBuffer, Pointer extraArguments); public native void execRandom(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, @Cast("Nd4jPointer") Pointer state, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeBuffer, @Cast("Nd4jLong*") LongBuffer dZShapeBuffer, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeBuffer, @Cast("const Nd4jLong*") LongBuffer dZShapeBuffer, Pointer extraArguments); public native void execRandom(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, @Cast("Nd4jPointer") Pointer state, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeBuffer, @Cast("Nd4jLong*") long[] dZShapeBuffer, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeBuffer, @Cast("const Nd4jLong*") long[] dZShapeBuffer, Pointer extraArguments); /** @@ -2538,23 +2538,23 @@ public native void execRandom(@Cast("Nd4jPointer*") PointerPointer extraPointers public native void execRandom3(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, @Cast("Nd4jPointer") Pointer state, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeBuffer, @Cast("Nd4jLong*") LongPointer dXShapeBuffer, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongPointer hYShapeBuffer, @Cast("Nd4jLong*") LongPointer dYShapeBuffer, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeBuffer, @Cast("Nd4jLong*") LongPointer dZShapeBuffer, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeBuffer, @Cast("const Nd4jLong*") LongPointer dXShapeBuffer, 
+ OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") LongPointer hYShapeBuffer, @Cast("const Nd4jLong*") LongPointer dYShapeBuffer, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeBuffer, @Cast("const Nd4jLong*") LongPointer dZShapeBuffer, Pointer extraArguments); public native void execRandom3(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, @Cast("Nd4jPointer") Pointer state, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeBuffer, @Cast("Nd4jLong*") LongBuffer dXShapeBuffer, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongBuffer hYShapeBuffer, @Cast("Nd4jLong*") LongBuffer dYShapeBuffer, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeBuffer, @Cast("Nd4jLong*") LongBuffer dZShapeBuffer, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeBuffer, @Cast("const Nd4jLong*") LongBuffer dXShapeBuffer, + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") LongBuffer hYShapeBuffer, @Cast("const Nd4jLong*") LongBuffer dYShapeBuffer, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeBuffer, @Cast("const Nd4jLong*") LongBuffer dZShapeBuffer, Pointer extraArguments); public native void execRandom3(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, @Cast("Nd4jPointer") Pointer state, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeBuffer, @Cast("Nd4jLong*") long[] dXShapeBuffer, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") long[] hYShapeBuffer, @Cast("Nd4jLong*") long[] dYShapeBuffer, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeBuffer, @Cast("Nd4jLong*") long[] dZShapeBuffer, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeBuffer, @Cast("const Nd4jLong*") long[] dXShapeBuffer, + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") long[] hYShapeBuffer, @Cast("const Nd4jLong*") long[] dYShapeBuffer, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeBuffer, @Cast("const Nd4jLong*") long[] dZShapeBuffer, Pointer extraArguments); /** @@ -2571,20 
+2571,20 @@ public native void execRandom3(@Cast("Nd4jPointer*") PointerPointer extraPointer public native void execRandom2(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, @Cast("Nd4jPointer") Pointer state, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeBuffer, @Cast("Nd4jLong*") LongPointer dXShapeBuffer, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeBuffer, @Cast("Nd4jLong*") LongPointer dZShapeBuffer, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeBuffer, @Cast("const Nd4jLong*") LongPointer dXShapeBuffer, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeBuffer, @Cast("const Nd4jLong*") LongPointer dZShapeBuffer, Pointer extraArguments); public native void execRandom2(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, @Cast("Nd4jPointer") Pointer state, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeBuffer, @Cast("Nd4jLong*") LongBuffer dXShapeBuffer, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeBuffer, @Cast("Nd4jLong*") LongBuffer dZShapeBuffer, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeBuffer, @Cast("const Nd4jLong*") LongBuffer dXShapeBuffer, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeBuffer, @Cast("const Nd4jLong*") LongBuffer dZShapeBuffer, Pointer extraArguments); public native void execRandom2(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, @Cast("Nd4jPointer") Pointer state, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeBuffer, @Cast("Nd4jLong*") long[] dXShapeBuffer, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeBuffer, @Cast("Nd4jLong*") long[] dZShapeBuffer, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeBuffer, @Cast("const Nd4jLong*") long[] dXShapeBuffer, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeBuffer, @Cast("const Nd4jLong*") long[] dZShapeBuffer, Pointer extraArguments); @@ -2789,167 +2789,143 @@ 
public native @Cast("Nd4jPointer") Pointer pointerForAddress(@Cast("Nd4jLong") l * @return */ public native void tear(@Cast("Nd4jPointer*") PointerPointer extraPointers, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer xShapeInfo, @Cast("Nd4jLong*") LongPointer dxShapeInfo, - @Cast("Nd4jPointer*") PointerPointer targets, @Cast("Nd4jLong*") LongPointer zShapeInfo, - @Cast("Nd4jLong*") LongPointer tadShapeInfo, - @Cast("Nd4jLong*") LongPointer tadOffsets); + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer xShapeInfo, @Cast("const Nd4jLong*") LongPointer dxShapeInfo, + @Cast("Nd4jPointer*") PointerPointer targets, @Cast("const Nd4jLong*") LongPointer zShapeInfo, + @Cast("const Nd4jLong*") LongPointer tadShapeInfo, + @Cast("const Nd4jLong*") LongPointer tadOffsets); public native void tear(@Cast("Nd4jPointer*") PointerPointer extraPointers, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer xShapeInfo, @Cast("Nd4jLong*") LongBuffer dxShapeInfo, - @Cast("Nd4jPointer*") PointerPointer targets, @Cast("Nd4jLong*") LongBuffer zShapeInfo, - @Cast("Nd4jLong*") LongBuffer tadShapeInfo, - @Cast("Nd4jLong*") LongBuffer tadOffsets); + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer xShapeInfo, @Cast("const Nd4jLong*") LongBuffer dxShapeInfo, + @Cast("Nd4jPointer*") PointerPointer targets, @Cast("const Nd4jLong*") LongBuffer zShapeInfo, + @Cast("const Nd4jLong*") LongBuffer tadShapeInfo, + @Cast("const Nd4jLong*") LongBuffer tadOffsets); public native void tear(@Cast("Nd4jPointer*") PointerPointer extraPointers, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] xShapeInfo, @Cast("Nd4jLong*") long[] dxShapeInfo, - @Cast("Nd4jPointer*") PointerPointer targets, @Cast("Nd4jLong*") long[] zShapeInfo, - @Cast("Nd4jLong*") long[] tadShapeInfo, - @Cast("Nd4jLong*") long[] tadOffsets); - -public native @Cast("Nd4jLong") long encodeBitmap(@Cast("Nd4jPointer*") PointerPointer extraPointers, Pointer dx, @Cast("Nd4jLong*") LongPointer xShapeInfo, @Cast("Nd4jLong") 
long N, IntPointer dz, float threshold); -public native @Cast("Nd4jLong") long encodeBitmap(@Cast("Nd4jPointer*") PointerPointer extraPointers, Pointer dx, @Cast("Nd4jLong*") LongBuffer xShapeInfo, @Cast("Nd4jLong") long N, IntBuffer dz, float threshold); -public native @Cast("Nd4jLong") long encodeBitmap(@Cast("Nd4jPointer*") PointerPointer extraPointers, Pointer dx, @Cast("Nd4jLong*") long[] xShapeInfo, @Cast("Nd4jLong") long N, int[] dz, float threshold); -public native void decodeBitmap(@Cast("Nd4jPointer*") PointerPointer extraPointers, Pointer dx, @Cast("Nd4jLong") long N, Pointer dz, @Cast("Nd4jLong*") LongPointer zShapeInfo); -public native void decodeBitmap(@Cast("Nd4jPointer*") PointerPointer extraPointers, Pointer dx, @Cast("Nd4jLong") long N, Pointer dz, @Cast("Nd4jLong*") LongBuffer zShapeInfo); -public native void decodeBitmap(@Cast("Nd4jPointer*") PointerPointer extraPointers, Pointer dx, @Cast("Nd4jLong") long N, Pointer dz, @Cast("Nd4jLong*") long[] zShapeInfo); - - -public native void encodeThresholdP1(@Cast("Nd4jPointer*") PointerPointer extraPointers, Pointer dx, @Cast("Nd4jLong*") LongPointer xShapeInfo, @Cast("Nd4jLong") long N, IntPointer dz, float threshold); -public native void encodeThresholdP1(@Cast("Nd4jPointer*") PointerPointer extraPointers, Pointer dx, @Cast("Nd4jLong*") LongBuffer xShapeInfo, @Cast("Nd4jLong") long N, IntBuffer dz, float threshold); -public native void encodeThresholdP1(@Cast("Nd4jPointer*") PointerPointer extraPointers, Pointer dx, @Cast("Nd4jLong*") long[] xShapeInfo, @Cast("Nd4jLong") long N, int[] dz, float threshold); -public native void encodeThresholdP2Int(@Cast("Nd4jPointer*") PointerPointer extraPointers, IntPointer dx, @Cast("Nd4jLong") long N, IntPointer dz); -public native void encodeThresholdP2Int(@Cast("Nd4jPointer*") PointerPointer extraPointers, IntBuffer dx, @Cast("Nd4jLong") long N, IntBuffer dz); -public native void encodeThresholdP2Int(@Cast("Nd4jPointer*") PointerPointer extraPointers, int[] dx, 
@Cast("Nd4jLong") long N, int[] dz); -public native void encodeThresholdP3(@Cast("Nd4jPointer*") PointerPointer extraPointers, Pointer dx, @Cast("Nd4jLong*") LongPointer xShapeInfo, IntPointer offsets, @Cast("Nd4jLong") long N, IntPointer dz); -public native void encodeThresholdP3(@Cast("Nd4jPointer*") PointerPointer extraPointers, Pointer dx, @Cast("Nd4jLong*") LongBuffer xShapeInfo, IntBuffer offsets, @Cast("Nd4jLong") long N, IntBuffer dz); -public native void encodeThresholdP3(@Cast("Nd4jPointer*") PointerPointer extraPointers, Pointer dx, @Cast("Nd4jLong*") long[] xShapeInfo, int[] offsets, @Cast("Nd4jLong") long N, int[] dz); - - -public native void decodeThreshold(@Cast("Nd4jPointer*") PointerPointer extraPointers, Pointer dx, @Cast("Nd4jLong") long N, Pointer dz, @Cast("Nd4jLong*") LongPointer zShapeInfo); -public native void decodeThreshold(@Cast("Nd4jPointer*") PointerPointer extraPointers, Pointer dx, @Cast("Nd4jLong") long N, Pointer dz, @Cast("Nd4jLong*") LongBuffer zShapeInfo); -public native void decodeThreshold(@Cast("Nd4jPointer*") PointerPointer extraPointers, Pointer dx, @Cast("Nd4jLong") long N, Pointer dz, @Cast("Nd4jLong*") long[] zShapeInfo); - + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] xShapeInfo, @Cast("const Nd4jLong*") long[] dxShapeInfo, + @Cast("Nd4jPointer*") PointerPointer targets, @Cast("const Nd4jLong*") long[] zShapeInfo, + @Cast("const Nd4jLong*") long[] tadShapeInfo, + @Cast("const Nd4jLong*") long[] tadOffsets); public native void sort(@Cast("Nd4jPointer*") PointerPointer extraPointers, - Pointer x, @Cast("Nd4jLong*") LongPointer xShapeInfo, - Pointer dx, @Cast("Nd4jLong*") LongPointer dxShapeInfo, + Pointer x, @Cast("const Nd4jLong*") LongPointer xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") LongPointer dxShapeInfo, @Cast("bool") boolean descending); public native void sort(@Cast("Nd4jPointer*") PointerPointer extraPointers, - Pointer x, @Cast("Nd4jLong*") LongBuffer xShapeInfo, - Pointer dx, 
@Cast("Nd4jLong*") LongBuffer dxShapeInfo, + Pointer x, @Cast("const Nd4jLong*") LongBuffer xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") LongBuffer dxShapeInfo, @Cast("bool") boolean descending); public native void sort(@Cast("Nd4jPointer*") PointerPointer extraPointers, - Pointer x, @Cast("Nd4jLong*") long[] xShapeInfo, - Pointer dx, @Cast("Nd4jLong*") long[] dxShapeInfo, + Pointer x, @Cast("const Nd4jLong*") long[] xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") long[] dxShapeInfo, @Cast("bool") boolean descending); public native void sortByKey(@Cast("Nd4jPointer*") PointerPointer extraPointers, - Pointer x, @Cast("Nd4jLong*") LongPointer xShapeInfo, - Pointer dx, @Cast("Nd4jLong*") LongPointer dxShapeInfo, - Pointer y, @Cast("Nd4jLong*") LongPointer yShapeInfo, - Pointer dy, @Cast("Nd4jLong*") LongPointer dyShapeInfo, + Pointer x, @Cast("const Nd4jLong*") LongPointer xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") LongPointer dxShapeInfo, + Pointer y, @Cast("const Nd4jLong*") LongPointer yShapeInfo, + Pointer dy, @Cast("const Nd4jLong*") LongPointer dyShapeInfo, @Cast("bool") boolean descending); public native void sortByKey(@Cast("Nd4jPointer*") PointerPointer extraPointers, - Pointer x, @Cast("Nd4jLong*") LongBuffer xShapeInfo, - Pointer dx, @Cast("Nd4jLong*") LongBuffer dxShapeInfo, - Pointer y, @Cast("Nd4jLong*") LongBuffer yShapeInfo, - Pointer dy, @Cast("Nd4jLong*") LongBuffer dyShapeInfo, + Pointer x, @Cast("const Nd4jLong*") LongBuffer xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") LongBuffer dxShapeInfo, + Pointer y, @Cast("const Nd4jLong*") LongBuffer yShapeInfo, + Pointer dy, @Cast("const Nd4jLong*") LongBuffer dyShapeInfo, @Cast("bool") boolean descending); public native void sortByKey(@Cast("Nd4jPointer*") PointerPointer extraPointers, - Pointer x, @Cast("Nd4jLong*") long[] xShapeInfo, - Pointer dx, @Cast("Nd4jLong*") long[] dxShapeInfo, - Pointer y, @Cast("Nd4jLong*") long[] yShapeInfo, - Pointer dy, @Cast("Nd4jLong*") long[] 
dyShapeInfo, + Pointer x, @Cast("const Nd4jLong*") long[] xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") long[] dxShapeInfo, + Pointer y, @Cast("const Nd4jLong*") long[] yShapeInfo, + Pointer dy, @Cast("const Nd4jLong*") long[] dyShapeInfo, @Cast("bool") boolean descending); public native void sortByValue(@Cast("Nd4jPointer*") PointerPointer extraPointers, - Pointer x, @Cast("Nd4jLong*") LongPointer xShapeInfo, - Pointer dx, @Cast("Nd4jLong*") LongPointer dxShapeInfo, - Pointer y, @Cast("Nd4jLong*") LongPointer yShapeInfo, - Pointer dy, @Cast("Nd4jLong*") LongPointer dyShapeInfo, + Pointer x, @Cast("const Nd4jLong*") LongPointer xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") LongPointer dxShapeInfo, + Pointer y, @Cast("const Nd4jLong*") LongPointer yShapeInfo, + Pointer dy, @Cast("const Nd4jLong*") LongPointer dyShapeInfo, @Cast("bool") boolean descending); public native void sortByValue(@Cast("Nd4jPointer*") PointerPointer extraPointers, - Pointer x, @Cast("Nd4jLong*") LongBuffer xShapeInfo, - Pointer dx, @Cast("Nd4jLong*") LongBuffer dxShapeInfo, - Pointer y, @Cast("Nd4jLong*") LongBuffer yShapeInfo, - Pointer dy, @Cast("Nd4jLong*") LongBuffer dyShapeInfo, + Pointer x, @Cast("const Nd4jLong*") LongBuffer xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") LongBuffer dxShapeInfo, + Pointer y, @Cast("const Nd4jLong*") LongBuffer yShapeInfo, + Pointer dy, @Cast("const Nd4jLong*") LongBuffer dyShapeInfo, @Cast("bool") boolean descending); public native void sortByValue(@Cast("Nd4jPointer*") PointerPointer extraPointers, - Pointer x, @Cast("Nd4jLong*") long[] xShapeInfo, - Pointer dx, @Cast("Nd4jLong*") long[] dxShapeInfo, - Pointer y, @Cast("Nd4jLong*") long[] yShapeInfo, - Pointer dy, @Cast("Nd4jLong*") long[] dyShapeInfo, + Pointer x, @Cast("const Nd4jLong*") long[] xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") long[] dxShapeInfo, + Pointer y, @Cast("const Nd4jLong*") long[] yShapeInfo, + Pointer dy, @Cast("const Nd4jLong*") long[] dyShapeInfo, 
@Cast("bool") boolean descending); public native void sortTad(@Cast("Nd4jPointer*") PointerPointer extraPointers, - Pointer x, @Cast("Nd4jLong*") LongPointer xShapeInfo, - Pointer dx, @Cast("Nd4jLong*") LongPointer dxShapeInfo, + Pointer x, @Cast("const Nd4jLong*") LongPointer xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") LongPointer dxShapeInfo, IntPointer dimension, int dimensionLength, - @Cast("Nd4jLong*") LongPointer tadShapeInfo, - @Cast("Nd4jLong*") LongPointer tadOffsets, + @Cast("const Nd4jLong*") LongPointer tadShapeInfo, + @Cast("const Nd4jLong*") LongPointer tadOffsets, @Cast("bool") boolean descending); public native void sortTad(@Cast("Nd4jPointer*") PointerPointer extraPointers, - Pointer x, @Cast("Nd4jLong*") LongBuffer xShapeInfo, - Pointer dx, @Cast("Nd4jLong*") LongBuffer dxShapeInfo, + Pointer x, @Cast("const Nd4jLong*") LongBuffer xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") LongBuffer dxShapeInfo, IntBuffer dimension, int dimensionLength, - @Cast("Nd4jLong*") LongBuffer tadShapeInfo, - @Cast("Nd4jLong*") LongBuffer tadOffsets, + @Cast("const Nd4jLong*") LongBuffer tadShapeInfo, + @Cast("const Nd4jLong*") LongBuffer tadOffsets, @Cast("bool") boolean descending); public native void sortTad(@Cast("Nd4jPointer*") PointerPointer extraPointers, - Pointer x, @Cast("Nd4jLong*") long[] xShapeInfo, - Pointer dx, @Cast("Nd4jLong*") long[] dxShapeInfo, + Pointer x, @Cast("const Nd4jLong*") long[] xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") long[] dxShapeInfo, int[] dimension, int dimensionLength, - @Cast("Nd4jLong*") long[] tadShapeInfo, - @Cast("Nd4jLong*") long[] tadOffsets, + @Cast("const Nd4jLong*") long[] tadShapeInfo, + @Cast("const Nd4jLong*") long[] tadOffsets, @Cast("bool") boolean descending); public native void sortTadByKey(@Cast("Nd4jPointer*") PointerPointer extraPointers, - Pointer x, @Cast("Nd4jLong*") LongPointer xShapeInfo, - Pointer dx, @Cast("Nd4jLong*") LongPointer dxShapeInfo, - Pointer y, @Cast("Nd4jLong*") 
LongPointer yShapeInfo, - Pointer dy, @Cast("Nd4jLong*") LongPointer dyShapeInfo, + Pointer x, @Cast("const Nd4jLong*") LongPointer xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") LongPointer dxShapeInfo, + Pointer y, @Cast("const Nd4jLong*") LongPointer yShapeInfo, + Pointer dy, @Cast("const Nd4jLong*") LongPointer dyShapeInfo, IntPointer dimension, int dimensionLength, @Cast("bool") boolean descending); public native void sortTadByKey(@Cast("Nd4jPointer*") PointerPointer extraPointers, - Pointer x, @Cast("Nd4jLong*") LongBuffer xShapeInfo, - Pointer dx, @Cast("Nd4jLong*") LongBuffer dxShapeInfo, - Pointer y, @Cast("Nd4jLong*") LongBuffer yShapeInfo, - Pointer dy, @Cast("Nd4jLong*") LongBuffer dyShapeInfo, + Pointer x, @Cast("const Nd4jLong*") LongBuffer xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") LongBuffer dxShapeInfo, + Pointer y, @Cast("const Nd4jLong*") LongBuffer yShapeInfo, + Pointer dy, @Cast("const Nd4jLong*") LongBuffer dyShapeInfo, IntBuffer dimension, int dimensionLength, @Cast("bool") boolean descending); public native void sortTadByKey(@Cast("Nd4jPointer*") PointerPointer extraPointers, - Pointer x, @Cast("Nd4jLong*") long[] xShapeInfo, - Pointer dx, @Cast("Nd4jLong*") long[] dxShapeInfo, - Pointer y, @Cast("Nd4jLong*") long[] yShapeInfo, - Pointer dy, @Cast("Nd4jLong*") long[] dyShapeInfo, + Pointer x, @Cast("const Nd4jLong*") long[] xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") long[] dxShapeInfo, + Pointer y, @Cast("const Nd4jLong*") long[] yShapeInfo, + Pointer dy, @Cast("const Nd4jLong*") long[] dyShapeInfo, int[] dimension, int dimensionLength, @Cast("bool") boolean descending); public native void sortTadByValue(@Cast("Nd4jPointer*") PointerPointer extraPointers, - Pointer x, @Cast("Nd4jLong*") LongPointer xShapeInfo, - Pointer dx, @Cast("Nd4jLong*") LongPointer dxShapeInfo, - Pointer y, @Cast("Nd4jLong*") LongPointer yShapeInfo, - Pointer dy, @Cast("Nd4jLong*") LongPointer dyShapeInfo, + Pointer x, @Cast("const Nd4jLong*") 
LongPointer xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") LongPointer dxShapeInfo, + Pointer y, @Cast("const Nd4jLong*") LongPointer yShapeInfo, + Pointer dy, @Cast("const Nd4jLong*") LongPointer dyShapeInfo, IntPointer dimension, int dimensionLength, @Cast("bool") boolean descending); public native void sortTadByValue(@Cast("Nd4jPointer*") PointerPointer extraPointers, - Pointer x, @Cast("Nd4jLong*") LongBuffer xShapeInfo, - Pointer dx, @Cast("Nd4jLong*") LongBuffer dxShapeInfo, - Pointer y, @Cast("Nd4jLong*") LongBuffer yShapeInfo, - Pointer dy, @Cast("Nd4jLong*") LongBuffer dyShapeInfo, + Pointer x, @Cast("const Nd4jLong*") LongBuffer xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") LongBuffer dxShapeInfo, + Pointer y, @Cast("const Nd4jLong*") LongBuffer yShapeInfo, + Pointer dy, @Cast("const Nd4jLong*") LongBuffer dyShapeInfo, IntBuffer dimension, int dimensionLength, @Cast("bool") boolean descending); public native void sortTadByValue(@Cast("Nd4jPointer*") PointerPointer extraPointers, - Pointer x, @Cast("Nd4jLong*") long[] xShapeInfo, - Pointer dx, @Cast("Nd4jLong*") long[] dxShapeInfo, - Pointer y, @Cast("Nd4jLong*") long[] yShapeInfo, - Pointer dy, @Cast("Nd4jLong*") long[] dyShapeInfo, + Pointer x, @Cast("const Nd4jLong*") long[] xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") long[] dxShapeInfo, + Pointer y, @Cast("const Nd4jLong*") long[] yShapeInfo, + Pointer dy, @Cast("const Nd4jLong*") long[] dyShapeInfo, int[] dimension, int dimensionLength, @Cast("bool") boolean descending); @@ -2998,7 +2974,7 @@ public native OpaqueShapeList calculateOutputShapes2(@Cast("Nd4jPointer*") Point public native OpaqueShapeList calculateOutputShapes2(@Cast("Nd4jPointer*") PointerPointer extraPointers, @Cast("Nd4jLong") long hash, @Cast("Nd4jPointer*") PointerPointer inputBuffers, @Cast("Nd4jPointer*") PointerPointer inputShapes, int numInputShapes, double[] tArgs, int numTArgs, @Cast("Nd4jLong*") long[] iArgs, int numIArgs, @Cast("bool*") boolean[] bArgs, int 
numBArgs, int[] dArgs, int numDArgs); public native @Cast("Nd4jLong") long getShapeListSize(OpaqueShapeList list); -public native @Cast("Nd4jLong*") LongPointer getShape(OpaqueShapeList list, @Cast("Nd4jLong") long i); +public native @Cast("const Nd4jLong*") LongPointer getShape(OpaqueShapeList list, @Cast("Nd4jLong") long i); public native void deleteShapeList(@Cast("Nd4jPointer") Pointer shapeList); @@ -3014,7 +2990,7 @@ public native OpaqueVariable getVariable(OpaqueVariablesSet set, @Cast("Nd4jLong public native int getVariableId(OpaqueVariable variable); public native int getVariableIndex(OpaqueVariable variable); public native @Cast("char*") String getVariableName(OpaqueVariable variable); -public native @Cast("Nd4jLong*") LongPointer getVariableShape(OpaqueVariable variable); +public native @Cast("const Nd4jLong*") LongPointer getVariableShape(OpaqueVariable variable); public native Pointer getVariableBuffer(OpaqueVariable variable); public native int unregisterGraph(@Cast("Nd4jPointer*") PointerPointer extraPointers, @Cast("Nd4jLong") long graphId); @@ -3033,9 +3009,9 @@ public native void deleteGraphState(@Cast("Nd4jPointer") Pointer state); public native void deleteResultWrapper(@Cast("Nd4jPointer") Pointer ptr); -public native int estimateThreshold(@Cast("Nd4jPointer*") PointerPointer extraPointers, @Cast("Nd4jPointer") Pointer x, @Cast("Nd4jLong*") LongPointer xShapeInfo, int N, float threshold); -public native int estimateThreshold(@Cast("Nd4jPointer*") PointerPointer extraPointers, @Cast("Nd4jPointer") Pointer x, @Cast("Nd4jLong*") LongBuffer xShapeInfo, int N, float threshold); -public native int estimateThreshold(@Cast("Nd4jPointer*") PointerPointer extraPointers, @Cast("Nd4jPointer") Pointer x, @Cast("Nd4jLong*") long[] xShapeInfo, int N, float threshold); +public native int estimateThreshold(@Cast("Nd4jPointer*") PointerPointer extraPointers, @Cast("Nd4jPointer") Pointer x, @Cast("const Nd4jLong*") LongPointer xShapeInfo, int N, float threshold); 
+public native int estimateThreshold(@Cast("Nd4jPointer*") PointerPointer extraPointers, @Cast("Nd4jPointer") Pointer x, @Cast("const Nd4jLong*") LongBuffer xShapeInfo, int N, float threshold); +public native int estimateThreshold(@Cast("Nd4jPointer*") PointerPointer extraPointers, @Cast("Nd4jPointer") Pointer x, @Cast("const Nd4jLong*") long[] xShapeInfo, int N, float threshold); // this method executes op that requires scope to be present: if/while/cond/whatever public native @Cast("Nd4jStatus") int execCustomOpWithScope(@Cast("Nd4jPointer*") PointerPointer extraPointers, @Cast("Nd4jPointer") Pointer state, @Cast("Nd4jLong") long opHash, @Cast("Nd4jLong*") LongPointer scopes, int numScopes, @Cast("Nd4jPointer*") PointerPointer inputBuffers, @Cast("Nd4jPointer*") PointerPointer inputShapes, int numInputs, @Cast("Nd4jPointer*") PointerPointer outputBuffers, @Cast("Nd4jPointer*") PointerPointer outputShapes, int numOutputs); @@ -3050,23 +3026,23 @@ public native @Cast("char*") BytePointer getUtf8StringBuffer(@Cast("Nd4jPointer* public native void deleteUtf8String(@Cast("Nd4jPointer*") PointerPointer extraPointers, @Cast("Nd4jPointer") Pointer ptr); public native void scatterUpdate(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opCode, int numOfSubArrs, - Pointer hX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer hXOffsets, - Pointer dX, @Cast("Nd4jLong*") LongPointer dXShapeInfo, @Cast("Nd4jLong*") LongPointer dXOffsets, - Pointer hY, @Cast("Nd4jLong*") LongPointer hYShapeInfo, @Cast("Nd4jLong*") LongPointer hYOffsets, - Pointer dY, @Cast("Nd4jLong*") LongPointer dYShapeInfo, @Cast("Nd4jLong*") LongPointer dYOffsets, - Pointer hIindexes, @Cast("Nd4jLong*") LongPointer hIndicesShapeInfo, Pointer dIindexes, @Cast("Nd4jLong*") LongPointer dIndicesShapeInfo); + Pointer hX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer hXOffsets, + Pointer dX, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, 
@Cast("const Nd4jLong*") LongPointer dXOffsets, + Pointer hY, @Cast("const Nd4jLong*") LongPointer hYShapeInfo, @Cast("const Nd4jLong*") LongPointer hYOffsets, + Pointer dY, @Cast("const Nd4jLong*") LongPointer dYShapeInfo, @Cast("const Nd4jLong*") LongPointer dYOffsets, + Pointer hIindexes, @Cast("const Nd4jLong*") LongPointer hIndicesShapeInfo, Pointer dIindexes, @Cast("const Nd4jLong*") LongPointer dIndicesShapeInfo); public native void scatterUpdate(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opCode, int numOfSubArrs, - Pointer hX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer hXOffsets, - Pointer dX, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXOffsets, - Pointer hY, @Cast("Nd4jLong*") LongBuffer hYShapeInfo, @Cast("Nd4jLong*") LongBuffer hYOffsets, - Pointer dY, @Cast("Nd4jLong*") LongBuffer dYShapeInfo, @Cast("Nd4jLong*") LongBuffer dYOffsets, - Pointer hIindexes, @Cast("Nd4jLong*") LongBuffer hIndicesShapeInfo, Pointer dIindexes, @Cast("Nd4jLong*") LongBuffer dIndicesShapeInfo); + Pointer hX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer hXOffsets, + Pointer dX, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXOffsets, + Pointer hY, @Cast("const Nd4jLong*") LongBuffer hYShapeInfo, @Cast("const Nd4jLong*") LongBuffer hYOffsets, + Pointer dY, @Cast("const Nd4jLong*") LongBuffer dYShapeInfo, @Cast("const Nd4jLong*") LongBuffer dYOffsets, + Pointer hIindexes, @Cast("const Nd4jLong*") LongBuffer hIndicesShapeInfo, Pointer dIindexes, @Cast("const Nd4jLong*") LongBuffer dIndicesShapeInfo); public native void scatterUpdate(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opCode, int numOfSubArrs, - Pointer hX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] hXOffsets, - Pointer dX, @Cast("Nd4jLong*") long[] dXShapeInfo, @Cast("Nd4jLong*") long[] dXOffsets, - Pointer hY, @Cast("Nd4jLong*") long[] 
hYShapeInfo, @Cast("Nd4jLong*") long[] hYOffsets, - Pointer dY, @Cast("Nd4jLong*") long[] dYShapeInfo, @Cast("Nd4jLong*") long[] dYOffsets, - Pointer hIindexes, @Cast("Nd4jLong*") long[] hIndicesShapeInfo, Pointer dIindexes, @Cast("Nd4jLong*") long[] dIndicesShapeInfo); + Pointer hX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] hXOffsets, + Pointer dX, @Cast("const Nd4jLong*") long[] dXShapeInfo, @Cast("const Nd4jLong*") long[] dXOffsets, + Pointer hY, @Cast("const Nd4jLong*") long[] hYShapeInfo, @Cast("const Nd4jLong*") long[] hYOffsets, + Pointer dY, @Cast("const Nd4jLong*") long[] dYShapeInfo, @Cast("const Nd4jLong*") long[] dYOffsets, + Pointer hIindexes, @Cast("const Nd4jLong*") long[] hIndicesShapeInfo, Pointer dIindexes, @Cast("const Nd4jLong*") long[] dIndicesShapeInfo); public native void inspectArray(@Cast("Nd4jPointer*") PointerPointer extraPointers, @Cast("Nd4jPointer") Pointer buffer, @Cast("Nd4jLong*") LongPointer shapeInfo, @Cast("Nd4jPointer") Pointer specialBuffer, @Cast("Nd4jLong*") LongPointer specialShapeInfo, @Cast("Nd4jPointer") Pointer debugInfo); public native void inspectArray(@Cast("Nd4jPointer*") PointerPointer extraPointers, @Cast("Nd4jPointer") Pointer buffer, @Cast("Nd4jLong*") LongBuffer shapeInfo, @Cast("Nd4jPointer") Pointer specialBuffer, @Cast("Nd4jLong*") LongBuffer specialShapeInfo, @Cast("Nd4jPointer") Pointer debugInfo); @@ -3076,9 +3052,9 @@ public native OpaqueConstantDataBuffer shapeBuffer(int rank, @Cast("Nd4jLong*") public native OpaqueConstantDataBuffer shapeBuffer(int rank, @Cast("Nd4jLong*") LongBuffer shape, @Cast("Nd4jLong*") LongBuffer strides, @Cast("sd::DataType") int dtype, char order, @Cast("Nd4jLong") long ews, @Cast("bool") boolean empty); public native OpaqueConstantDataBuffer shapeBuffer(int rank, @Cast("Nd4jLong*") long[] shape, @Cast("Nd4jLong*") long[] strides, @Cast("sd::DataType") int dtype, char order, @Cast("Nd4jLong") long ews, @Cast("bool") boolean empty); -public 
native OpaqueConstantDataBuffer constantBufferLong(@Cast("sd::DataType") int dtype, @Cast("Nd4jLong*") LongPointer data, int length); -public native OpaqueConstantDataBuffer constantBufferLong(@Cast("sd::DataType") int dtype, @Cast("Nd4jLong*") LongBuffer data, int length); -public native OpaqueConstantDataBuffer constantBufferLong(@Cast("sd::DataType") int dtype, @Cast("Nd4jLong*") long[] data, int length); +public native OpaqueConstantDataBuffer constantBufferLong(@Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") LongPointer data, int length); +public native OpaqueConstantDataBuffer constantBufferLong(@Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") LongBuffer data, int length); +public native OpaqueConstantDataBuffer constantBufferLong(@Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") long[] data, int length); public native OpaqueConstantDataBuffer constantBufferDouble(@Cast("sd::DataType") int dtype, DoublePointer data, int length); public native OpaqueConstantDataBuffer constantBufferDouble(@Cast("sd::DataType") int dtype, DoubleBuffer data, int length); public native OpaqueConstantDataBuffer constantBufferDouble(@Cast("sd::DataType") int dtype, double[] data, int length); @@ -3685,16 +3661,16 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); /** * do not allocate memory, memory for array is passed from outside */ - public NDArray(Pointer buffer, @Cast("Nd4jLong*") LongPointer shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/) { super((Pointer)null); allocate(buffer, shapeInfo, context, isBuffAlloc); } - private native void allocate(Pointer buffer, @Cast("Nd4jLong*") LongPointer shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/); + public NDArray(Pointer buffer, @Cast("Nd4jLong*") LongPointer shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, 
@Cast("bool") boolean isBuffAlloc/*=false*/) { super((Pointer)null); allocate(buffer, shapeInfo, context, isBuffAlloc); } + private native void allocate(Pointer buffer, @Cast("Nd4jLong*") LongPointer shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean isBuffAlloc/*=false*/); public NDArray(Pointer buffer, @Cast("Nd4jLong*") LongPointer shapeInfo) { super((Pointer)null); allocate(buffer, shapeInfo); } private native void allocate(Pointer buffer, @Cast("Nd4jLong*") LongPointer shapeInfo); - public NDArray(Pointer buffer, @Cast("Nd4jLong*") LongBuffer shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/) { super((Pointer)null); allocate(buffer, shapeInfo, context, isBuffAlloc); } - private native void allocate(Pointer buffer, @Cast("Nd4jLong*") LongBuffer shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/); + public NDArray(Pointer buffer, @Cast("Nd4jLong*") LongBuffer shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean isBuffAlloc/*=false*/) { super((Pointer)null); allocate(buffer, shapeInfo, context, isBuffAlloc); } + private native void allocate(Pointer buffer, @Cast("Nd4jLong*") LongBuffer shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean isBuffAlloc/*=false*/); public NDArray(Pointer buffer, @Cast("Nd4jLong*") LongBuffer shapeInfo) { super((Pointer)null); allocate(buffer, shapeInfo); } private native void allocate(Pointer buffer, @Cast("Nd4jLong*") LongBuffer shapeInfo); - public NDArray(Pointer buffer, @Cast("Nd4jLong*") long[] shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/) { super((Pointer)null); allocate(buffer, shapeInfo, context, isBuffAlloc); } - private native void allocate(Pointer buffer, 
@Cast("Nd4jLong*") long[] shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/); + public NDArray(Pointer buffer, @Cast("Nd4jLong*") long[] shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean isBuffAlloc/*=false*/) { super((Pointer)null); allocate(buffer, shapeInfo, context, isBuffAlloc); } + private native void allocate(Pointer buffer, @Cast("Nd4jLong*") long[] shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean isBuffAlloc/*=false*/); public NDArray(Pointer buffer, @Cast("Nd4jLong*") long[] shapeInfo) { super((Pointer)null); allocate(buffer, shapeInfo); } private native void allocate(Pointer buffer, @Cast("Nd4jLong*") long[] shapeInfo); @@ -3702,18 +3678,18 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); * do not allocate memory, memory for array is passed from outside * we suppose the content of both (device and host) buffers is identical */ - public NDArray(Pointer buffer, Pointer bufferD, @Cast("Nd4jLong*") LongPointer shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/, @Cast("const bool") boolean isBuffDAlloc/*=false*/) { super((Pointer)null); allocate(buffer, bufferD, shapeInfo, context, isBuffAlloc, isBuffDAlloc); } - private native void allocate(Pointer buffer, Pointer bufferD, @Cast("Nd4jLong*") LongPointer shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/, @Cast("const bool") boolean isBuffDAlloc/*=false*/); - public NDArray(Pointer buffer, Pointer bufferD, @Cast("Nd4jLong*") LongPointer shapeInfo) { super((Pointer)null); allocate(buffer, bufferD, shapeInfo); } - private native void allocate(Pointer buffer, Pointer bufferD, @Cast("Nd4jLong*") LongPointer shapeInfo); - public NDArray(Pointer buffer, Pointer bufferD, @Cast("Nd4jLong*") 
LongBuffer shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/, @Cast("const bool") boolean isBuffDAlloc/*=false*/) { super((Pointer)null); allocate(buffer, bufferD, shapeInfo, context, isBuffAlloc, isBuffDAlloc); } - private native void allocate(Pointer buffer, Pointer bufferD, @Cast("Nd4jLong*") LongBuffer shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/, @Cast("const bool") boolean isBuffDAlloc/*=false*/); - public NDArray(Pointer buffer, Pointer bufferD, @Cast("Nd4jLong*") LongBuffer shapeInfo) { super((Pointer)null); allocate(buffer, bufferD, shapeInfo); } - private native void allocate(Pointer buffer, Pointer bufferD, @Cast("Nd4jLong*") LongBuffer shapeInfo); - public NDArray(Pointer buffer, Pointer bufferD, @Cast("Nd4jLong*") long[] shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/, @Cast("const bool") boolean isBuffDAlloc/*=false*/) { super((Pointer)null); allocate(buffer, bufferD, shapeInfo, context, isBuffAlloc, isBuffDAlloc); } - private native void allocate(Pointer buffer, Pointer bufferD, @Cast("Nd4jLong*") long[] shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/, @Cast("const bool") boolean isBuffDAlloc/*=false*/); - public NDArray(Pointer buffer, Pointer bufferD, @Cast("Nd4jLong*") long[] shapeInfo) { super((Pointer)null); allocate(buffer, bufferD, shapeInfo); } - private native void allocate(Pointer buffer, Pointer bufferD, @Cast("Nd4jLong*") long[] shapeInfo); + public NDArray(Pointer buffer, Pointer bufferD, @Cast("const Nd4jLong*") LongPointer shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean isBuffAlloc/*=false*/, @Cast("bool") boolean isBuffDAlloc/*=false*/) { super((Pointer)null); allocate(buffer, bufferD, 
shapeInfo, context, isBuffAlloc, isBuffDAlloc); } + private native void allocate(Pointer buffer, Pointer bufferD, @Cast("const Nd4jLong*") LongPointer shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean isBuffAlloc/*=false*/, @Cast("bool") boolean isBuffDAlloc/*=false*/); + public NDArray(Pointer buffer, Pointer bufferD, @Cast("const Nd4jLong*") LongPointer shapeInfo) { super((Pointer)null); allocate(buffer, bufferD, shapeInfo); } + private native void allocate(Pointer buffer, Pointer bufferD, @Cast("const Nd4jLong*") LongPointer shapeInfo); + public NDArray(Pointer buffer, Pointer bufferD, @Cast("const Nd4jLong*") LongBuffer shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean isBuffAlloc/*=false*/, @Cast("bool") boolean isBuffDAlloc/*=false*/) { super((Pointer)null); allocate(buffer, bufferD, shapeInfo, context, isBuffAlloc, isBuffDAlloc); } + private native void allocate(Pointer buffer, Pointer bufferD, @Cast("const Nd4jLong*") LongBuffer shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean isBuffAlloc/*=false*/, @Cast("bool") boolean isBuffDAlloc/*=false*/); + public NDArray(Pointer buffer, Pointer bufferD, @Cast("const Nd4jLong*") LongBuffer shapeInfo) { super((Pointer)null); allocate(buffer, bufferD, shapeInfo); } + private native void allocate(Pointer buffer, Pointer bufferD, @Cast("const Nd4jLong*") LongBuffer shapeInfo); + public NDArray(Pointer buffer, Pointer bufferD, @Cast("const Nd4jLong*") long[] shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean isBuffAlloc/*=false*/, @Cast("bool") boolean isBuffDAlloc/*=false*/) { super((Pointer)null); allocate(buffer, bufferD, shapeInfo, context, isBuffAlloc, isBuffDAlloc); } + private native void allocate(Pointer buffer, Pointer bufferD, @Cast("const Nd4jLong*") long[] shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, 
@Cast("bool") boolean isBuffAlloc/*=false*/, @Cast("bool") boolean isBuffDAlloc/*=false*/); + public NDArray(Pointer buffer, Pointer bufferD, @Cast("const Nd4jLong*") long[] shapeInfo) { super((Pointer)null); allocate(buffer, bufferD, shapeInfo); } + private native void allocate(Pointer buffer, Pointer bufferD, @Cast("const Nd4jLong*") long[] shapeInfo); /** * copy constructor @@ -3735,83 +3711,83 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); /** * constructor creates new NDArray using shape information from "shapeInfo", set all elements in new array to zeros, if copyStrides is true then use stride values from "shapeInfo", else calculate strides independently */ - public NDArray(@Cast("Nd4jLong*") LongPointer shapeInfo, @Cast("const bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean nullify/*=true*/) { super((Pointer)null); allocate(shapeInfo, copyStrides, context, nullify); } - private native void allocate(@Cast("Nd4jLong*") LongPointer shapeInfo, @Cast("const bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean nullify/*=true*/); - public NDArray(@Cast("Nd4jLong*") LongPointer shapeInfo) { super((Pointer)null); allocate(shapeInfo); } - private native void allocate(@Cast("Nd4jLong*") LongPointer shapeInfo); - public NDArray(@Cast("Nd4jLong*") LongBuffer shapeInfo, @Cast("const bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean nullify/*=true*/) { super((Pointer)null); allocate(shapeInfo, copyStrides, context, nullify); } - private native void allocate(@Cast("Nd4jLong*") LongBuffer shapeInfo, @Cast("const bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean nullify/*=true*/); - public NDArray(@Cast("Nd4jLong*") LongBuffer shapeInfo) { 
super((Pointer)null); allocate(shapeInfo); } - private native void allocate(@Cast("Nd4jLong*") LongBuffer shapeInfo); - public NDArray(@Cast("Nd4jLong*") long[] shapeInfo, @Cast("const bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean nullify/*=true*/) { super((Pointer)null); allocate(shapeInfo, copyStrides, context, nullify); } - private native void allocate(@Cast("Nd4jLong*") long[] shapeInfo, @Cast("const bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean nullify/*=true*/); - public NDArray(@Cast("Nd4jLong*") long[] shapeInfo) { super((Pointer)null); allocate(shapeInfo); } - private native void allocate(@Cast("Nd4jLong*") long[] shapeInfo); + public NDArray(@Cast("const Nd4jLong*") LongPointer shapeInfo, @Cast("bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean nullify/*=true*/) { super((Pointer)null); allocate(shapeInfo, copyStrides, context, nullify); } + private native void allocate(@Cast("const Nd4jLong*") LongPointer shapeInfo, @Cast("bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean nullify/*=true*/); + public NDArray(@Cast("const Nd4jLong*") LongPointer shapeInfo) { super((Pointer)null); allocate(shapeInfo); } + private native void allocate(@Cast("const Nd4jLong*") LongPointer shapeInfo); + public NDArray(@Cast("const Nd4jLong*") LongBuffer shapeInfo, @Cast("bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean nullify/*=true*/) { super((Pointer)null); allocate(shapeInfo, copyStrides, context, nullify); } + private native void allocate(@Cast("const Nd4jLong*") LongBuffer shapeInfo, @Cast("bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, 
@Cast("bool") boolean nullify/*=true*/); + public NDArray(@Cast("const Nd4jLong*") LongBuffer shapeInfo) { super((Pointer)null); allocate(shapeInfo); } + private native void allocate(@Cast("const Nd4jLong*") LongBuffer shapeInfo); + public NDArray(@Cast("const Nd4jLong*") long[] shapeInfo, @Cast("bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean nullify/*=true*/) { super((Pointer)null); allocate(shapeInfo, copyStrides, context, nullify); } + private native void allocate(@Cast("const Nd4jLong*") long[] shapeInfo, @Cast("bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean nullify/*=true*/); + public NDArray(@Cast("const Nd4jLong*") long[] shapeInfo) { super((Pointer)null); allocate(shapeInfo); } + private native void allocate(@Cast("const Nd4jLong*") long[] shapeInfo); /** * constructor creates new NDArray using shape information from "shapeInfo", set all elements in new array to be zeros, if copyStrides is true then use stride values from "shapeInfo", else calculate strides independently * set dtype as array type */ - public NDArray(@Cast("Nd4jLong*") LongPointer shapeInfo, @Cast("const sd::DataType") int dtype, @Cast("const bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean nullify/*=true*/) { super((Pointer)null); allocate(shapeInfo, dtype, copyStrides, context, nullify); } - private native void allocate(@Cast("Nd4jLong*") LongPointer shapeInfo, @Cast("const sd::DataType") int dtype, @Cast("const bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean nullify/*=true*/); - public NDArray(@Cast("Nd4jLong*") LongPointer shapeInfo, @Cast("const sd::DataType") int dtype) { super((Pointer)null); allocate(shapeInfo, dtype); } - private native void allocate(@Cast("Nd4jLong*") 
LongPointer shapeInfo, @Cast("const sd::DataType") int dtype); - public NDArray(@Cast("Nd4jLong*") LongBuffer shapeInfo, @Cast("const sd::DataType") int dtype, @Cast("const bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean nullify/*=true*/) { super((Pointer)null); allocate(shapeInfo, dtype, copyStrides, context, nullify); } - private native void allocate(@Cast("Nd4jLong*") LongBuffer shapeInfo, @Cast("const sd::DataType") int dtype, @Cast("const bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean nullify/*=true*/); - public NDArray(@Cast("Nd4jLong*") LongBuffer shapeInfo, @Cast("const sd::DataType") int dtype) { super((Pointer)null); allocate(shapeInfo, dtype); } - private native void allocate(@Cast("Nd4jLong*") LongBuffer shapeInfo, @Cast("const sd::DataType") int dtype); - public NDArray(@Cast("Nd4jLong*") long[] shapeInfo, @Cast("const sd::DataType") int dtype, @Cast("const bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean nullify/*=true*/) { super((Pointer)null); allocate(shapeInfo, dtype, copyStrides, context, nullify); } - private native void allocate(@Cast("Nd4jLong*") long[] shapeInfo, @Cast("const sd::DataType") int dtype, @Cast("const bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean nullify/*=true*/); - public NDArray(@Cast("Nd4jLong*") long[] shapeInfo, @Cast("const sd::DataType") int dtype) { super((Pointer)null); allocate(shapeInfo, dtype); } - private native void allocate(@Cast("Nd4jLong*") long[] shapeInfo, @Cast("const sd::DataType") int dtype); + public NDArray(@Cast("const Nd4jLong*") LongPointer shapeInfo, @Cast("sd::DataType") int dtype, @Cast("bool") boolean copyStrides/*=false*/, LaunchContext 
context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean nullify/*=true*/) { super((Pointer)null); allocate(shapeInfo, dtype, copyStrides, context, nullify); } + private native void allocate(@Cast("const Nd4jLong*") LongPointer shapeInfo, @Cast("sd::DataType") int dtype, @Cast("bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean nullify/*=true*/); + public NDArray(@Cast("const Nd4jLong*") LongPointer shapeInfo, @Cast("sd::DataType") int dtype) { super((Pointer)null); allocate(shapeInfo, dtype); } + private native void allocate(@Cast("const Nd4jLong*") LongPointer shapeInfo, @Cast("sd::DataType") int dtype); + public NDArray(@Cast("const Nd4jLong*") LongBuffer shapeInfo, @Cast("sd::DataType") int dtype, @Cast("bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean nullify/*=true*/) { super((Pointer)null); allocate(shapeInfo, dtype, copyStrides, context, nullify); } + private native void allocate(@Cast("const Nd4jLong*") LongBuffer shapeInfo, @Cast("sd::DataType") int dtype, @Cast("bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean nullify/*=true*/); + public NDArray(@Cast("const Nd4jLong*") LongBuffer shapeInfo, @Cast("sd::DataType") int dtype) { super((Pointer)null); allocate(shapeInfo, dtype); } + private native void allocate(@Cast("const Nd4jLong*") LongBuffer shapeInfo, @Cast("sd::DataType") int dtype); + public NDArray(@Cast("const Nd4jLong*") long[] shapeInfo, @Cast("sd::DataType") int dtype, @Cast("bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean nullify/*=true*/) { super((Pointer)null); allocate(shapeInfo, dtype, copyStrides, context, nullify); } + private native void allocate(@Cast("const Nd4jLong*") long[] shapeInfo, @Cast("sd::DataType") int dtype, @Cast("bool") 
boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean nullify/*=true*/); + public NDArray(@Cast("const Nd4jLong*") long[] shapeInfo, @Cast("sd::DataType") int dtype) { super((Pointer)null); allocate(shapeInfo, dtype); } + private native void allocate(@Cast("const Nd4jLong*") long[] shapeInfo, @Cast("sd::DataType") int dtype); /** * this constructor creates new array using shape information contained in vector argument */ - public NDArray(byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/) { super((Pointer)null); allocate(order, shape, dtype, context); } - private native void allocate(byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/); - public NDArray(byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape) { super((Pointer)null); allocate(order, shape); } - private native void allocate(byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape); - public NDArray(byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/) { super((Pointer)null); allocate(order, shape, dtype, context); } - private native void allocate(byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/); - public NDArray(byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape) { super((Pointer)null); allocate(order, shape); } - private native void allocate(byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape); - public NDArray(byte order, @Cast("Nd4jLong*") @StdVector long[] shape, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext 
context/*=sd::LaunchContext::defaultContext()*/) { super((Pointer)null); allocate(order, shape, dtype, context); } - private native void allocate(byte order, @Cast("Nd4jLong*") @StdVector long[] shape, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/); - public NDArray(byte order, @Cast("Nd4jLong*") @StdVector long[] shape) { super((Pointer)null); allocate(order, shape); } - private native void allocate(byte order, @Cast("Nd4jLong*") @StdVector long[] shape); + public NDArray(char order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/) { super((Pointer)null); allocate(order, shape, dtype, context); } + private native void allocate(char order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/); + public NDArray(char order, @Cast("Nd4jLong*") @StdVector LongPointer shape) { super((Pointer)null); allocate(order, shape); } + private native void allocate(char order, @Cast("Nd4jLong*") @StdVector LongPointer shape); + public NDArray(char order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/) { super((Pointer)null); allocate(order, shape, dtype, context); } + private native void allocate(char order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/); + public NDArray(char order, @Cast("Nd4jLong*") @StdVector LongBuffer shape) { super((Pointer)null); allocate(order, shape); } + private native void allocate(char order, @Cast("Nd4jLong*") @StdVector LongBuffer shape); + public NDArray(char order, @Cast("Nd4jLong*") @StdVector long[] shape, @Cast("sd::DataType") int 
dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/) { super((Pointer)null); allocate(order, shape, dtype, context); } + private native void allocate(char order, @Cast("Nd4jLong*") @StdVector long[] shape, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/); + public NDArray(char order, @Cast("Nd4jLong*") @StdVector long[] shape) { super((Pointer)null); allocate(order, shape); } + private native void allocate(char order, @Cast("Nd4jLong*") @StdVector long[] shape); /** * This constructor creates new array with elements copied from data and using shape information stored in shape, elements from data will be casted to dtype */ - public NDArray(byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @StdVector DoublePointer data, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/) { super((Pointer)null); allocate(order, shape, data, dtype, context); } - private native void allocate(byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @StdVector DoublePointer data, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/); - public NDArray(byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @StdVector DoublePointer data) { super((Pointer)null); allocate(order, shape, data); } - private native void allocate(byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @StdVector DoublePointer data); - public NDArray(byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @StdVector DoubleBuffer data, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/) { super((Pointer)null); allocate(order, shape, data, dtype, context); } - private native void allocate(byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @StdVector DoubleBuffer data, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, 
LaunchContext context/*=sd::LaunchContext::defaultContext()*/); - public NDArray(byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @StdVector DoubleBuffer data) { super((Pointer)null); allocate(order, shape, data); } - private native void allocate(byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @StdVector DoubleBuffer data); - public NDArray(byte order, @Cast("Nd4jLong*") @StdVector long[] shape, @StdVector double[] data, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/) { super((Pointer)null); allocate(order, shape, data, dtype, context); } - private native void allocate(byte order, @Cast("Nd4jLong*") @StdVector long[] shape, @StdVector double[] data, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/); - public NDArray(byte order, @Cast("Nd4jLong*") @StdVector long[] shape, @StdVector double[] data) { super((Pointer)null); allocate(order, shape, data); } - private native void allocate(byte order, @Cast("Nd4jLong*") @StdVector long[] shape, @StdVector double[] data); + public NDArray(char order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @StdVector DoublePointer data, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/) { super((Pointer)null); allocate(order, shape, data, dtype, context); } + private native void allocate(char order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @StdVector DoublePointer data, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/); + public NDArray(char order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @StdVector DoublePointer data) { super((Pointer)null); allocate(order, shape, data); } + private native void allocate(char order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @StdVector DoublePointer data); + public NDArray(char order, @Cast("Nd4jLong*") @StdVector 
LongBuffer shape, @StdVector DoubleBuffer data, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/) { super((Pointer)null); allocate(order, shape, data, dtype, context); } + private native void allocate(char order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @StdVector DoubleBuffer data, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/); + public NDArray(char order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @StdVector DoubleBuffer data) { super((Pointer)null); allocate(order, shape, data); } + private native void allocate(char order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @StdVector DoubleBuffer data); + public NDArray(char order, @Cast("Nd4jLong*") @StdVector long[] shape, @StdVector double[] data, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/) { super((Pointer)null); allocate(order, shape, data, dtype, context); } + private native void allocate(char order, @Cast("Nd4jLong*") @StdVector long[] shape, @StdVector double[] data, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/); + public NDArray(char order, @Cast("Nd4jLong*") @StdVector long[] shape, @StdVector double[] data) { super((Pointer)null); allocate(order, shape, data); } + private native void allocate(char order, @Cast("Nd4jLong*") @StdVector long[] shape, @StdVector double[] data); /** * this constructor creates new array using given buffer (without memory allocation) and shape information stored in shape */ - public NDArray(Pointer buffer, byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("sd::DataType") int dtype, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/) { super((Pointer)null); allocate(buffer, order, shape, dtype, context, isBuffAlloc); } - private native 
void allocate(Pointer buffer, byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("sd::DataType") int dtype, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/); - public NDArray(Pointer buffer, byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("sd::DataType") int dtype) { super((Pointer)null); allocate(buffer, order, shape, dtype); } - private native void allocate(Pointer buffer, byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("sd::DataType") int dtype); - public NDArray(Pointer buffer, byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @Cast("sd::DataType") int dtype, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/) { super((Pointer)null); allocate(buffer, order, shape, dtype, context, isBuffAlloc); } - private native void allocate(Pointer buffer, byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @Cast("sd::DataType") int dtype, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/); - public NDArray(Pointer buffer, byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @Cast("sd::DataType") int dtype) { super((Pointer)null); allocate(buffer, order, shape, dtype); } - private native void allocate(Pointer buffer, byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @Cast("sd::DataType") int dtype); - public NDArray(Pointer buffer, byte order, @Cast("Nd4jLong*") @StdVector long[] shape, @Cast("sd::DataType") int dtype, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/) { super((Pointer)null); allocate(buffer, order, shape, dtype, context, isBuffAlloc); } - private native void allocate(Pointer buffer, byte order, @Cast("Nd4jLong*") @StdVector long[] shape, @Cast("sd::DataType") int dtype, LaunchContext 
context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/); - public NDArray(Pointer buffer, byte order, @Cast("Nd4jLong*") @StdVector long[] shape, @Cast("sd::DataType") int dtype) { super((Pointer)null); allocate(buffer, order, shape, dtype); } - private native void allocate(Pointer buffer, byte order, @Cast("Nd4jLong*") @StdVector long[] shape, @Cast("sd::DataType") int dtype); + public NDArray(Pointer buffer, char order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("sd::DataType") int dtype, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/) { super((Pointer)null); allocate(buffer, order, shape, dtype, context, isBuffAlloc); } + private native void allocate(Pointer buffer, char order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("sd::DataType") int dtype, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/); + public NDArray(Pointer buffer, char order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("sd::DataType") int dtype) { super((Pointer)null); allocate(buffer, order, shape, dtype); } + private native void allocate(Pointer buffer, char order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("sd::DataType") int dtype); + public NDArray(Pointer buffer, char order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @Cast("sd::DataType") int dtype, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/) { super((Pointer)null); allocate(buffer, order, shape, dtype, context, isBuffAlloc); } + private native void allocate(Pointer buffer, char order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @Cast("sd::DataType") int dtype, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/); + public NDArray(Pointer buffer, char order, @Cast("Nd4jLong*") @StdVector 
LongBuffer shape, @Cast("sd::DataType") int dtype) { super((Pointer)null); allocate(buffer, order, shape, dtype); } + private native void allocate(Pointer buffer, char order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @Cast("sd::DataType") int dtype); + public NDArray(Pointer buffer, char order, @Cast("Nd4jLong*") @StdVector long[] shape, @Cast("sd::DataType") int dtype, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/) { super((Pointer)null); allocate(buffer, order, shape, dtype, context, isBuffAlloc); } + private native void allocate(Pointer buffer, char order, @Cast("Nd4jLong*") @StdVector long[] shape, @Cast("sd::DataType") int dtype, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/); + public NDArray(Pointer buffer, char order, @Cast("Nd4jLong*") @StdVector long[] shape, @Cast("sd::DataType") int dtype) { super((Pointer)null); allocate(buffer, order, shape, dtype); } + private native void allocate(Pointer buffer, char order, @Cast("Nd4jLong*") @StdVector long[] shape, @Cast("sd::DataType") int dtype); /** * This method returns new array with the same shape & data type @@ -3830,14 +3806,14 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); * this constructor creates new NDArray with shape matching "other" array, * doesn't copy "other" elements into new array !!! 
*/ - public NDArray(@Const NDArray other, @Cast("const bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/) { super((Pointer)null); allocate(other, copyStrides, context); } - private native void allocate(@Const NDArray other, @Cast("const bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/); + public NDArray(@Const NDArray other, @Cast("bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/) { super((Pointer)null); allocate(other, copyStrides, context); } + private native void allocate(@Const NDArray other, @Cast("bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/); /** * this constructor creates scalar(and set its value = 0) or empty array depending on bool argument isScalar */ - public NDArray(@Cast("sd::DataType") int dtype, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isScalar/*=true*/) { super((Pointer)null); allocate(dtype, context, isScalar); } - private native void allocate(@Cast("sd::DataType") int dtype, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isScalar/*=true*/); + public NDArray(@Cast("sd::DataType") int dtype, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean isScalar/*=true*/) { super((Pointer)null); allocate(dtype, context, isScalar); } + private native void allocate(@Cast("sd::DataType") int dtype, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean isScalar/*=true*/); public NDArray(@Cast("sd::DataType") int dtype) { super((Pointer)null); allocate(dtype); } private native void allocate(@Cast("sd::DataType") int dtype); @@ -3885,7 +3861,6 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); * @return */ public native Pointer bufferWithOffset(@Cast("Nd4jLong") long offset); - public native 
Pointer specialBufferWithOffset(@Cast("Nd4jLong") long offset); /** * copy assignment operator @@ -3967,33 +3942,28 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); /** * returns host buffer */ - public native Pointer getBuffer(); public native Pointer buffer(); /** * returns buffer offset (offset is the same for host and device buffers) */ - public native @Cast("Nd4jLong") long getBufferOffset(); public native @Cast("Nd4jLong") long bufferOffset(); /** * if _bufferD==nullptr return _buffer, else return _bufferD */ public native Pointer specialBuffer(); - public native Pointer getSpecialBuffer(); /** * returns device buffer if compilation is for cuda case, otherwise returns host buffer */ - public native Pointer getPlatformBuffer(); public native Pointer platformBuffer(); /** * returns _shapeInfo */ - public native @Cast("Nd4jLong*") LongPointer shapeInfo(); - public native @Cast("Nd4jLong*") LongPointer getShapeInfo(); + public native @Cast("const Nd4jLong*") LongPointer shapeInfo(); /** @@ -4005,12 +3975,9 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); /** * if _shapeInfoD==nullptr return _shapeInfo, else return _shapeInfoD */ - public native @Cast("Nd4jLong*") LongPointer specialShapeInfo(); - public native @Cast("Nd4jLong*") LongPointer getSpecialShapeInfo(); + public native @Cast("const Nd4jLong*") LongPointer specialShapeInfo(); - - public native @Cast("Nd4jLong*") LongPointer platformShapeInfo(); - public native @Cast("Nd4jLong*") LongPointer getPlatformShapeInfo(); + public native @Cast("const Nd4jLong*") LongPointer platformShapeInfo(); /** * permutes (in-place) the dimensions in array according to "dimensions" array @@ -4876,9 +4843,6 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); ////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////// - - 
////////////////////////////////////////////////////////////////////////// @@ -4888,12 +4852,6 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); //////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////// - - -//////////////////////////////////////////////////////////////////////// - - // #if defined(__CUDACC__) //&& defined(BUILD_TESTS) // for CUDA we need stil stuff inline @@ -6278,12 +6236,12 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); public native void setObjectsSize(@Cast("Nd4jLong") long bytes); public native void setTotalSize(@Cast("Nd4jLong") long bytes); - public native void addInputShape(@Cast("Nd4jLong*") LongPointer shapeInfo); - public native void addInputShape(@Cast("Nd4jLong*") LongBuffer shapeInfo); - public native void addInputShape(@Cast("Nd4jLong*") long[] shapeInfo); - public native void addOutputShape(@Cast("Nd4jLong*") LongPointer shapeInfo); - public native void addOutputShape(@Cast("Nd4jLong*") LongBuffer shapeInfo); - public native void addOutputShape(@Cast("Nd4jLong*") long[] shapeInfo); + public native void addInputShape(@Cast("const Nd4jLong*") LongPointer shapeInfo); + public native void addInputShape(@Cast("const Nd4jLong*") LongBuffer shapeInfo); + public native void addInputShape(@Cast("const Nd4jLong*") long[] shapeInfo); + public native void addOutputShape(@Cast("const Nd4jLong*") LongPointer shapeInfo); + public native void addOutputShape(@Cast("const Nd4jLong*") LongBuffer shapeInfo); + public native void addOutputShape(@Cast("const Nd4jLong*") long[] shapeInfo); public native @Cast("Nd4jLong") long getActivationsSize(); public native @Cast("Nd4jLong") long getTemporarySize(); @@ -6480,13 +6438,13 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); public native void setInputArray(int index, NDArray array, @Cast("bool") boolean removable/*=false*/); public native void setInputArray(int 
index, NDArray array); - public native void setInputArray(int index, Pointer buffer, Pointer shapeInfo, Pointer specialBuffer, Pointer specialShapeInfo); - public native void setInputArray(int index, Pointer databuffer, Pointer shapeInfo, Pointer specialShapeInfo); + public native void setInputArray(int index, Pointer buffer, @Const Pointer shapeInfo, Pointer specialBuffer, @Const Pointer specialShapeInfo); + public native void setInputArray(int index, Pointer databuffer, @Const Pointer shapeInfo, @Const Pointer specialShapeInfo); public native void setOutputArray(int index, NDArray array, @Cast("bool") boolean removable/*=false*/); public native void setOutputArray(int index, NDArray array); - public native void setOutputArray(int index, Pointer buffer, Pointer shapeInfo, Pointer specialBuffer, Pointer specialShapeInfo); - public native void setOutputArray(int index, Pointer databuffer, Pointer shapeInfo, Pointer specialShapeInfo); + public native void setOutputArray(int index, Pointer buffer, @Const Pointer shapeInfo, Pointer specialBuffer, @Const Pointer specialShapeInfo); + public native void setOutputArray(int index, Pointer databuffer, @Const Pointer shapeInfo, @Const Pointer specialShapeInfo); public native void setTArguments(DoublePointer arguments, int numberOfArguments); public native void setTArguments(DoubleBuffer arguments, int numberOfArguments); @@ -6828,13 +6786,13 @@ public static final int PREALLOC_SIZE = 33554432; @Namespace("shape") public static native @Cast("bool") boolean shapeEquals(int shape1Rank, @Cast("const Nd4jLong*") LongBuffer shape1, int shape2Rank, @Cast("const Nd4jLong*") LongBuffer shape2); @Namespace("shape") public static native @Cast("bool") boolean shapeEquals(int shape1Rank, @Cast("const Nd4jLong*") long[] shape1, int shape2Rank, @Cast("const Nd4jLong*") long[] shape2); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer detachShape(@Cast("Nd4jLong*") LongPointer originalShape); - @Namespace("shape") 
public static native @Cast("Nd4jLong*") LongBuffer detachShape(@Cast("Nd4jLong*") LongBuffer originalShape); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] detachShape(@Cast("Nd4jLong*") long[] originalShape); + @Namespace("shape") public static native @Cast("const Nd4jLong*") LongPointer detachShape(@Cast("const Nd4jLong*") LongPointer originalShape); + @Namespace("shape") public static native @Cast("const Nd4jLong*") LongBuffer detachShape(@Cast("const Nd4jLong*") LongBuffer originalShape); + @Namespace("shape") public static native @Cast("const Nd4jLong*") long[] detachShape(@Cast("const Nd4jLong*") long[] originalShape); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer copyShape(@Cast("Nd4jLong*") LongPointer originalShape); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer copyShape(@Cast("Nd4jLong*") LongBuffer originalShape); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] copyShape(@Cast("Nd4jLong*") long[] originalShape); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer copyShape(@Cast("const Nd4jLong*") LongPointer originalShape); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer copyShape(@Cast("const Nd4jLong*") LongBuffer originalShape); + @Namespace("shape") public static native @Cast("Nd4jLong*") long[] copyShape(@Cast("const Nd4jLong*") long[] originalShape); @Namespace("shape") public static native @Cast("bool") boolean shapeEquals(@Cast("const Nd4jLong*") LongPointer shapeInfo1, @Cast("const Nd4jLong*") LongPointer shapeInfo2); @Namespace("shape") public static native @Cast("bool") boolean shapeEquals(@Cast("const Nd4jLong*") LongBuffer shapeInfo1, @Cast("const Nd4jLong*") LongBuffer shapeInfo2); @@ -6844,17 +6802,17 @@ public static final int PREALLOC_SIZE = 33554432; @Namespace("shape") public static native @Cast("bool") boolean shapeEquals(@Cast("const Nd4jLong*") LongBuffer shapeInfo1, @Cast("const 
Nd4jLong*") LongBuffer shapeInfo2, @Cast("const Nd4jLong*") LongBuffer shapeInfo3); @Namespace("shape") public static native @Cast("bool") boolean shapeEquals(@Cast("const Nd4jLong*") long[] shapeInfo1, @Cast("const Nd4jLong*") long[] shapeInfo2, @Cast("const Nd4jLong*") long[] shapeInfo3); - @Namespace("shape") public static native @Cast("bool") boolean strideEquals(int shape1Rank,@Cast("Nd4jLong*") LongPointer shape1,int shape2Rank,@Cast("Nd4jLong*") LongPointer shape2); - @Namespace("shape") public static native @Cast("bool") boolean strideEquals(int shape1Rank,@Cast("Nd4jLong*") LongBuffer shape1,int shape2Rank,@Cast("Nd4jLong*") LongBuffer shape2); - @Namespace("shape") public static native @Cast("bool") boolean strideEquals(int shape1Rank,@Cast("Nd4jLong*") long[] shape1,int shape2Rank,@Cast("Nd4jLong*") long[] shape2); + @Namespace("shape") public static native @Cast("bool") boolean strideEquals(int shape1Rank,@Cast("const Nd4jLong*") LongPointer shape1,int shape2Rank, @Cast("const Nd4jLong*") LongPointer shape2); + @Namespace("shape") public static native @Cast("bool") boolean strideEquals(int shape1Rank,@Cast("const Nd4jLong*") LongBuffer shape1,int shape2Rank, @Cast("const Nd4jLong*") LongBuffer shape2); + @Namespace("shape") public static native @Cast("bool") boolean strideEquals(int shape1Rank,@Cast("const Nd4jLong*") long[] shape1,int shape2Rank, @Cast("const Nd4jLong*") long[] shape2); - @Namespace("shape") public static native @Cast("bool") boolean strideEquals(@Cast("Nd4jLong*") LongPointer shapeInfo1,@Cast("Nd4jLong*") LongPointer shapeInfo2); - @Namespace("shape") public static native @Cast("bool") boolean strideEquals(@Cast("Nd4jLong*") LongBuffer shapeInfo1,@Cast("Nd4jLong*") LongBuffer shapeInfo2); - @Namespace("shape") public static native @Cast("bool") boolean strideEquals(@Cast("Nd4jLong*") long[] shapeInfo1,@Cast("Nd4jLong*") long[] shapeInfo2); + @Namespace("shape") public static native @Cast("bool") boolean strideEquals(@Cast("const 
Nd4jLong*") LongPointer shapeInfo1, @Cast("const Nd4jLong*") LongPointer shapeInfo2); + @Namespace("shape") public static native @Cast("bool") boolean strideEquals(@Cast("const Nd4jLong*") LongBuffer shapeInfo1, @Cast("const Nd4jLong*") LongBuffer shapeInfo2); + @Namespace("shape") public static native @Cast("bool") boolean strideEquals(@Cast("const Nd4jLong*") long[] shapeInfo1, @Cast("const Nd4jLong*") long[] shapeInfo2); - @Namespace("shape") public static native @Cast("bool") boolean strideEquals(@Cast("Nd4jLong*") LongPointer stride1,int rank1,@Cast("Nd4jLong*") LongPointer stride2,int rank2); - @Namespace("shape") public static native @Cast("bool") boolean strideEquals(@Cast("Nd4jLong*") LongBuffer stride1,int rank1,@Cast("Nd4jLong*") LongBuffer stride2,int rank2); - @Namespace("shape") public static native @Cast("bool") boolean strideEquals(@Cast("Nd4jLong*") long[] stride1,int rank1,@Cast("Nd4jLong*") long[] stride2,int rank2); + @Namespace("shape") public static native @Cast("bool") boolean strideEquals(@Cast("const Nd4jLong*") LongPointer stride1,int rank1, @Cast("const Nd4jLong*") LongPointer stride2, int rank2); + @Namespace("shape") public static native @Cast("bool") boolean strideEquals(@Cast("const Nd4jLong*") LongBuffer stride1,int rank1, @Cast("const Nd4jLong*") LongBuffer stride2, int rank2); + @Namespace("shape") public static native @Cast("bool") boolean strideEquals(@Cast("const Nd4jLong*") long[] stride1,int rank1, @Cast("const Nd4jLong*") long[] stride2, int rank2); @Namespace("shape") public static native @Cast("bool") boolean equalsSoft(@Cast("const Nd4jLong*") LongPointer shapeA, @Cast("const Nd4jLong*") LongPointer shapeB); @Namespace("shape") public static native @Cast("bool") boolean equalsSoft(@Cast("const Nd4jLong*") LongBuffer shapeA, @Cast("const Nd4jLong*") LongBuffer shapeB); @@ -6888,9 +6846,9 @@ public static final int PREALLOC_SIZE = 33554432; @Namespace("shape") public static native int tadIndexForLinear(int linearIndex, int 
tadLength); - @Namespace("shape") public static native @Cast("Nd4jLong") long tadLength(@Cast("Nd4jLong*") LongPointer shapeInfo, IntPointer dimension, int dimensionLength); - @Namespace("shape") public static native @Cast("Nd4jLong") long tadLength(@Cast("Nd4jLong*") LongBuffer shapeInfo, IntBuffer dimension, int dimensionLength); - @Namespace("shape") public static native @Cast("Nd4jLong") long tadLength(@Cast("Nd4jLong*") long[] shapeInfo, int[] dimension, int dimensionLength); + @Namespace("shape") public static native @Cast("Nd4jLong") long tadLength(@Cast("const Nd4jLong*") LongPointer shapeInfo, IntPointer dimension, int dimensionLength); + @Namespace("shape") public static native @Cast("Nd4jLong") long tadLength(@Cast("const Nd4jLong*") LongBuffer shapeInfo, IntBuffer dimension, int dimensionLength); + @Namespace("shape") public static native @Cast("Nd4jLong") long tadLength(@Cast("const Nd4jLong*") long[] shapeInfo, int[] dimension, int dimensionLength); @Namespace("shape") public static native @Cast("bool") boolean canReshape(int oldRank, @Cast("Nd4jLong*") LongPointer oldShape, int newRank, @Cast("Nd4jLong*") LongPointer newShape, @Cast("bool") boolean isFOrder); @Namespace("shape") public static native @Cast("bool") boolean canReshape(int oldRank, @Cast("Nd4jLong*") LongBuffer oldShape, int newRank, @Cast("Nd4jLong*") LongBuffer newShape, @Cast("bool") boolean isFOrder); @@ -6910,25 +6868,25 @@ public static final int PREALLOC_SIZE = 33554432; * Get the shape info buffer * for the given rank and shape. 
*/ - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer shapeBuffer(int rank, @Cast("sd::DataType") int dtype, @Cast("Nd4jLong*") LongPointer shape); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer shapeBuffer(int rank, @Cast("sd::DataType") int dtype, @Cast("Nd4jLong*") LongBuffer shape); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] shapeBuffer(int rank, @Cast("sd::DataType") int dtype, @Cast("Nd4jLong*") long[] shape); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer shapeBuffer(int rank, @Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") LongPointer shape); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer shapeBuffer(int rank, @Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") LongBuffer shape); + @Namespace("shape") public static native @Cast("Nd4jLong*") long[] shapeBuffer(int rank, @Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") long[] shape); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer shapeBuffer(int rank, @Cast("sd::DataType") int dtype, @Cast("Nd4jLong*") LongPointer shape, @Cast("Nd4jLong*") LongPointer buffer); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer shapeBuffer(int rank, @Cast("sd::DataType") int dtype, @Cast("Nd4jLong*") LongBuffer shape, @Cast("Nd4jLong*") LongBuffer buffer); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] shapeBuffer(int rank, @Cast("sd::DataType") int dtype, @Cast("Nd4jLong*") long[] shape, @Cast("Nd4jLong*") long[] buffer); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer shapeBuffer(int rank, @Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") LongPointer shape, @Cast("Nd4jLong*") LongPointer buffer); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer shapeBuffer(int rank, @Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") LongBuffer shape, 
@Cast("Nd4jLong*") LongBuffer buffer); + @Namespace("shape") public static native @Cast("Nd4jLong*") long[] shapeBuffer(int rank, @Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") long[] shape, @Cast("Nd4jLong*") long[] buffer); /** * Get the shape info buffer * for the given rank and shape. */ - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer shapeBufferFortran(int rank, @Cast("sd::DataType") int dtype, @Cast("Nd4jLong*") LongPointer shape); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer shapeBufferFortran(int rank, @Cast("sd::DataType") int dtype, @Cast("Nd4jLong*") LongBuffer shape); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] shapeBufferFortran(int rank, @Cast("sd::DataType") int dtype, @Cast("Nd4jLong*") long[] shape); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer shapeBufferFortran(int rank, @Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") LongPointer shape); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer shapeBufferFortran(int rank, @Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") LongBuffer shape); + @Namespace("shape") public static native @Cast("Nd4jLong*") long[] shapeBufferFortran(int rank, @Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") long[] shape); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer shapeBufferFortran(int rank, @Cast("sd::DataType") int dtype, @Cast("Nd4jLong*") LongPointer shape, @Cast("Nd4jLong*") LongPointer output); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer shapeBufferFortran(int rank, @Cast("sd::DataType") int dtype, @Cast("Nd4jLong*") LongBuffer shape, @Cast("Nd4jLong*") LongBuffer output); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] shapeBufferFortran(int rank, @Cast("sd::DataType") int dtype, @Cast("Nd4jLong*") long[] shape, @Cast("Nd4jLong*") long[] output); + @Namespace("shape") 
public static native @Cast("Nd4jLong*") LongPointer shapeBufferFortran(int rank, @Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") LongPointer shape, @Cast("Nd4jLong*") LongPointer output); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer shapeBufferFortran(int rank, @Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") LongBuffer shape, @Cast("Nd4jLong*") LongBuffer output); + @Namespace("shape") public static native @Cast("Nd4jLong*") long[] shapeBufferFortran(int rank, @Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") long[] shape, @Cast("Nd4jLong*") long[] output); // #ifdef __CUDACC__ // #endif @@ -6942,13 +6900,13 @@ public static final int PREALLOC_SIZE = 33554432; * @param startNum the start number for the strides * @return the strides for a matrix of n dimensions */ - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer calcStridesFortran(@Cast("Nd4jLong*") LongPointer shape, int rank); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer calcStridesFortran(@Cast("Nd4jLong*") LongBuffer shape, int rank); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] calcStridesFortran(@Cast("Nd4jLong*") long[] shape, int rank); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer calcStridesFortran(@Cast("const Nd4jLong*") LongPointer shape, int rank); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer calcStridesFortran(@Cast("const Nd4jLong*") LongBuffer shape, int rank); + @Namespace("shape") public static native @Cast("Nd4jLong*") long[] calcStridesFortran(@Cast("const Nd4jLong*") long[] shape, int rank); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer calcStridesFortran(@Cast("Nd4jLong*") LongPointer shape, int rank, @Cast("Nd4jLong*") LongPointer ret); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer calcStridesFortran(@Cast("Nd4jLong*") LongBuffer shape, int rank, 
@Cast("Nd4jLong*") LongBuffer ret); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] calcStridesFortran(@Cast("Nd4jLong*") long[] shape, int rank, @Cast("Nd4jLong*") long[] ret); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer calcStridesFortran(@Cast("const Nd4jLong*") LongPointer shape, int rank, @Cast("Nd4jLong*") LongPointer ret); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer calcStridesFortran(@Cast("const Nd4jLong*") LongBuffer shape, int rank, @Cast("Nd4jLong*") LongBuffer ret); + @Namespace("shape") public static native @Cast("Nd4jLong*") long[] calcStridesFortran(@Cast("const Nd4jLong*") long[] shape, int rank, @Cast("Nd4jLong*") long[] ret); /** * Computes the standard packed array strides for a given shape. @@ -6958,13 +6916,13 @@ public static final int PREALLOC_SIZE = 33554432; * @return the strides for a matrix of n dimensions */ - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer calcStrides(@Cast("Nd4jLong*") LongPointer shape, int rank); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer calcStrides(@Cast("Nd4jLong*") LongBuffer shape, int rank); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] calcStrides(@Cast("Nd4jLong*") long[] shape, int rank); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer calcStrides(@Cast("const Nd4jLong*") LongPointer shape, int rank); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer calcStrides(@Cast("const Nd4jLong*") LongBuffer shape, int rank); + @Namespace("shape") public static native @Cast("Nd4jLong*") long[] calcStrides(@Cast("const Nd4jLong*") long[] shape, int rank); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer calcStrides(@Cast("Nd4jLong*") LongPointer shape, int rank, @Cast("Nd4jLong*") LongPointer ret); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer 
calcStrides(@Cast("Nd4jLong*") LongBuffer shape, int rank, @Cast("Nd4jLong*") LongBuffer ret); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] calcStrides(@Cast("Nd4jLong*") long[] shape, int rank, @Cast("Nd4jLong*") long[] ret); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer calcStrides(@Cast("const Nd4jLong*") LongPointer shape, int rank, @Cast("Nd4jLong*") LongPointer ret); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer calcStrides(@Cast("const Nd4jLong*") LongBuffer shape, int rank, @Cast("Nd4jLong*") LongBuffer ret); + @Namespace("shape") public static native @Cast("Nd4jLong*") long[] calcStrides(@Cast("const Nd4jLong*") long[] shape, int rank, @Cast("Nd4jLong*") long[] ret); @Namespace("shape") public static native void updateStrides(@Cast("Nd4jLong*") LongPointer shape, byte order); @Namespace("shape") public static native void updateStrides(@Cast("Nd4jLong*") LongBuffer shape, byte order); @@ -6983,13 +6941,13 @@ public static final int PREALLOC_SIZE = 33554432; * @param startNum the start number for the strides * @return the strides for a matrix of n dimensions */ - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer calcStridesFortran(@Cast("Nd4jLong*") LongPointer shape, int rank, int startNum); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer calcStridesFortran(@Cast("Nd4jLong*") LongBuffer shape, int rank, int startNum); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] calcStridesFortran(@Cast("Nd4jLong*") long[] shape, int rank, int startNum); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer calcStridesFortran(@Cast("const Nd4jLong*") LongPointer shape, int rank, int startNum); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer calcStridesFortran(@Cast("const Nd4jLong*") LongBuffer shape, int rank, int startNum); + @Namespace("shape") public static native 
@Cast("Nd4jLong*") long[] calcStridesFortran(@Cast("const Nd4jLong*") long[] shape, int rank, int startNum); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer calcStridesFortran(@Cast("Nd4jLong*") LongPointer shape, int rank, int startNum, @Cast("Nd4jLong*") LongPointer ret); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer calcStridesFortran(@Cast("Nd4jLong*") LongBuffer shape, int rank, int startNum, @Cast("Nd4jLong*") LongBuffer ret); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] calcStridesFortran(@Cast("Nd4jLong*") long[] shape, int rank, int startNum, @Cast("Nd4jLong*") long[] ret); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer calcStridesFortran(@Cast("const Nd4jLong*") LongPointer shape, int rank, int startNum, @Cast("Nd4jLong*") LongPointer ret); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer calcStridesFortran(@Cast("const Nd4jLong*") LongBuffer shape, int rank, int startNum, @Cast("Nd4jLong*") LongBuffer ret); + @Namespace("shape") public static native @Cast("Nd4jLong*") long[] calcStridesFortran(@Cast("const Nd4jLong*") long[] shape, int rank, int startNum, @Cast("Nd4jLong*") long[] ret); /** * Computes the standard packed array strides for a given shape. 
@@ -6998,13 +6956,13 @@ public static final int PREALLOC_SIZE = 33554432; * @param startNum the start number for the strides * @return the strides for a matrix of n dimensions */ - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer calcStrides(@Cast("Nd4jLong*") LongPointer shape, int rank, int startNum); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer calcStrides(@Cast("Nd4jLong*") LongBuffer shape, int rank, int startNum); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] calcStrides(@Cast("Nd4jLong*") long[] shape, int rank, int startNum); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer calcStrides(@Cast("const Nd4jLong*") LongPointer shape, int rank, int startNum); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer calcStrides(@Cast("const Nd4jLong*") LongBuffer shape, int rank, int startNum); + @Namespace("shape") public static native @Cast("Nd4jLong*") long[] calcStrides(@Cast("const Nd4jLong*") long[] shape, int rank, int startNum); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer calcStrides(@Cast("Nd4jLong*") LongPointer shape, int rank, int startNum, @Cast("Nd4jLong*") LongPointer ret); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer calcStrides(@Cast("Nd4jLong*") LongBuffer shape, int rank, int startNum, @Cast("Nd4jLong*") LongBuffer ret); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] calcStrides(@Cast("Nd4jLong*") long[] shape, int rank, int startNum, @Cast("Nd4jLong*") long[] ret); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer calcStrides(@Cast("const Nd4jLong*") LongPointer shape, int rank, int startNum, @Cast("Nd4jLong*") LongPointer ret); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer calcStrides(@Cast("const Nd4jLong*") LongBuffer shape, int rank, int startNum, @Cast("Nd4jLong*") LongBuffer ret); + 
@Namespace("shape") public static native @Cast("Nd4jLong*") long[] calcStrides(@Cast("const Nd4jLong*") long[] shape, int rank, int startNum, @Cast("Nd4jLong*") long[] ret); /** * @param toCopy the shape to copy @@ -7042,9 +7000,9 @@ public static final int PREALLOC_SIZE = 33554432; * @return 0 if there is no element wise stride the * element wise stride of reshape(1,length) otherwise */ - @Namespace("shape") public static native int computeElementWiseStride(int rank, @Cast("Nd4jLong*") LongPointer shape, @Cast("Nd4jLong*") LongPointer stride, int isFOrder); - @Namespace("shape") public static native int computeElementWiseStride(int rank, @Cast("Nd4jLong*") LongBuffer shape, @Cast("Nd4jLong*") LongBuffer stride, int isFOrder); - @Namespace("shape") public static native int computeElementWiseStride(int rank, @Cast("Nd4jLong*") long[] shape, @Cast("Nd4jLong*") long[] stride, int isFOrder); + @Namespace("shape") public static native int computeElementWiseStride(int rank, @Cast("const Nd4jLong*") LongPointer shape, @Cast("const Nd4jLong*") LongPointer stride, int isFOrder); + @Namespace("shape") public static native int computeElementWiseStride(int rank, @Cast("const Nd4jLong*") LongBuffer shape, @Cast("const Nd4jLong*") LongBuffer stride, int isFOrder); + @Namespace("shape") public static native int computeElementWiseStride(int rank, @Cast("const Nd4jLong*") long[] shape, @Cast("const Nd4jLong*") long[] stride, int isFOrder); /** * Compute the element wise stride @@ -7057,17 +7015,17 @@ public static final int PREALLOC_SIZE = 33554432; * @return 0 if there is no element wise stride the * element wise stride of reshape(1,length) otherwise */ - @Namespace("shape") public static native int computeElementWiseStride(int rank, @Cast("Nd4jLong*") LongPointer shape, @Cast("Nd4jLong*") LongPointer stride, int isFOrder, @Cast("Nd4jLong*") LongPointer dimension, int dimensionLength); - @Namespace("shape") public static native int computeElementWiseStride(int rank, 
@Cast("Nd4jLong*") LongBuffer shape, @Cast("Nd4jLong*") LongBuffer stride, int isFOrder, @Cast("Nd4jLong*") LongBuffer dimension, int dimensionLength); - @Namespace("shape") public static native int computeElementWiseStride(int rank, @Cast("Nd4jLong*") long[] shape, @Cast("Nd4jLong*") long[] stride, int isFOrder, @Cast("Nd4jLong*") long[] dimension, int dimensionLength); + @Namespace("shape") public static native int computeElementWiseStride(int rank, @Cast("const Nd4jLong*") LongPointer shape, @Cast("const Nd4jLong*") LongPointer stride, int isFOrder, @Cast("const Nd4jLong*") LongPointer dimension, int dimensionLength); + @Namespace("shape") public static native int computeElementWiseStride(int rank, @Cast("const Nd4jLong*") LongBuffer shape, @Cast("const Nd4jLong*") LongBuffer stride, int isFOrder, @Cast("const Nd4jLong*") LongBuffer dimension, int dimensionLength); + @Namespace("shape") public static native int computeElementWiseStride(int rank, @Cast("const Nd4jLong*") long[] shape, @Cast("const Nd4jLong*") long[] stride, int isFOrder, @Cast("const Nd4jLong*") long[] dimension, int dimensionLength); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer shapeInfoOnlyShapeAndStride(@Cast("Nd4jLong*") LongPointer shapeInfo, @Cast("Nd4jLong*") LongPointer dimension, int dimensionLength,@Cast("bool") boolean reverseCopyStride); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer shapeInfoOnlyShapeAndStride(@Cast("Nd4jLong*") LongBuffer shapeInfo, @Cast("Nd4jLong*") LongBuffer dimension, int dimensionLength,@Cast("bool") boolean reverseCopyStride); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] shapeInfoOnlyShapeAndStride(@Cast("Nd4jLong*") long[] shapeInfo, @Cast("Nd4jLong*") long[] dimension, int dimensionLength,@Cast("bool") boolean reverseCopyStride); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer shapeInfoOnlyShapeAndStride(@Cast("const Nd4jLong*") LongPointer shapeInfo, 
@Cast("Nd4jLong*") LongPointer dimension, int dimensionLength,@Cast("bool") boolean reverseCopyStride); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer shapeInfoOnlyShapeAndStride(@Cast("const Nd4jLong*") LongBuffer shapeInfo, @Cast("Nd4jLong*") LongBuffer dimension, int dimensionLength,@Cast("bool") boolean reverseCopyStride); + @Namespace("shape") public static native @Cast("Nd4jLong*") long[] shapeInfoOnlyShapeAndStride(@Cast("const Nd4jLong*") long[] shapeInfo, @Cast("Nd4jLong*") long[] dimension, int dimensionLength,@Cast("bool") boolean reverseCopyStride); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer shapeInfoOnlyShapeAndStride(@Cast("Nd4jLong*") LongPointer shapeInfo, @Cast("Nd4jLong*") LongPointer dimension, int dimensionLength,@Cast("bool") boolean reverseCopyStride, @Cast("Nd4jLong*") LongPointer buffer); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer shapeInfoOnlyShapeAndStride(@Cast("Nd4jLong*") LongBuffer shapeInfo, @Cast("Nd4jLong*") LongBuffer dimension, int dimensionLength,@Cast("bool") boolean reverseCopyStride, @Cast("Nd4jLong*") LongBuffer buffer); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] shapeInfoOnlyShapeAndStride(@Cast("Nd4jLong*") long[] shapeInfo, @Cast("Nd4jLong*") long[] dimension, int dimensionLength,@Cast("bool") boolean reverseCopyStride, @Cast("Nd4jLong*") long[] buffer); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer shapeInfoOnlyShapeAndStride(@Cast("const Nd4jLong*") LongPointer shapeInfo, @Cast("Nd4jLong*") LongPointer dimension, int dimensionLength,@Cast("bool") boolean reverseCopyStride, @Cast("Nd4jLong*") LongPointer buffer); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer shapeInfoOnlyShapeAndStride(@Cast("const Nd4jLong*") LongBuffer shapeInfo, @Cast("Nd4jLong*") LongBuffer dimension, int dimensionLength,@Cast("bool") boolean reverseCopyStride, @Cast("Nd4jLong*") 
LongBuffer buffer); + @Namespace("shape") public static native @Cast("Nd4jLong*") long[] shapeInfoOnlyShapeAndStride(@Cast("const Nd4jLong*") long[] shapeInfo, @Cast("Nd4jLong*") long[] dimension, int dimensionLength,@Cast("bool") boolean reverseCopyStride, @Cast("Nd4jLong*") long[] buffer); /** * * @param length @@ -7089,9 +7047,9 @@ public static final int PREALLOC_SIZE = 33554432; */ @Namespace("shape") public static native void doPermuteSwap(int length, @Cast("Nd4jLong**") PointerPointer shape, IntPointer rearrange); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer permuteShapeBuffer(@Cast("Nd4jLong*") LongPointer shapeBuffer, IntPointer rearrange); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer permuteShapeBuffer(@Cast("Nd4jLong*") LongBuffer shapeBuffer, IntBuffer rearrange); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] permuteShapeBuffer(@Cast("Nd4jLong*") long[] shapeBuffer, int[] rearrange); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer permuteShapeBuffer(@Cast("const Nd4jLong*") LongPointer shapeBuffer, IntPointer rearrange); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer permuteShapeBuffer(@Cast("const Nd4jLong*") LongBuffer shapeBuffer, IntBuffer rearrange); + @Namespace("shape") public static native @Cast("Nd4jLong*") long[] permuteShapeBuffer(@Cast("const Nd4jLong*") long[] shapeBuffer, int[] rearrange); @Namespace("shape") public static native void permuteShapeBufferInPlace(@Cast("Nd4jLong*") LongPointer shapeBuffer, IntPointer rearrange, @Cast("Nd4jLong*") LongPointer out); @Namespace("shape") public static native void permuteShapeBufferInPlace(@Cast("Nd4jLong*") LongBuffer shapeBuffer, IntBuffer rearrange, @Cast("Nd4jLong*") LongBuffer out); @@ -7123,9 +7081,9 @@ public static final int PREALLOC_SIZE = 33554432; @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer createPermuteIndexes(int originalRank, 
IntBuffer dimension,int dimensionLength); @Namespace("shape") public static native @Cast("Nd4jLong*") long[] createPermuteIndexes(int originalRank, int[] dimension,int dimensionLength); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer computeResultShape(@Cast("Nd4jLong*") LongPointer originalShapeBuffer, IntPointer dimension,int dimensionLength); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer computeResultShape(@Cast("Nd4jLong*") LongBuffer originalShapeBuffer, IntBuffer dimension,int dimensionLength); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] computeResultShape(@Cast("Nd4jLong*") long[] originalShapeBuffer, int[] dimension,int dimensionLength); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer computeResultShape(@Cast("const Nd4jLong*") LongPointer originalShapeBuffer, IntPointer dimension,int dimensionLength); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer computeResultShape(@Cast("const Nd4jLong*") LongBuffer originalShapeBuffer, IntBuffer dimension,int dimensionLength); + @Namespace("shape") public static native @Cast("Nd4jLong*") long[] computeResultShape(@Cast("const Nd4jLong*") long[] originalShapeBuffer, int[] dimension,int dimensionLength); /** * This method does inplace transpose of given shapeBuffer @@ -7176,9 +7134,9 @@ public static final int PREALLOC_SIZE = 33554432; * @param shape the shape of the array * @param rank the rank of cthe shape */ - @Namespace("shape") public static native int isVector(@Cast("Nd4jLong*") LongPointer shape, int rank); - @Namespace("shape") public static native int isVector(@Cast("Nd4jLong*") LongBuffer shape, int rank); - @Namespace("shape") public static native int isVector(@Cast("Nd4jLong*") long[] shape, int rank); + @Namespace("shape") public static native int isVector(@Cast("const Nd4jLong*") LongPointer shape, int rank); + @Namespace("shape") public static native int isVector(@Cast("const 
Nd4jLong*") LongBuffer shape, int rank); + @Namespace("shape") public static native int isVector(@Cast("const Nd4jLong*") long[] shape, int rank); /** @@ -7197,9 +7155,9 @@ public static final int PREALLOC_SIZE = 33554432; @Namespace("shape") public static native int isVector(@Cast("const Nd4jLong*") LongBuffer shapeInfo); @Namespace("shape") public static native int isVector(@Cast("const Nd4jLong*") long[] shapeInfo); - @Namespace("shape") public static native @Cast("bool") boolean isLikeVector(@Cast("Nd4jLong*") LongPointer shapeInfo, @ByRef IntPointer posOfNonUnityDim); - @Namespace("shape") public static native @Cast("bool") boolean isLikeVector(@Cast("Nd4jLong*") LongBuffer shapeInfo, @ByRef IntBuffer posOfNonUnityDim); - @Namespace("shape") public static native @Cast("bool") boolean isLikeVector(@Cast("Nd4jLong*") long[] shapeInfo, @ByRef int[] posOfNonUnityDim); + @Namespace("shape") public static native @Cast("bool") boolean isLikeVector(@Cast("const Nd4jLong*") LongPointer shapeInfo, @ByRef IntPointer posOfNonUnityDim); + @Namespace("shape") public static native @Cast("bool") boolean isLikeVector(@Cast("const Nd4jLong*") LongBuffer shapeInfo, @ByRef IntBuffer posOfNonUnityDim); + @Namespace("shape") public static native @Cast("bool") boolean isLikeVector(@Cast("const Nd4jLong*") long[] shapeInfo, @ByRef int[] posOfNonUnityDim); @Namespace("shape") public static native @Cast("bool") boolean isCommonVector(@Cast("const Nd4jLong*") LongPointer shapeInfo, @ByRef IntPointer posOfNonUnityDim); @Namespace("shape") public static native @Cast("bool") boolean isCommonVector(@Cast("const Nd4jLong*") LongBuffer shapeInfo, @ByRef IntBuffer posOfNonUnityDim); @@ -7209,9 +7167,9 @@ public static final int PREALLOC_SIZE = 33554432; @Namespace("shape") public static native @Cast("bool") boolean isRowVector(@Cast("const Nd4jLong*") LongBuffer shapeInfo); @Namespace("shape") public static native @Cast("bool") boolean isRowVector(@Cast("const Nd4jLong*") long[] shapeInfo); - 
@Namespace("shape") public static native @Cast("bool") boolean isColumnVector(@Cast("Nd4jLong*") LongPointer shapeInfo); - @Namespace("shape") public static native @Cast("bool") boolean isColumnVector(@Cast("Nd4jLong*") LongBuffer shapeInfo); - @Namespace("shape") public static native @Cast("bool") boolean isColumnVector(@Cast("Nd4jLong*") long[] shapeInfo); + @Namespace("shape") public static native @Cast("bool") boolean isColumnVector(@Cast("const Nd4jLong*") LongPointer shapeInfo); + @Namespace("shape") public static native @Cast("bool") boolean isColumnVector(@Cast("const Nd4jLong*") LongBuffer shapeInfo); + @Namespace("shape") public static native @Cast("bool") boolean isColumnVector(@Cast("const Nd4jLong*") long[] shapeInfo); /** * shape - input inShape is shape only, not shapeInfo @@ -7259,9 +7217,9 @@ public static final int PREALLOC_SIZE = 33554432; * This buffer allocates memory * that must be freed elsewhere. */ - @Namespace("shape") public static native void copyTo(int length, @Cast("Nd4jLong*") LongPointer from, @Cast("Nd4jLong*") LongPointer to, @Cast("Nd4jLong*") LongPointer indexes); - @Namespace("shape") public static native void copyTo(int length, @Cast("Nd4jLong*") LongBuffer from, @Cast("Nd4jLong*") LongBuffer to, @Cast("Nd4jLong*") LongBuffer indexes); - @Namespace("shape") public static native void copyTo(int length, @Cast("Nd4jLong*") long[] from, @Cast("Nd4jLong*") long[] to, @Cast("Nd4jLong*") long[] indexes); + @Namespace("shape") public static native void copyTo(int length, @Cast("const Nd4jLong*") LongPointer from, @Cast("Nd4jLong*") LongPointer to, @Cast("Nd4jLong*") LongPointer indexes); + @Namespace("shape") public static native void copyTo(int length, @Cast("const Nd4jLong*") LongBuffer from, @Cast("Nd4jLong*") LongBuffer to, @Cast("Nd4jLong*") LongBuffer indexes); + @Namespace("shape") public static native void copyTo(int length, @Cast("const Nd4jLong*") long[] from, @Cast("Nd4jLong*") long[] to, @Cast("Nd4jLong*") long[] indexes); 
/** * Permute the given strides @@ -7459,9 +7417,9 @@ public static final int PREALLOC_SIZE = 33554432; * indexes should be the indexes to exclude * indexes length should be the length of indexes */ - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer everyIndexBut(@Cast("Nd4jLong*") LongPointer indexes,int indexesLength,int begin,int end); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer everyIndexBut(@Cast("Nd4jLong*") LongBuffer indexes,int indexesLength,int begin,int end); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] everyIndexBut(@Cast("Nd4jLong*") long[] indexes,int indexesLength,int begin,int end); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer everyIndexBut(@Cast("const Nd4jLong*") LongPointer indexes,int indexesLength,int begin,int end); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer everyIndexBut(@Cast("const Nd4jLong*") LongBuffer indexes,int indexesLength,int begin,int end); + @Namespace("shape") public static native @Cast("Nd4jLong*") long[] everyIndexBut(@Cast("const Nd4jLong*") long[] indexes,int indexesLength,int begin,int end); /** * Computes the offset for accessing @@ -7507,9 +7465,9 @@ public static final int PREALLOC_SIZE = 33554432; * Keep the given indexes * in the data */ - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer keep(@Cast("Nd4jLong*") LongPointer data, IntPointer index, int indexLength, int dataLength); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer keep(@Cast("Nd4jLong*") LongBuffer data, IntBuffer index, int indexLength, int dataLength); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] keep(@Cast("Nd4jLong*") long[] data, int[] index, int indexLength, int dataLength); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer keep(@Cast("Nd4jLong*") LongPointer data, @Const IntPointer index, int indexLength, int dataLength); + 
@Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer keep(@Cast("Nd4jLong*") LongBuffer data, @Const IntBuffer index, int indexLength, int dataLength); + @Namespace("shape") public static native @Cast("Nd4jLong*") long[] keep(@Cast("Nd4jLong*") long[] data, @Const int[] index, int indexLength, int dataLength); /** * Generate reverse copy of the data @@ -7547,9 +7505,9 @@ public static final int PREALLOC_SIZE = 33554432; * @return the length per slice of the given shape * along the given dimension */ - @Namespace("shape") public static native @Cast("Nd4jLong") long lengthPerSlice(int rank, @Cast("Nd4jLong*") LongPointer shape, IntPointer dimension, int dimensionLength); - @Namespace("shape") public static native @Cast("Nd4jLong") long lengthPerSlice(int rank, @Cast("Nd4jLong*") LongBuffer shape, IntBuffer dimension, int dimensionLength); - @Namespace("shape") public static native @Cast("Nd4jLong") long lengthPerSlice(int rank, @Cast("Nd4jLong*") long[] shape, int[] dimension, int dimensionLength); + @Namespace("shape") public static native @Cast("Nd4jLong") long lengthPerSlice(int rank, @Cast("const Nd4jLong*") LongPointer shape, @Const IntPointer dimension, int dimensionLength); + @Namespace("shape") public static native @Cast("Nd4jLong") long lengthPerSlice(int rank, @Cast("const Nd4jLong*") LongBuffer shape, @Const IntBuffer dimension, int dimensionLength); + @Namespace("shape") public static native @Cast("Nd4jLong") long lengthPerSlice(int rank, @Cast("const Nd4jLong*") long[] shape, @Const int[] dimension, int dimensionLength); /** * calculates the offset for a tensor @@ -7560,24 +7518,24 @@ public static final int PREALLOC_SIZE = 33554432; */ @Namespace("shape") public static native @Cast("Nd4jLong") long sliceOffsetForTensor(int rank, int index, - @Cast("Nd4jLong*") LongPointer shape, - @Cast("Nd4jLong*") LongPointer tensorShape, + @Cast("const Nd4jLong*") LongPointer shape, + @Cast("const Nd4jLong*") LongPointer tensorShape, int 
tensorShapeLength, - IntPointer dimension, + @Const IntPointer dimension, int dimensionLength); @Namespace("shape") public static native @Cast("Nd4jLong") long sliceOffsetForTensor(int rank, int index, - @Cast("Nd4jLong*") LongBuffer shape, - @Cast("Nd4jLong*") LongBuffer tensorShape, + @Cast("const Nd4jLong*") LongBuffer shape, + @Cast("const Nd4jLong*") LongBuffer tensorShape, int tensorShapeLength, - IntBuffer dimension, + @Const IntBuffer dimension, int dimensionLength); @Namespace("shape") public static native @Cast("Nd4jLong") long sliceOffsetForTensor(int rank, int index, - @Cast("Nd4jLong*") long[] shape, - @Cast("Nd4jLong*") long[] tensorShape, + @Cast("const Nd4jLong*") long[] shape, + @Cast("const Nd4jLong*") long[] tensorShape, int tensorShapeLength, - int[] dimension, + @Const int[] dimension, int dimensionLength); /** @@ -9203,25 +9161,33 @@ public static final int PREALLOC_SIZE = 33554432; return (ShapeList)super.position(position); } - public ShapeList(@Cast("Nd4jLong*") LongPointer shape/*=nullptr*/) { super((Pointer)null); allocate(shape); } - private native void allocate(@Cast("Nd4jLong*") LongPointer shape/*=nullptr*/); + public ShapeList(@Cast("const Nd4jLong*") LongPointer shape/*=nullptr*/) { super((Pointer)null); allocate(shape); } + private native void allocate(@Cast("const Nd4jLong*") LongPointer shape/*=nullptr*/); public ShapeList() { super((Pointer)null); allocate(); } private native void allocate(); - public ShapeList(@Cast("Nd4jLong*") LongBuffer shape/*=nullptr*/) { super((Pointer)null); allocate(shape); } - private native void allocate(@Cast("Nd4jLong*") LongBuffer shape/*=nullptr*/); - public ShapeList(@Cast("Nd4jLong*") long[] shape/*=nullptr*/) { super((Pointer)null); allocate(shape); } - private native void allocate(@Cast("Nd4jLong*") long[] shape/*=nullptr*/); - public ShapeList(@Cast("Nd4jLong**") @StdVector PointerPointer shapes) { super((Pointer)null); allocate(shapes); } - private native void allocate(@Cast("Nd4jLong**") 
@StdVector PointerPointer shapes); + public ShapeList(@Cast("const Nd4jLong*") LongBuffer shape/*=nullptr*/) { super((Pointer)null); allocate(shape); } + private native void allocate(@Cast("const Nd4jLong*") LongBuffer shape/*=nullptr*/); + public ShapeList(@Cast("const Nd4jLong*") long[] shape/*=nullptr*/) { super((Pointer)null); allocate(shape); } + private native void allocate(@Cast("const Nd4jLong*") long[] shape/*=nullptr*/); + public ShapeList(@Cast("const Nd4jLong**") @StdVector PointerPointer shapes, @Cast("bool") boolean isWorkspace) { super((Pointer)null); allocate(shapes, isWorkspace); } + private native void allocate(@Cast("const Nd4jLong**") @StdVector PointerPointer shapes, @Cast("bool") boolean isWorkspace); + public ShapeList(@Cast("const Nd4jLong**") @StdVector @ByPtrPtr LongPointer shapes, @Cast("bool") boolean isWorkspace) { super((Pointer)null); allocate(shapes, isWorkspace); } + private native void allocate(@Cast("const Nd4jLong**") @StdVector @ByPtrPtr LongPointer shapes, @Cast("bool") boolean isWorkspace); + public ShapeList(@Cast("const Nd4jLong**") @StdVector @ByPtrPtr LongBuffer shapes, @Cast("bool") boolean isWorkspace) { super((Pointer)null); allocate(shapes, isWorkspace); } + private native void allocate(@Cast("const Nd4jLong**") @StdVector @ByPtrPtr LongBuffer shapes, @Cast("bool") boolean isWorkspace); + public ShapeList(@Cast("const Nd4jLong**") @StdVector @ByPtrPtr long[] shapes, @Cast("bool") boolean isWorkspace) { super((Pointer)null); allocate(shapes, isWorkspace); } + private native void allocate(@Cast("const Nd4jLong**") @StdVector @ByPtrPtr long[] shapes, @Cast("bool") boolean isWorkspace); + public ShapeList(@Cast("const Nd4jLong**") @StdVector PointerPointer shapes) { super((Pointer)null); allocate(shapes); } + private native void allocate(@Cast("const Nd4jLong**") @StdVector PointerPointer shapes); //ShapeList(bool autoRemovable); - public native @Cast("Nd4jLong**") @StdVector PointerPointer asVector(); + public native 
@Cast("const Nd4jLong**") @StdVector PointerPointer asVector(); public native void destroy(); public native int size(); - public native @Cast("Nd4jLong*") LongPointer at(int idx); - public native void push_back(@Cast("Nd4jLong*") LongPointer shape); - public native void push_back(@Cast("Nd4jLong*") LongBuffer shape); - public native void push_back(@Cast("Nd4jLong*") long[] shape); + public native @Cast("const Nd4jLong*") LongPointer at(int idx); + public native void push_back(@Cast("const Nd4jLong*") LongPointer shape); + public native void push_back(@Cast("const Nd4jLong*") LongBuffer shape); + public native void push_back(@Cast("const Nd4jLong*") long[] shape); /** * PLEASE NOTE: This method should be called ONLY if shapes were generated at workspaces. Otherwise you'll get memory leak @@ -10712,6 +10678,7 @@ public static final int PREALLOC_SIZE = 33554432; // #include // #include // #include +// #include // #include // #include // #include diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/resources/META-INF/services/org.nd4j.linalg.compression.NDArrayCompressor b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/resources/META-INF/services/org.nd4j.linalg.compression.NDArrayCompressor index 5c829e57a..72e314156 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/resources/META-INF/services/org.nd4j.linalg.compression.NDArrayCompressor +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/resources/META-INF/services/org.nd4j.linalg.compression.NDArrayCompressor @@ -13,5 +13,3 @@ # # SPDX-License-Identifier: Apache-2.0 ################################################################################ - -org.nd4j.linalg.jcublas.compression.CudaThreshold \ No newline at end of file diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/ops/NativeOpExecutioner.java 
b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/ops/NativeOpExecutioner.java index 96def59d7..508144f26 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/ops/NativeOpExecutioner.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/ops/NativeOpExecutioner.java @@ -1376,142 +1376,6 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { } } - @Override - public INDArray thresholdEncode(INDArray input, double threshold) { - return thresholdEncode(input, threshold, null); - } - - @Override - public INDArray thresholdEncode(INDArray input, double threshold, Integer boundary) { - - //val condition = new MatchCondition(input, Conditions.absGreaterThanOrEqual(threshold)); - //long t1 = System.currentTimeMillis(); - int cntAbs = loop.estimateThreshold(null, - input.data().addressPointer(), - (LongPointer) input.shapeInfoDataBuffer().addressPointer(), - (int) input.length(), - (float) threshold); - //long t2 = System.currentTimeMillis(); - - if (loop.lastErrorCode() != 0) - throw new RuntimeException(loop.lastErrorMessage()); - - if (cntAbs < 2) - return null; - - if (boundary != null) - cntAbs = Math.min(cntAbs, boundary); - - //log.info("S: {}; T: {}", cntAbs, t2 - t1); - - DataBuffer buffer = input.data(); - - long originalLength = buffer.length() * Nd4j.sizeOfDataType(buffer.dataType()); - int compressedLength = cntAbs + 4; - // first 3 elements contain header - - DataBuffer encodedBuffer = Nd4j.getMemoryManager().getCurrentWorkspace() == null ? 
Nd4j.getDataBufferFactory().createInt(4+cntAbs, false) : Nd4j.getDataBufferFactory().createInt(4+cntAbs, false, Nd4j.getMemoryManager().getCurrentWorkspace()); - - encodedBuffer.put(0, cntAbs); - encodedBuffer.put(1, (int) buffer.length()); - encodedBuffer.put(2, Float.floatToIntBits((float) threshold)); - - // format id - encodedBuffer.put(3, ThresholdCompression.FLEXIBLE_ENCODING); - - CompressionDescriptor descriptor = new CompressionDescriptor(); - descriptor.setCompressedLength(compressedLength * 4); // sizeOf(INT) - descriptor.setOriginalLength(originalLength); - descriptor.setOriginalElementSize(Nd4j.sizeOfDataType(buffer.dataType())); - descriptor.setNumberOfElements(buffer.length()); - - descriptor.setCompressionAlgorithm("THRESHOLD"); - descriptor.setCompressionType(CompressionType.LOSSLESS); - - //CompressedDataBuffer cbuff = new CompressedDataBuffer(pointer, descriptor); - - Nd4j.getNDArrayFactory().convertDataEx(AbstractCompressor.getBufferTypeEx(buffer), buffer.addressPointer(), DataTypeEx.THRESHOLD, encodedBuffer.addressPointer(), buffer.length()); - - Nd4j.getAffinityManager().tagLocation(buffer, AffinityManager.Location.HOST); - - return Nd4j.createArrayFromShapeBuffer(encodedBuffer, input.shapeInfoDataBuffer()); - } - - @Override - public INDArray thresholdDecode(INDArray encoded, INDArray target) { - DataBuffer buffer = encoded.data(); - - if (buffer.dataType() != DataType.INT) - throw new ND4JIllegalStateException("thresholdEncoded array should have dataType of INT"); - - long compressedLength = buffer.getInt(0); - long originalLength = buffer.getInt(1); - float threshold = buffer.getInt(2); - - if (target.length() != originalLength) - throw new ND4JIllegalStateException("originalLength ["+ originalLength+"] stored in encoded array doesn't match target length ["+ target.length()+"]"); - - DataTypeEx typeDst = AbstractCompressor.getBufferTypeEx(target.data()); - - loop.convertTypes(null, DataTypeEx.THRESHOLD.ordinal(), buffer.addressPointer(), 
target.length(), typeDst.ordinal(), target.data().addressPointer()); - - if (loop.lastErrorCode() != 0) - throw new RuntimeException(loop.lastErrorMessage()); - - return target; - } - - - @Override - public long bitmapEncode(INDArray indArray, INDArray target, double threshold) { - long length = indArray.length(); - long tLen = target.data().length(); - - if (tLen != (length / 16 + 5)) - throw new ND4JIllegalStateException("Length of target array should be " + (length / 16 + 5)); - - if (target.data().dataType() != DataType.INT) - throw new ND4JIllegalStateException("Target array should have INT dataType"); - - DataBuffer buffer = target.data(); - - buffer.put(0, (int) length); - buffer.put(1, (int) length); - buffer.put(2, Float.floatToIntBits((float) threshold)); - - // format id - buffer.put(3, ThresholdCompression.BITMAP_ENCODING); - - long affected = loop.encodeBitmap(null, - indArray.data().addressPointer(), - (LongPointer) indArray.shapeInfoDataBuffer().addressPointer(), - length, - (IntPointer) buffer.addressPointer(), - (float) threshold); - - if (loop.lastErrorCode() != 0) - throw new RuntimeException(loop.lastErrorMessage()); - - return affected; - } - - @Override - public INDArray bitmapDecode(INDArray encoded, INDArray target) { - - loop.decodeBitmap(null, - encoded.data().addressPointer(), - target.length(), - target.data().addressPointer(), - (LongPointer) target.shapeInfoDataBuffer().addressPointer() - ); - - if (loop.lastErrorCode() != 0) - throw new RuntimeException(loop.lastErrorMessage()); - - return target; - } - - @Override public synchronized Map getCustomOperations() { if (customOps == null) { diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java index fd9ffc7c1..b67949a54 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java +++ 
b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java @@ -590,12 +590,12 @@ public class Nd4jCpu extends org.nd4j.nativeblas.Nd4jCpuHelper { private native void allocate(DoubleBuffer values, int length); public ConstantDescriptor(double[] values, int length) { super((Pointer)null); allocate(values, length); } private native void allocate(double[] values, int length); - public ConstantDescriptor(@Cast("Nd4jLong*") LongPointer values, int length) { super((Pointer)null); allocate(values, length); } - private native void allocate(@Cast("Nd4jLong*") LongPointer values, int length); - public ConstantDescriptor(@Cast("Nd4jLong*") LongBuffer values, int length) { super((Pointer)null); allocate(values, length); } - private native void allocate(@Cast("Nd4jLong*") LongBuffer values, int length); - public ConstantDescriptor(@Cast("Nd4jLong*") long[] values, int length) { super((Pointer)null); allocate(values, length); } - private native void allocate(@Cast("Nd4jLong*") long[] values, int length); + public ConstantDescriptor(@Cast("const Nd4jLong*") LongPointer values, int length) { super((Pointer)null); allocate(values, length); } + private native void allocate(@Cast("const Nd4jLong*") LongPointer values, int length); + public ConstantDescriptor(@Cast("const Nd4jLong*") LongBuffer values, int length) { super((Pointer)null); allocate(values, length); } + private native void allocate(@Cast("const Nd4jLong*") LongBuffer values, int length); + public ConstantDescriptor(@Cast("const Nd4jLong*") long[] values, int length) { super((Pointer)null); allocate(values, length); } + private native void allocate(@Cast("const Nd4jLong*") long[] values, int length); public ConstantDescriptor(@Cast("Nd4jLong*") @StdVector LongPointer values) { super((Pointer)null); allocate(values); } private native void allocate(@Cast("Nd4jLong*") @StdVector LongPointer values); @@ -676,11 +676,11 @@ public class Nd4jCpu extends org.nd4j.nativeblas.Nd4jCpuHelper { 
public TadPack() { super((Pointer)null); allocate(); } private native void allocate(); - public native @Cast("Nd4jLong*") LongPointer primaryShapeInfo(); - public native @Cast("Nd4jLong*") LongPointer primaryOffsets(); + public native @Cast("const Nd4jLong*") LongPointer primaryShapeInfo(); + public native @Cast("const Nd4jLong*") LongPointer primaryOffsets(); - public native @Cast("Nd4jLong*") LongPointer specialShapeInfo(); - public native @Cast("Nd4jLong*") LongPointer specialOffsets(); + public native @Cast("const Nd4jLong*") LongPointer specialShapeInfo(); + public native @Cast("const Nd4jLong*") LongPointer specialOffsets(); public native @Cast("Nd4jLong") long numberOfTads(); public native int shapeInfoLength(); @@ -689,8 +689,8 @@ public class Nd4jCpu extends org.nd4j.nativeblas.Nd4jCpuHelper { * These methods return either primary or special pointers depending on platform binaries were compiled for * @return */ - public native @Cast("Nd4jLong*") LongPointer platformShapeInfo(); - public native @Cast("Nd4jLong*") LongPointer platformOffsets(); + public native @Cast("const Nd4jLong*") LongPointer platformShapeInfo(); + public native @Cast("const Nd4jLong*") LongPointer platformOffsets(); } @@ -1124,19 +1124,19 @@ public native void setTADThreshold(int num); */ public native void execIndexReduceScalar(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo); public native void execIndexReduceScalar(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - 
OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo); public native void execIndexReduceScalar(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo); /** * @@ -1151,22 +1151,22 @@ public native void execIndexReduceScalar(@Cast("Nd4jPointer*") PointerPointer ex */ public native void execIndexReduce(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongPointer hDimensionShape, @Cast("Nd4jLong*") LongPointer dDimensionShape); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongPointer hDimensionShape, @Cast("const Nd4jLong*") 
LongPointer dDimensionShape); public native void execIndexReduce(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongBuffer hDimensionShape, @Cast("Nd4jLong*") LongBuffer dDimensionShape); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongBuffer hDimensionShape, @Cast("const Nd4jLong*") LongBuffer dDimensionShape); public native void execIndexReduce(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") long[] hDimensionShape, @Cast("Nd4jLong*") long[] dDimensionShape); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") long[] hDimensionShape, @Cast("const Nd4jLong*") long[] dDimensionShape); /** * @@ -1183,50 +1183,50 @@ public native void execIndexReduce(@Cast("Nd4jPointer*") PointerPointer extraPoi public native void execBroadcast( @Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, 
- OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongPointer hYShapeInfo, @Cast("Nd4jLong*") LongPointer dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongPointer hDimensionShape, @Cast("Nd4jLong*") LongPointer dDimensionShape); + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") LongPointer hYShapeInfo, @Cast("const Nd4jLong*") LongPointer dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongPointer hDimensionShape, @Cast("const Nd4jLong*") LongPointer dDimensionShape); public native void execBroadcast( @Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongBuffer hYShapeInfo, @Cast("Nd4jLong*") LongBuffer dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongBuffer hDimensionShape, @Cast("Nd4jLong*") LongBuffer dDimensionShape); + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") LongBuffer hYShapeInfo, @Cast("const Nd4jLong*") LongBuffer dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongBuffer hDimensionShape, @Cast("const Nd4jLong*") LongBuffer dDimensionShape); public native void execBroadcast( @Cast("Nd4jPointer*") PointerPointer 
extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") long[] hYShapeInfo, @Cast("Nd4jLong*") long[] dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") long[] hDimensionShape, @Cast("Nd4jLong*") long[] dDimensionShape); + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") long[] hYShapeInfo, @Cast("const Nd4jLong*") long[] dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") long[] hDimensionShape, @Cast("const Nd4jLong*") long[] dDimensionShape); public native void execBroadcastBool( @Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongPointer hYShapeInfo, @Cast("Nd4jLong*") LongPointer dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") LongPointer hYShapeInfo, @Cast("const Nd4jLong*") LongPointer dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongPointer hDimensionShape, @Cast("Nd4jLong*") LongPointer dDimensionShape); + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongPointer hDimensionShape, @Cast("const Nd4jLong*") 
LongPointer dDimensionShape); public native void execBroadcastBool( @Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongBuffer hYShapeInfo, @Cast("Nd4jLong*") LongBuffer dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") LongBuffer hYShapeInfo, @Cast("const Nd4jLong*") LongBuffer dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongBuffer hDimensionShape, @Cast("Nd4jLong*") LongBuffer dDimensionShape); + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongBuffer hDimensionShape, @Cast("const Nd4jLong*") LongBuffer dDimensionShape); public native void execBroadcastBool( @Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") long[] hYShapeInfo, @Cast("Nd4jLong*") long[] dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") long[] hYShapeInfo, @Cast("const Nd4jLong*") long[] dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") long[] hDimensionShape, @Cast("Nd4jLong*") long[] 
dDimensionShape); + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") long[] hDimensionShape, @Cast("const Nd4jLong*") long[] dDimensionShape); /** * @@ -1243,45 +1243,45 @@ public native void execBroadcastBool( public native void execPairwiseTransform( @Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongPointer hYShapeInfo, @Cast("Nd4jLong*") LongPointer dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") LongPointer hYShapeInfo, @Cast("const Nd4jLong*") LongPointer dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, Pointer extraParams); public native void execPairwiseTransform( @Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongBuffer hYShapeInfo, @Cast("Nd4jLong*") LongBuffer dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") LongBuffer hYShapeInfo, @Cast("const Nd4jLong*") LongBuffer dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, Pointer extraParams); public native void execPairwiseTransform( @Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer 
dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") long[] hYShapeInfo, @Cast("Nd4jLong*") long[] dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") long[] hYShapeInfo, @Cast("const Nd4jLong*") long[] dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, Pointer extraParams); public native void execPairwiseTransformBool( @Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongPointer hYShapeInfo, @Cast("Nd4jLong*") LongPointer dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") LongPointer hYShapeInfo, @Cast("const Nd4jLong*") LongPointer dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, Pointer extraParams); public native void execPairwiseTransformBool( @Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongBuffer hYShapeInfo, @Cast("Nd4jLong*") LongBuffer dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer 
hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") LongBuffer hYShapeInfo, @Cast("const Nd4jLong*") LongBuffer dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, Pointer extraParams); public native void execPairwiseTransformBool( @Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") long[] hYShapeInfo, @Cast("Nd4jLong*") long[] dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") long[] hYShapeInfo, @Cast("const Nd4jLong*") long[] dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, Pointer extraParams); /** @@ -1295,68 +1295,68 @@ public native void execPairwiseTransformBool( */ public native void execReduceFloat(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo); public native void execReduceFloat(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, + 
OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo); public native void execReduceFloat(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo); public native void execReduceSame(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo); public native void execReduceSame(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo); + OpaqueDataBuffer 
dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo); public native void execReduceSame(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo); public native void execReduceBool(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo); public native void execReduceBool(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo); public native void execReduceBool(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, 
@Cast("Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo); public native void execReduceLong(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo); public native void execReduceLong(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo); public native void execReduceLong(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] 
dZShapeInfo); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo); /** * @@ -1369,82 +1369,82 @@ public native void execReduceLong(@Cast("Nd4jPointer*") PointerPointer extraPoin */ public native void execReduceFloat2(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongPointer hDimensionShape, @Cast("Nd4jLong*") LongPointer dDimensionShape); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongPointer hDimensionShape, @Cast("const Nd4jLong*") LongPointer dDimensionShape); public native void execReduceFloat2(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongBuffer hDimensionShape, @Cast("Nd4jLong*") LongBuffer dDimensionShape); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongBuffer hDimensionShape, @Cast("const Nd4jLong*") LongBuffer dDimensionShape); public native void 
execReduceFloat2(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") long[] hDimensionShape, @Cast("Nd4jLong*") long[] dDimensionShape); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") long[] hDimensionShape, @Cast("const Nd4jLong*") long[] dDimensionShape); public native void execReduceSame2(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongPointer hDimensionShape, @Cast("Nd4jLong*") LongPointer dDimensionShape); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongPointer hDimensionShape, @Cast("const Nd4jLong*") LongPointer dDimensionShape); public native void execReduceSame2(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, Pointer extraParams, - 
OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongBuffer hDimensionShape, @Cast("Nd4jLong*") LongBuffer dDimensionShape); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongBuffer hDimensionShape, @Cast("const Nd4jLong*") LongBuffer dDimensionShape); public native void execReduceSame2(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") long[] hDimensionShape, @Cast("Nd4jLong*") long[] dDimensionShape); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") long[] hDimensionShape, @Cast("const Nd4jLong*") long[] dDimensionShape); public native void execReduceBool2(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongPointer hDimensionShape, @Cast("Nd4jLong*") LongPointer dDimensionShape); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") 
LongPointer dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongPointer hDimensionShape, @Cast("const Nd4jLong*") LongPointer dDimensionShape); public native void execReduceBool2(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongBuffer hDimensionShape, @Cast("Nd4jLong*") LongBuffer dDimensionShape); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongBuffer hDimensionShape, @Cast("const Nd4jLong*") LongBuffer dDimensionShape); public native void execReduceBool2(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") long[] hDimensionShape, @Cast("Nd4jLong*") long[] dDimensionShape); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") long[] hDimensionShape, @Cast("const Nd4jLong*") long[] dDimensionShape); public native void execReduceLong2(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, 
@Cast("Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongPointer hDimensionShape, @Cast("Nd4jLong*") LongPointer dDimensionShape); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongPointer hDimensionShape, @Cast("const Nd4jLong*") LongPointer dDimensionShape); public native void execReduceLong2(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongBuffer hDimensionShape, @Cast("Nd4jLong*") LongBuffer dDimensionShape); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongBuffer hDimensionShape, @Cast("const Nd4jLong*") LongBuffer dDimensionShape); public native void execReduceLong2(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, - OpaqueDataBuffer 
dbDimension, @Cast("Nd4jLong*") long[] hDimensionShape, @Cast("Nd4jLong*") long[] dDimensionShape); + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") long[] hDimensionShape, @Cast("const Nd4jLong*") long[] dDimensionShape); /** * @@ -1459,22 +1459,22 @@ public native void execReduceLong2(@Cast("Nd4jPointer*") PointerPointer extraPoi */ public native void execReduce3(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, Pointer extraParamsVals, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongPointer hYShapeInfo, @Cast("Nd4jLong*") LongPointer dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo); + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") LongPointer hYShapeInfo, @Cast("const Nd4jLong*") LongPointer dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo); public native void execReduce3(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, Pointer extraParamsVals, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongBuffer hYShapeInfo, @Cast("Nd4jLong*") LongBuffer dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo); + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") LongBuffer hYShapeInfo, @Cast("const Nd4jLong*") LongBuffer dYShapeInfo, + 
OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo); public native void execReduce3(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, Pointer extraParamsVals, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") long[] hYShapeInfo, @Cast("Nd4jLong*") long[] dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo); + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") long[] hYShapeInfo, @Cast("const Nd4jLong*") long[] dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo); /** * @@ -1487,22 +1487,22 @@ public native void execReduce3(@Cast("Nd4jPointer*") PointerPointer extraPointer */ public native void execReduce3Scalar(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, Pointer extraParamsVals, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongPointer hYShapeInfo, @Cast("Nd4jLong*") LongPointer dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo); + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") LongPointer hYShapeInfo, @Cast("const Nd4jLong*") LongPointer dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo); public native void execReduce3Scalar(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer 
hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, Pointer extraParamsVals, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongBuffer hYShapeInfo, @Cast("Nd4jLong*") LongBuffer dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo); + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") LongBuffer hYShapeInfo, @Cast("const Nd4jLong*") LongBuffer dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo); public native void execReduce3Scalar(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, Pointer extraParamsVals, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") long[] hYShapeInfo, @Cast("Nd4jLong*") long[] dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo); + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") long[] hYShapeInfo, @Cast("const Nd4jLong*") long[] dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo); /** * * @param opNum @@ -1518,60 +1518,60 @@ public native void execReduce3Scalar(@Cast("Nd4jPointer*") PointerPointer extraP */ public native void execReduce3Tad(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, Pointer extraParamsVals, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongPointer hYShapeInfo, 
@Cast("Nd4jLong*") LongPointer dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongPointer hDimensionShape, @Cast("Nd4jLong*") LongPointer dDimensionShape, - @Cast("Nd4jLong*") LongPointer tadOnlyShapeInfo, @Cast("Nd4jLong*") LongPointer tadOffsets, - @Cast("Nd4jLong*") LongPointer yTadOnlyShapeInfo, @Cast("Nd4jLong*") LongPointer yTadOffsets); + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") LongPointer hYShapeInfo, @Cast("const Nd4jLong*") LongPointer dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongPointer hDimensionShape, @Cast("const Nd4jLong*") LongPointer dDimensionShape, + @Cast("const Nd4jLong*") LongPointer tadOnlyShapeInfo, @Cast("const Nd4jLong*") LongPointer tadOffsets, + @Cast("const Nd4jLong*") LongPointer yTadOnlyShapeInfo, @Cast("const Nd4jLong*") LongPointer yTadOffsets); public native void execReduce3Tad(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, Pointer extraParamsVals, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongBuffer hYShapeInfo, @Cast("Nd4jLong*") LongBuffer dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongBuffer hDimensionShape, @Cast("Nd4jLong*") LongBuffer dDimensionShape, - @Cast("Nd4jLong*") LongBuffer tadOnlyShapeInfo, @Cast("Nd4jLong*") LongBuffer tadOffsets, - @Cast("Nd4jLong*") LongBuffer yTadOnlyShapeInfo, @Cast("Nd4jLong*") LongBuffer yTadOffsets); + OpaqueDataBuffer dbY, 
@Cast("const Nd4jLong*") LongBuffer hYShapeInfo, @Cast("const Nd4jLong*") LongBuffer dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongBuffer hDimensionShape, @Cast("const Nd4jLong*") LongBuffer dDimensionShape, + @Cast("const Nd4jLong*") LongBuffer tadOnlyShapeInfo, @Cast("const Nd4jLong*") LongBuffer tadOffsets, + @Cast("const Nd4jLong*") LongBuffer yTadOnlyShapeInfo, @Cast("const Nd4jLong*") LongBuffer yTadOffsets); public native void execReduce3Tad(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, Pointer extraParamsVals, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") long[] hYShapeInfo, @Cast("Nd4jLong*") long[] dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") long[] hDimensionShape, @Cast("Nd4jLong*") long[] dDimensionShape, - @Cast("Nd4jLong*") long[] tadOnlyShapeInfo, @Cast("Nd4jLong*") long[] tadOffsets, - @Cast("Nd4jLong*") long[] yTadOnlyShapeInfo, @Cast("Nd4jLong*") long[] yTadOffsets); + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") long[] hYShapeInfo, @Cast("const Nd4jLong*") long[] dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") long[] hDimensionShape, @Cast("const Nd4jLong*") long[] dDimensionShape, + @Cast("const Nd4jLong*") long[] tadOnlyShapeInfo, @Cast("const Nd4jLong*") long[] tadOffsets, + @Cast("const Nd4jLong*") long[] yTadOnlyShapeInfo, @Cast("const Nd4jLong*") long[] yTadOffsets); public native void 
execReduce3All(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, Pointer extraParamsVals, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongPointer hYShapeInfo, @Cast("Nd4jLong*") LongPointer dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongPointer hDimensionShape, @Cast("Nd4jLong*") LongPointer dDimensionShape, - @Cast("Nd4jLong*") LongPointer xTadShapeInfo, @Cast("Nd4jLong*") LongPointer xOffsets, - @Cast("Nd4jLong*") LongPointer yTadShapeInfo, @Cast("Nd4jLong*") LongPointer yOffsets); + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") LongPointer hYShapeInfo, @Cast("const Nd4jLong*") LongPointer dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongPointer hDimensionShape, @Cast("const Nd4jLong*") LongPointer dDimensionShape, + @Cast("const Nd4jLong*") LongPointer xTadShapeInfo, @Cast("const Nd4jLong*") LongPointer xOffsets, + @Cast("const Nd4jLong*") LongPointer yTadShapeInfo, @Cast("const Nd4jLong*") LongPointer yOffsets); public native void execReduce3All(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, Pointer extraParamsVals, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongBuffer hYShapeInfo, @Cast("Nd4jLong*") LongBuffer dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, 
@Cast("Nd4jLong*") LongBuffer dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongBuffer hDimensionShape, @Cast("Nd4jLong*") LongBuffer dDimensionShape, - @Cast("Nd4jLong*") LongBuffer xTadShapeInfo, @Cast("Nd4jLong*") LongBuffer xOffsets, - @Cast("Nd4jLong*") LongBuffer yTadShapeInfo, @Cast("Nd4jLong*") LongBuffer yOffsets); + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") LongBuffer hYShapeInfo, @Cast("const Nd4jLong*") LongBuffer dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongBuffer hDimensionShape, @Cast("const Nd4jLong*") LongBuffer dDimensionShape, + @Cast("const Nd4jLong*") LongBuffer xTadShapeInfo, @Cast("const Nd4jLong*") LongBuffer xOffsets, + @Cast("const Nd4jLong*") LongBuffer yTadShapeInfo, @Cast("const Nd4jLong*") LongBuffer yOffsets); public native void execReduce3All(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, Pointer extraParamsVals, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") long[] hYShapeInfo, @Cast("Nd4jLong*") long[] dYShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") long[] hDimensionShape, @Cast("Nd4jLong*") long[] dDimensionShape, - @Cast("Nd4jLong*") long[] xTadShapeInfo, @Cast("Nd4jLong*") long[] xOffsets, - @Cast("Nd4jLong*") long[] yTadShapeInfo, @Cast("Nd4jLong*") long[] yOffsets); + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") long[] hYShapeInfo, @Cast("const Nd4jLong*") long[] dYShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer 
dbDimension, @Cast("const Nd4jLong*") long[] hDimensionShape, @Cast("const Nd4jLong*") long[] dDimensionShape, + @Cast("const Nd4jLong*") long[] xTadShapeInfo, @Cast("const Nd4jLong*") long[] xOffsets, + @Cast("const Nd4jLong*") long[] yTadShapeInfo, @Cast("const Nd4jLong*") long[] yOffsets); /** * @@ -1586,40 +1586,40 @@ public native void execReduce3All(@Cast("Nd4jPointer*") PointerPointer extraPoin */ public native void execScalar(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, - OpaqueDataBuffer dbScalar, @Cast("Nd4jLong*") LongPointer hSscalarShapeInfo, @Cast("Nd4jLong*") LongPointer dSscalarShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbScalar, @Cast("const Nd4jLong*") LongPointer hSscalarShapeInfo, @Cast("const Nd4jLong*") LongPointer dSscalarShapeInfo, Pointer extraParams); public native void execScalar(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, - OpaqueDataBuffer dbScalar, @Cast("Nd4jLong*") LongBuffer hSscalarShapeInfo, @Cast("Nd4jLong*") LongBuffer dSscalarShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbScalar, @Cast("const 
Nd4jLong*") LongBuffer hSscalarShapeInfo, @Cast("const Nd4jLong*") LongBuffer dSscalarShapeInfo, Pointer extraParams); public native void execScalar(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, - OpaqueDataBuffer dbScalar, @Cast("Nd4jLong*") long[] hSscalarShapeInfo, @Cast("Nd4jLong*") long[] dSscalarShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbScalar, @Cast("const Nd4jLong*") long[] hSscalarShapeInfo, @Cast("const Nd4jLong*") long[] dSscalarShapeInfo, Pointer extraParams); public native void execScalarBool(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, - OpaqueDataBuffer dbScalar, @Cast("Nd4jLong*") LongPointer hSscalarShapeInfo, @Cast("Nd4jLong*") LongPointer dSscalarShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbScalar, @Cast("const Nd4jLong*") LongPointer hSscalarShapeInfo, @Cast("const Nd4jLong*") LongPointer dSscalarShapeInfo, Pointer extraParams); public native void execScalarBool(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, - 
OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, - OpaqueDataBuffer dbScalar, @Cast("Nd4jLong*") LongBuffer hSscalarShapeInfo, @Cast("Nd4jLong*") LongBuffer dSscalarShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbScalar, @Cast("const Nd4jLong*") LongBuffer hSscalarShapeInfo, @Cast("const Nd4jLong*") LongBuffer dSscalarShapeInfo, Pointer extraParams); public native void execScalarBool(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, - OpaqueDataBuffer dbScalar, @Cast("Nd4jLong*") long[] hSscalarShapeInfo, @Cast("Nd4jLong*") long[] dSscalarShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbScalar, @Cast("const Nd4jLong*") long[] hSscalarShapeInfo, @Cast("const Nd4jLong*") long[] dSscalarShapeInfo, Pointer extraParams); /** @@ -1631,21 +1631,21 @@ public native void execScalarBool(@Cast("Nd4jPointer*") PointerPointer extraPoin */ public native void execSummaryStatsScalar(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, 
@Cast("Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, @Cast("bool") boolean biasCorrected); public native void execSummaryStatsScalar(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, @Cast("bool") boolean biasCorrected); public native void execSummaryStatsScalar(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, @Cast("bool") boolean biasCorrected); /** * @@ -1658,21 +1658,21 @@ public native void execSummaryStatsScalar(@Cast("Nd4jPointer*") PointerPointer e */ public native void execSummaryStats(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer 
dZShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, @Cast("bool") boolean biasCorrected); public native void execSummaryStats(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, @Cast("bool") boolean biasCorrected); public native void execSummaryStats(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, @Cast("bool") boolean biasCorrected); /** * @@ -1687,28 +1687,28 @@ public native void execSummaryStats(@Cast("Nd4jPointer*") PointerPointer extraPo */ public native void execSummaryStatsTad(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, - OpaqueDataBuffer 
dbDimension, @Cast("Nd4jLong*") LongPointer hDimensionShape, @Cast("Nd4jLong*") LongPointer dDimensionShape, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongPointer hDimensionShape, @Cast("const Nd4jLong*") LongPointer dDimensionShape, @Cast("bool") boolean biasCorrected, - @Cast("Nd4jLong*") LongPointer tadShapeInfo, @Cast("Nd4jLong*") LongPointer tadOffsets); + @Cast("const Nd4jLong*") LongPointer tadShapeInfo, @Cast("const Nd4jLong*") LongPointer tadOffsets); public native void execSummaryStatsTad(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongBuffer hDimensionShape, @Cast("Nd4jLong*") LongBuffer dDimensionShape, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongBuffer hDimensionShape, @Cast("const Nd4jLong*") LongBuffer dDimensionShape, @Cast("bool") boolean biasCorrected, - @Cast("Nd4jLong*") LongBuffer tadShapeInfo, @Cast("Nd4jLong*") LongBuffer tadOffsets); + @Cast("const Nd4jLong*") LongBuffer tadShapeInfo, @Cast("const Nd4jLong*") LongBuffer tadOffsets); public native void execSummaryStatsTad(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, 
Pointer extraParams, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") long[] hDimensionShape, @Cast("Nd4jLong*") long[] dDimensionShape, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") long[] hDimensionShape, @Cast("const Nd4jLong*") long[] dDimensionShape, @Cast("bool") boolean biasCorrected, - @Cast("Nd4jLong*") long[] tadShapeInfo, @Cast("Nd4jLong*") long[] tadOffsets); + @Cast("const Nd4jLong*") long[] tadShapeInfo, @Cast("const Nd4jLong*") long[] tadOffsets); /** * @@ -1722,82 +1722,82 @@ public native void execSummaryStatsTad(@Cast("Nd4jPointer*") PointerPointer extr */ public native void execTransformFloat(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, Pointer extraParams); public native void execTransformFloat(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, Pointer extraParams); public 
native void execTransformFloat(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, Pointer extraParams); public native void execTransformSame(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, Pointer extraParams); public native void execTransformSame(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, Pointer extraParams); public native void execTransformSame(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, 
@Cast("Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, Pointer extraParams); public native void execTransformBool(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, Pointer extraParams); public native void execTransformBool(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, Pointer extraParams); public native void execTransformBool(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] 
dZShapeInfo, Pointer extraParams); public native void execTransformAny(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, Pointer extraParams); public native void execTransformAny(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, Pointer extraParams); public native void execTransformAny(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, Pointer extraParams); public native void execTransformStrict(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, - OpaqueDataBuffer 
dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, Pointer extraParams); public native void execTransformStrict(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, Pointer extraParams); public native void execTransformStrict(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, Pointer extraParams); /** @@ -1815,59 +1815,59 @@ public native void execTransformStrict(@Cast("Nd4jPointer*") PointerPointer extr */ public native void execScalarTad(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, - OpaqueDataBuffer dbScalars, @Cast("Nd4jLong*") LongPointer 
hScalarShapeInfo, @Cast("Nd4jLong*") LongPointer dScalarShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbScalars, @Cast("const Nd4jLong*") LongPointer hScalarShapeInfo, @Cast("const Nd4jLong*") LongPointer dScalarShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongPointer hDimensionShape, @Cast("Nd4jLong*") LongPointer dDimensionShape, - @Cast("Nd4jLong*") LongPointer tadShapeInfo, @Cast("Nd4jLong*") LongPointer tadOffsets, - @Cast("Nd4jLong*") LongPointer tadShapeInfoZ, @Cast("Nd4jLong*") LongPointer tadOffsetsZ); + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongPointer hDimensionShape, @Cast("const Nd4jLong*") LongPointer dDimensionShape, + @Cast("const Nd4jLong*") LongPointer tadShapeInfo, @Cast("const Nd4jLong*") LongPointer tadOffsets, + @Cast("const Nd4jLong*") LongPointer tadShapeInfoZ, @Cast("const Nd4jLong*") LongPointer tadOffsetsZ); public native void execScalarTad(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, - OpaqueDataBuffer dbScalars, @Cast("Nd4jLong*") LongBuffer hScalarShapeInfo, @Cast("Nd4jLong*") LongBuffer dScalarShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbScalars, @Cast("const Nd4jLong*") LongBuffer hScalarShapeInfo, @Cast("const Nd4jLong*") LongBuffer dScalarShapeInfo, Pointer extraParams, - OpaqueDataBuffer 
dbDimension, @Cast("Nd4jLong*") LongBuffer hDimensionShape, @Cast("Nd4jLong*") LongBuffer dDimensionShape, - @Cast("Nd4jLong*") LongBuffer tadShapeInfo, @Cast("Nd4jLong*") LongBuffer tadOffsets, - @Cast("Nd4jLong*") LongBuffer tadShapeInfoZ, @Cast("Nd4jLong*") LongBuffer tadOffsetsZ); + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongBuffer hDimensionShape, @Cast("const Nd4jLong*") LongBuffer dDimensionShape, + @Cast("const Nd4jLong*") LongBuffer tadShapeInfo, @Cast("const Nd4jLong*") LongBuffer tadOffsets, + @Cast("const Nd4jLong*") LongBuffer tadShapeInfoZ, @Cast("const Nd4jLong*") LongBuffer tadOffsetsZ); public native void execScalarTad(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, - OpaqueDataBuffer dbScalars, @Cast("Nd4jLong*") long[] hScalarShapeInfo, @Cast("Nd4jLong*") long[] dScalarShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbScalars, @Cast("const Nd4jLong*") long[] hScalarShapeInfo, @Cast("const Nd4jLong*") long[] dScalarShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") long[] hDimensionShape, @Cast("Nd4jLong*") long[] dDimensionShape, - @Cast("Nd4jLong*") long[] tadShapeInfo, @Cast("Nd4jLong*") long[] tadOffsets, - @Cast("Nd4jLong*") long[] tadShapeInfoZ, @Cast("Nd4jLong*") long[] tadOffsetsZ); + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") long[] hDimensionShape, @Cast("const Nd4jLong*") long[] dDimensionShape, + @Cast("const Nd4jLong*") long[] tadShapeInfo, @Cast("const Nd4jLong*") long[] tadOffsets, + @Cast("const Nd4jLong*") long[] tadShapeInfoZ, @Cast("const 
Nd4jLong*") long[] tadOffsetsZ); public native void execScalarBoolTad(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeInfo, @Cast("Nd4jLong*") LongPointer dZShapeInfo, - OpaqueDataBuffer dbScalars, @Cast("Nd4jLong*") LongPointer hScalarShapeInfo, @Cast("Nd4jLong*") LongPointer dScalarShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeInfo, @Cast("const Nd4jLong*") LongPointer dZShapeInfo, + OpaqueDataBuffer dbScalars, @Cast("const Nd4jLong*") LongPointer hScalarShapeInfo, @Cast("const Nd4jLong*") LongPointer dScalarShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongPointer hDimensionShape, @Cast("Nd4jLong*") LongPointer dDimensionShape, - @Cast("Nd4jLong*") LongPointer tadShapeInfo, @Cast("Nd4jLong*") LongPointer tadOffsets, - @Cast("Nd4jLong*") LongPointer tadShapeInfoZ, @Cast("Nd4jLong*") LongPointer tadOffsetsZ); + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongPointer hDimensionShape, @Cast("const Nd4jLong*") LongPointer dDimensionShape, + @Cast("const Nd4jLong*") LongPointer tadShapeInfo, @Cast("const Nd4jLong*") LongPointer tadOffsets, + @Cast("const Nd4jLong*") LongPointer tadShapeInfoZ, @Cast("const Nd4jLong*") LongPointer tadOffsetsZ); public native void execScalarBoolTad(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeInfo, @Cast("Nd4jLong*") LongBuffer dZShapeInfo, - OpaqueDataBuffer dbScalars, @Cast("Nd4jLong*") LongBuffer hScalarShapeInfo, @Cast("Nd4jLong*") LongBuffer dScalarShapeInfo, + 
OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeInfo, @Cast("const Nd4jLong*") LongBuffer dZShapeInfo, + OpaqueDataBuffer dbScalars, @Cast("const Nd4jLong*") LongBuffer hScalarShapeInfo, @Cast("const Nd4jLong*") LongBuffer dScalarShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") LongBuffer hDimensionShape, @Cast("Nd4jLong*") LongBuffer dDimensionShape, - @Cast("Nd4jLong*") LongBuffer tadShapeInfo, @Cast("Nd4jLong*") LongBuffer tadOffsets, - @Cast("Nd4jLong*") LongBuffer tadShapeInfoZ, @Cast("Nd4jLong*") LongBuffer tadOffsetsZ); + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") LongBuffer hDimensionShape, @Cast("const Nd4jLong*") LongBuffer dDimensionShape, + @Cast("const Nd4jLong*") LongBuffer tadShapeInfo, @Cast("const Nd4jLong*") LongBuffer tadOffsets, + @Cast("const Nd4jLong*") LongBuffer tadShapeInfoZ, @Cast("const Nd4jLong*") LongBuffer tadOffsetsZ); public native void execScalarBoolTad(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] dXShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeInfo, @Cast("Nd4jLong*") long[] dZShapeInfo, - OpaqueDataBuffer dbScalars, @Cast("Nd4jLong*") long[] hScalarShapeInfo, @Cast("Nd4jLong*") long[] dScalarShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] dXShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeInfo, @Cast("const Nd4jLong*") long[] dZShapeInfo, + OpaqueDataBuffer dbScalars, @Cast("const Nd4jLong*") long[] hScalarShapeInfo, @Cast("const Nd4jLong*") long[] dScalarShapeInfo, Pointer extraParams, - OpaqueDataBuffer dbDimension, @Cast("Nd4jLong*") long[] hDimensionShape, @Cast("Nd4jLong*") long[] dDimensionShape, - @Cast("Nd4jLong*") long[] 
tadShapeInfo, @Cast("Nd4jLong*") long[] tadOffsets, - @Cast("Nd4jLong*") long[] tadShapeInfoZ, @Cast("Nd4jLong*") long[] tadOffsetsZ); + OpaqueDataBuffer dbDimension, @Cast("const Nd4jLong*") long[] hDimensionShape, @Cast("const Nd4jLong*") long[] dDimensionShape, + @Cast("const Nd4jLong*") long[] tadShapeInfo, @Cast("const Nd4jLong*") long[] tadOffsets, + @Cast("const Nd4jLong*") long[] tadShapeInfoZ, @Cast("const Nd4jLong*") long[] tadOffsetsZ); public native void specialConcat( @Cast("Nd4jPointer*") PointerPointer extraPointers, @@ -1876,7 +1876,7 @@ public native void specialConcat( @Cast("Nd4jPointer*") PointerPointer data, @Cast("Nd4jPointer*") PointerPointer inputShapeInfo, Pointer result, - @Cast("Nd4jLong*") LongPointer resultShapeInfo, + @Cast("const Nd4jLong*") LongPointer resultShapeInfo, @Cast("Nd4jPointer*") PointerPointer tadPointers, @Cast("Nd4jPointer*") PointerPointer offsetPointers); public native void specialConcat( @@ -1886,7 +1886,7 @@ public native void specialConcat( @Cast("Nd4jPointer*") PointerPointer data, @Cast("Nd4jPointer*") PointerPointer inputShapeInfo, Pointer result, - @Cast("Nd4jLong*") LongBuffer resultShapeInfo, + @Cast("const Nd4jLong*") LongBuffer resultShapeInfo, @Cast("Nd4jPointer*") PointerPointer tadPointers, @Cast("Nd4jPointer*") PointerPointer offsetPointers); public native void specialConcat( @@ -1896,7 +1896,7 @@ public native void specialConcat( @Cast("Nd4jPointer*") PointerPointer data, @Cast("Nd4jPointer*") PointerPointer inputShapeInfo, Pointer result, - @Cast("Nd4jLong*") long[] resultShapeInfo, + @Cast("const Nd4jLong*") long[] resultShapeInfo, @Cast("Nd4jPointer*") PointerPointer tadPointers, @Cast("Nd4jPointer*") PointerPointer offsetPointers); @@ -2190,20 +2190,20 @@ public native void setGridLimit(int gridSize); * @param targetBuffer * @param offsetsBuffer */ -public native OpaqueTadPack tadOnlyShapeInfo(@Cast("Nd4jLong*") LongPointer xShapeInfo, +public native OpaqueTadPack tadOnlyShapeInfo(@Cast("const 
Nd4jLong*") LongPointer xShapeInfo, IntPointer dimension, int dimensionLength); -public native OpaqueTadPack tadOnlyShapeInfo(@Cast("Nd4jLong*") LongBuffer xShapeInfo, +public native OpaqueTadPack tadOnlyShapeInfo(@Cast("const Nd4jLong*") LongBuffer xShapeInfo, IntBuffer dimension, int dimensionLength); -public native OpaqueTadPack tadOnlyShapeInfo(@Cast("Nd4jLong*") long[] xShapeInfo, +public native OpaqueTadPack tadOnlyShapeInfo(@Cast("const Nd4jLong*") long[] xShapeInfo, int[] dimension, int dimensionLength); -public native @Cast("Nd4jLong*") LongPointer getPrimaryShapeInfo(OpaqueTadPack pack); -public native @Cast("Nd4jLong*") LongPointer getPrimaryOffsets(OpaqueTadPack pack); -public native @Cast("Nd4jLong*") LongPointer getSpecialShapeInfo(OpaqueTadPack pack); -public native @Cast("Nd4jLong*") LongPointer getSpecialOffsets(OpaqueTadPack pack); +public native @Cast("const Nd4jLong*") LongPointer getPrimaryShapeInfo(OpaqueTadPack pack); +public native @Cast("const Nd4jLong*") LongPointer getPrimaryOffsets(OpaqueTadPack pack); +public native @Cast("const Nd4jLong*") LongPointer getSpecialShapeInfo(OpaqueTadPack pack); +public native @Cast("const Nd4jLong*") LongPointer getSpecialOffsets(OpaqueTadPack pack); public native @Cast("Nd4jLong") long getNumberOfTads(OpaqueTadPack pack); public native int getShapeInfoLength(OpaqueTadPack pack); @@ -2228,32 +2228,32 @@ public native void deleteTadPack(OpaqueTadPack ptr); * @param zTadOffsets */ public native void pullRows(@Cast("Nd4jPointer*") PointerPointer extraPointers, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer xShapeInfo, @Cast("Nd4jLong*") LongPointer dxShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer zShapeInfo, @Cast("Nd4jLong*") LongPointer dzShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer xShapeInfo, @Cast("const Nd4jLong*") LongPointer dxShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer zShapeInfo, @Cast("const Nd4jLong*") LongPointer 
dzShapeInfo, @Cast("Nd4jLong") long n, @Cast("Nd4jLong*") LongPointer indexes, - @Cast("Nd4jLong*") LongPointer tadShapeInfo, - @Cast("Nd4jLong*") LongPointer tadOffsets, - @Cast("Nd4jLong*") LongPointer zTadShapeInfo, - @Cast("Nd4jLong*") LongPointer zTadOffsets); + @Cast("const Nd4jLong*") LongPointer tadShapeInfo, + @Cast("const Nd4jLong*") LongPointer tadOffsets, + @Cast("const Nd4jLong*") LongPointer zTadShapeInfo, + @Cast("const Nd4jLong*") LongPointer zTadOffsets); public native void pullRows(@Cast("Nd4jPointer*") PointerPointer extraPointers, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer xShapeInfo, @Cast("Nd4jLong*") LongBuffer dxShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer zShapeInfo, @Cast("Nd4jLong*") LongBuffer dzShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer xShapeInfo, @Cast("const Nd4jLong*") LongBuffer dxShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer zShapeInfo, @Cast("const Nd4jLong*") LongBuffer dzShapeInfo, @Cast("Nd4jLong") long n, @Cast("Nd4jLong*") LongBuffer indexes, - @Cast("Nd4jLong*") LongBuffer tadShapeInfo, - @Cast("Nd4jLong*") LongBuffer tadOffsets, - @Cast("Nd4jLong*") LongBuffer zTadShapeInfo, - @Cast("Nd4jLong*") LongBuffer zTadOffsets); + @Cast("const Nd4jLong*") LongBuffer tadShapeInfo, + @Cast("const Nd4jLong*") LongBuffer tadOffsets, + @Cast("const Nd4jLong*") LongBuffer zTadShapeInfo, + @Cast("const Nd4jLong*") LongBuffer zTadOffsets); public native void pullRows(@Cast("Nd4jPointer*") PointerPointer extraPointers, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] xShapeInfo, @Cast("Nd4jLong*") long[] dxShapeInfo, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] zShapeInfo, @Cast("Nd4jLong*") long[] dzShapeInfo, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] xShapeInfo, @Cast("const Nd4jLong*") long[] dxShapeInfo, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] zShapeInfo, @Cast("const Nd4jLong*") long[] dzShapeInfo, @Cast("Nd4jLong") 
long n, @Cast("Nd4jLong*") long[] indexes, - @Cast("Nd4jLong*") long[] tadShapeInfo, - @Cast("Nd4jLong*") long[] tadOffsets, - @Cast("Nd4jLong*") long[] zTadShapeInfo, - @Cast("Nd4jLong*") long[] zTadOffsets); + @Cast("const Nd4jLong*") long[] tadShapeInfo, + @Cast("const Nd4jLong*") long[] tadOffsets, + @Cast("const Nd4jLong*") long[] zTadShapeInfo, + @Cast("const Nd4jLong*") long[] zTadOffsets); /** * @@ -2265,50 +2265,50 @@ public native void pullRows(@Cast("Nd4jPointer*") PointerPointer extraPointers, * @param propagate */ public native void average(@Cast("Nd4jPointer*") PointerPointer extras, - @Cast("Nd4jPointer*") PointerPointer x, @Cast("Nd4jLong*") LongPointer xShapeInfo, - @Cast("Nd4jPointer*") PointerPointer dx, @Cast("Nd4jLong*") LongPointer dxShapeInfo, - Pointer z, @Cast("Nd4jLong*") LongPointer zShapeInfo, - Pointer dz, @Cast("Nd4jLong*") LongPointer dzShapeInfo, + @Cast("Nd4jPointer*") PointerPointer x, @Cast("const Nd4jLong*") LongPointer xShapeInfo, + @Cast("Nd4jPointer*") PointerPointer dx, @Cast("const Nd4jLong*") LongPointer dxShapeInfo, + Pointer z, @Cast("const Nd4jLong*") LongPointer zShapeInfo, + Pointer dz, @Cast("const Nd4jLong*") LongPointer dzShapeInfo, int n, @Cast("Nd4jLong") long length, @Cast("bool") boolean propagate); public native void average(@Cast("Nd4jPointer*") PointerPointer extras, - @Cast("Nd4jPointer*") PointerPointer x, @Cast("Nd4jLong*") LongBuffer xShapeInfo, - @Cast("Nd4jPointer*") PointerPointer dx, @Cast("Nd4jLong*") LongBuffer dxShapeInfo, - Pointer z, @Cast("Nd4jLong*") LongBuffer zShapeInfo, - Pointer dz, @Cast("Nd4jLong*") LongBuffer dzShapeInfo, + @Cast("Nd4jPointer*") PointerPointer x, @Cast("const Nd4jLong*") LongBuffer xShapeInfo, + @Cast("Nd4jPointer*") PointerPointer dx, @Cast("const Nd4jLong*") LongBuffer dxShapeInfo, + Pointer z, @Cast("const Nd4jLong*") LongBuffer zShapeInfo, + Pointer dz, @Cast("const Nd4jLong*") LongBuffer dzShapeInfo, int n, @Cast("Nd4jLong") long length, @Cast("bool") boolean 
propagate); public native void average(@Cast("Nd4jPointer*") PointerPointer extras, - @Cast("Nd4jPointer*") PointerPointer x, @Cast("Nd4jLong*") long[] xShapeInfo, - @Cast("Nd4jPointer*") PointerPointer dx, @Cast("Nd4jLong*") long[] dxShapeInfo, - Pointer z, @Cast("Nd4jLong*") long[] zShapeInfo, - Pointer dz, @Cast("Nd4jLong*") long[] dzShapeInfo, + @Cast("Nd4jPointer*") PointerPointer x, @Cast("const Nd4jLong*") long[] xShapeInfo, + @Cast("Nd4jPointer*") PointerPointer dx, @Cast("const Nd4jLong*") long[] dxShapeInfo, + Pointer z, @Cast("const Nd4jLong*") long[] zShapeInfo, + Pointer dz, @Cast("const Nd4jLong*") long[] dzShapeInfo, int n, @Cast("Nd4jLong") long length, @Cast("bool") boolean propagate); public native void accumulate(@Cast("Nd4jPointer*") PointerPointer extras, - @Cast("Nd4jPointer*") PointerPointer x, @Cast("Nd4jLong*") LongPointer xShapeInfo, - @Cast("Nd4jPointer*") PointerPointer dx, @Cast("Nd4jLong*") LongPointer dxShapeInfo, - Pointer z, @Cast("Nd4jLong*") LongPointer zShapeInfo, - Pointer dz, @Cast("Nd4jLong*") LongPointer dzShapeInfo, + @Cast("Nd4jPointer*") PointerPointer x, @Cast("const Nd4jLong*") LongPointer xShapeInfo, + @Cast("Nd4jPointer*") PointerPointer dx, @Cast("const Nd4jLong*") LongPointer dxShapeInfo, + Pointer z, @Cast("const Nd4jLong*") LongPointer zShapeInfo, + Pointer dz, @Cast("const Nd4jLong*") LongPointer dzShapeInfo, int n, @Cast("Nd4jLong") long length); public native void accumulate(@Cast("Nd4jPointer*") PointerPointer extras, - @Cast("Nd4jPointer*") PointerPointer x, @Cast("Nd4jLong*") LongBuffer xShapeInfo, - @Cast("Nd4jPointer*") PointerPointer dx, @Cast("Nd4jLong*") LongBuffer dxShapeInfo, - Pointer z, @Cast("Nd4jLong*") LongBuffer zShapeInfo, - Pointer dz, @Cast("Nd4jLong*") LongBuffer dzShapeInfo, + @Cast("Nd4jPointer*") PointerPointer x, @Cast("const Nd4jLong*") LongBuffer xShapeInfo, + @Cast("Nd4jPointer*") PointerPointer dx, @Cast("const Nd4jLong*") LongBuffer dxShapeInfo, + Pointer z, @Cast("const Nd4jLong*") 
LongBuffer zShapeInfo, + Pointer dz, @Cast("const Nd4jLong*") LongBuffer dzShapeInfo, int n, @Cast("Nd4jLong") long length); public native void accumulate(@Cast("Nd4jPointer*") PointerPointer extras, - @Cast("Nd4jPointer*") PointerPointer x, @Cast("Nd4jLong*") long[] xShapeInfo, - @Cast("Nd4jPointer*") PointerPointer dx, @Cast("Nd4jLong*") long[] dxShapeInfo, - Pointer z, @Cast("Nd4jLong*") long[] zShapeInfo, - Pointer dz, @Cast("Nd4jLong*") long[] dzShapeInfo, + @Cast("Nd4jPointer*") PointerPointer x, @Cast("const Nd4jLong*") long[] xShapeInfo, + @Cast("Nd4jPointer*") PointerPointer dx, @Cast("const Nd4jLong*") long[] dxShapeInfo, + Pointer z, @Cast("const Nd4jLong*") long[] zShapeInfo, + Pointer dz, @Cast("const Nd4jLong*") long[] dzShapeInfo, int n, @Cast("Nd4jLong") long length); @@ -2513,17 +2513,17 @@ public native void execAggregateBatch(@Cast("Nd4jPointer*") PointerPointer extra public native void execRandom(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, @Cast("Nd4jPointer") Pointer state, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeBuffer, @Cast("Nd4jLong*") LongPointer dZShapeBuffer, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeBuffer, @Cast("const Nd4jLong*") LongPointer dZShapeBuffer, Pointer extraArguments); public native void execRandom(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, @Cast("Nd4jPointer") Pointer state, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeBuffer, @Cast("Nd4jLong*") LongBuffer dZShapeBuffer, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeBuffer, @Cast("const Nd4jLong*") LongBuffer dZShapeBuffer, Pointer extraArguments); public native void execRandom(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, @Cast("Nd4jPointer") Pointer state, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeBuffer, @Cast("Nd4jLong*") long[] dZShapeBuffer, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeBuffer, 
@Cast("const Nd4jLong*") long[] dZShapeBuffer, Pointer extraArguments); /** @@ -2542,23 +2542,23 @@ public native void execRandom(@Cast("Nd4jPointer*") PointerPointer extraPointers public native void execRandom3(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, @Cast("Nd4jPointer") Pointer state, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeBuffer, @Cast("Nd4jLong*") LongPointer dXShapeBuffer, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongPointer hYShapeBuffer, @Cast("Nd4jLong*") LongPointer dYShapeBuffer, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeBuffer, @Cast("Nd4jLong*") LongPointer dZShapeBuffer, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeBuffer, @Cast("const Nd4jLong*") LongPointer dXShapeBuffer, + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") LongPointer hYShapeBuffer, @Cast("const Nd4jLong*") LongPointer dYShapeBuffer, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeBuffer, @Cast("const Nd4jLong*") LongPointer dZShapeBuffer, Pointer extraArguments); public native void execRandom3(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, @Cast("Nd4jPointer") Pointer state, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeBuffer, @Cast("Nd4jLong*") LongBuffer dXShapeBuffer, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") LongBuffer hYShapeBuffer, @Cast("Nd4jLong*") LongBuffer dYShapeBuffer, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeBuffer, @Cast("Nd4jLong*") LongBuffer dZShapeBuffer, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeBuffer, @Cast("const Nd4jLong*") LongBuffer dXShapeBuffer, + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") LongBuffer hYShapeBuffer, @Cast("const Nd4jLong*") LongBuffer dYShapeBuffer, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeBuffer, @Cast("const Nd4jLong*") LongBuffer dZShapeBuffer, Pointer extraArguments); public native void execRandom3(@Cast("Nd4jPointer*") 
PointerPointer extraPointers, int opNum, @Cast("Nd4jPointer") Pointer state, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeBuffer, @Cast("Nd4jLong*") long[] dXShapeBuffer, - OpaqueDataBuffer dbY, @Cast("Nd4jLong*") long[] hYShapeBuffer, @Cast("Nd4jLong*") long[] dYShapeBuffer, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeBuffer, @Cast("Nd4jLong*") long[] dZShapeBuffer, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeBuffer, @Cast("const Nd4jLong*") long[] dXShapeBuffer, + OpaqueDataBuffer dbY, @Cast("const Nd4jLong*") long[] hYShapeBuffer, @Cast("const Nd4jLong*") long[] dYShapeBuffer, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeBuffer, @Cast("const Nd4jLong*") long[] dZShapeBuffer, Pointer extraArguments); /** @@ -2575,20 +2575,20 @@ public native void execRandom3(@Cast("Nd4jPointer*") PointerPointer extraPointer public native void execRandom2(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, @Cast("Nd4jPointer") Pointer state, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer hXShapeBuffer, @Cast("Nd4jLong*") LongPointer dXShapeBuffer, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongPointer hZShapeBuffer, @Cast("Nd4jLong*") LongPointer dZShapeBuffer, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer hXShapeBuffer, @Cast("const Nd4jLong*") LongPointer dXShapeBuffer, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongPointer hZShapeBuffer, @Cast("const Nd4jLong*") LongPointer dZShapeBuffer, Pointer extraArguments); public native void execRandom2(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, @Cast("Nd4jPointer") Pointer state, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer hXShapeBuffer, @Cast("Nd4jLong*") LongBuffer dXShapeBuffer, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") LongBuffer hZShapeBuffer, @Cast("Nd4jLong*") LongBuffer dZShapeBuffer, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongBuffer hXShapeBuffer, @Cast("const Nd4jLong*") LongBuffer 
dXShapeBuffer, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") LongBuffer hZShapeBuffer, @Cast("const Nd4jLong*") LongBuffer dZShapeBuffer, Pointer extraArguments); public native void execRandom2(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opNum, @Cast("Nd4jPointer") Pointer state, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] hXShapeBuffer, @Cast("Nd4jLong*") long[] dXShapeBuffer, - OpaqueDataBuffer dbZ, @Cast("Nd4jLong*") long[] hZShapeBuffer, @Cast("Nd4jLong*") long[] dZShapeBuffer, + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] hXShapeBuffer, @Cast("const Nd4jLong*") long[] dXShapeBuffer, + OpaqueDataBuffer dbZ, @Cast("const Nd4jLong*") long[] hZShapeBuffer, @Cast("const Nd4jLong*") long[] dZShapeBuffer, Pointer extraArguments); @@ -2793,167 +2793,143 @@ public native @Cast("Nd4jPointer") Pointer pointerForAddress(@Cast("Nd4jLong") l * @return */ public native void tear(@Cast("Nd4jPointer*") PointerPointer extraPointers, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongPointer xShapeInfo, @Cast("Nd4jLong*") LongPointer dxShapeInfo, - @Cast("Nd4jPointer*") PointerPointer targets, @Cast("Nd4jLong*") LongPointer zShapeInfo, - @Cast("Nd4jLong*") LongPointer tadShapeInfo, - @Cast("Nd4jLong*") LongPointer tadOffsets); + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") LongPointer xShapeInfo, @Cast("const Nd4jLong*") LongPointer dxShapeInfo, + @Cast("Nd4jPointer*") PointerPointer targets, @Cast("const Nd4jLong*") LongPointer zShapeInfo, + @Cast("const Nd4jLong*") LongPointer tadShapeInfo, + @Cast("const Nd4jLong*") LongPointer tadOffsets); public native void tear(@Cast("Nd4jPointer*") PointerPointer extraPointers, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") LongBuffer xShapeInfo, @Cast("Nd4jLong*") LongBuffer dxShapeInfo, - @Cast("Nd4jPointer*") PointerPointer targets, @Cast("Nd4jLong*") LongBuffer zShapeInfo, - @Cast("Nd4jLong*") LongBuffer tadShapeInfo, - @Cast("Nd4jLong*") LongBuffer tadOffsets); + OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") 
LongBuffer xShapeInfo, @Cast("const Nd4jLong*") LongBuffer dxShapeInfo, + @Cast("Nd4jPointer*") PointerPointer targets, @Cast("const Nd4jLong*") LongBuffer zShapeInfo, + @Cast("const Nd4jLong*") LongBuffer tadShapeInfo, + @Cast("const Nd4jLong*") LongBuffer tadOffsets); public native void tear(@Cast("Nd4jPointer*") PointerPointer extraPointers, - OpaqueDataBuffer dbX, @Cast("Nd4jLong*") long[] xShapeInfo, @Cast("Nd4jLong*") long[] dxShapeInfo, - @Cast("Nd4jPointer*") PointerPointer targets, @Cast("Nd4jLong*") long[] zShapeInfo, - @Cast("Nd4jLong*") long[] tadShapeInfo, - @Cast("Nd4jLong*") long[] tadOffsets); - -public native @Cast("Nd4jLong") long encodeBitmap(@Cast("Nd4jPointer*") PointerPointer extraPointers, Pointer dx, @Cast("Nd4jLong*") LongPointer xShapeInfo, @Cast("Nd4jLong") long N, IntPointer dz, float threshold); -public native @Cast("Nd4jLong") long encodeBitmap(@Cast("Nd4jPointer*") PointerPointer extraPointers, Pointer dx, @Cast("Nd4jLong*") LongBuffer xShapeInfo, @Cast("Nd4jLong") long N, IntBuffer dz, float threshold); -public native @Cast("Nd4jLong") long encodeBitmap(@Cast("Nd4jPointer*") PointerPointer extraPointers, Pointer dx, @Cast("Nd4jLong*") long[] xShapeInfo, @Cast("Nd4jLong") long N, int[] dz, float threshold); -public native void decodeBitmap(@Cast("Nd4jPointer*") PointerPointer extraPointers, Pointer dx, @Cast("Nd4jLong") long N, Pointer dz, @Cast("Nd4jLong*") LongPointer zShapeInfo); -public native void decodeBitmap(@Cast("Nd4jPointer*") PointerPointer extraPointers, Pointer dx, @Cast("Nd4jLong") long N, Pointer dz, @Cast("Nd4jLong*") LongBuffer zShapeInfo); -public native void decodeBitmap(@Cast("Nd4jPointer*") PointerPointer extraPointers, Pointer dx, @Cast("Nd4jLong") long N, Pointer dz, @Cast("Nd4jLong*") long[] zShapeInfo); - - -public native void encodeThresholdP1(@Cast("Nd4jPointer*") PointerPointer extraPointers, Pointer dx, @Cast("Nd4jLong*") LongPointer xShapeInfo, @Cast("Nd4jLong") long N, IntPointer dz, float threshold); 
-public native void encodeThresholdP1(@Cast("Nd4jPointer*") PointerPointer extraPointers, Pointer dx, @Cast("Nd4jLong*") LongBuffer xShapeInfo, @Cast("Nd4jLong") long N, IntBuffer dz, float threshold); -public native void encodeThresholdP1(@Cast("Nd4jPointer*") PointerPointer extraPointers, Pointer dx, @Cast("Nd4jLong*") long[] xShapeInfo, @Cast("Nd4jLong") long N, int[] dz, float threshold); -public native void encodeThresholdP2Int(@Cast("Nd4jPointer*") PointerPointer extraPointers, IntPointer dx, @Cast("Nd4jLong") long N, IntPointer dz); -public native void encodeThresholdP2Int(@Cast("Nd4jPointer*") PointerPointer extraPointers, IntBuffer dx, @Cast("Nd4jLong") long N, IntBuffer dz); -public native void encodeThresholdP2Int(@Cast("Nd4jPointer*") PointerPointer extraPointers, int[] dx, @Cast("Nd4jLong") long N, int[] dz); -public native void encodeThresholdP3(@Cast("Nd4jPointer*") PointerPointer extraPointers, Pointer dx, @Cast("Nd4jLong*") LongPointer xShapeInfo, IntPointer offsets, @Cast("Nd4jLong") long N, IntPointer dz); -public native void encodeThresholdP3(@Cast("Nd4jPointer*") PointerPointer extraPointers, Pointer dx, @Cast("Nd4jLong*") LongBuffer xShapeInfo, IntBuffer offsets, @Cast("Nd4jLong") long N, IntBuffer dz); -public native void encodeThresholdP3(@Cast("Nd4jPointer*") PointerPointer extraPointers, Pointer dx, @Cast("Nd4jLong*") long[] xShapeInfo, int[] offsets, @Cast("Nd4jLong") long N, int[] dz); - - -public native void decodeThreshold(@Cast("Nd4jPointer*") PointerPointer extraPointers, Pointer dx, @Cast("Nd4jLong") long N, Pointer dz, @Cast("Nd4jLong*") LongPointer zShapeInfo); -public native void decodeThreshold(@Cast("Nd4jPointer*") PointerPointer extraPointers, Pointer dx, @Cast("Nd4jLong") long N, Pointer dz, @Cast("Nd4jLong*") LongBuffer zShapeInfo); -public native void decodeThreshold(@Cast("Nd4jPointer*") PointerPointer extraPointers, Pointer dx, @Cast("Nd4jLong") long N, Pointer dz, @Cast("Nd4jLong*") long[] zShapeInfo); - + 
OpaqueDataBuffer dbX, @Cast("const Nd4jLong*") long[] xShapeInfo, @Cast("const Nd4jLong*") long[] dxShapeInfo, + @Cast("Nd4jPointer*") PointerPointer targets, @Cast("const Nd4jLong*") long[] zShapeInfo, + @Cast("const Nd4jLong*") long[] tadShapeInfo, + @Cast("const Nd4jLong*") long[] tadOffsets); public native void sort(@Cast("Nd4jPointer*") PointerPointer extraPointers, - Pointer x, @Cast("Nd4jLong*") LongPointer xShapeInfo, - Pointer dx, @Cast("Nd4jLong*") LongPointer dxShapeInfo, + Pointer x, @Cast("const Nd4jLong*") LongPointer xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") LongPointer dxShapeInfo, @Cast("bool") boolean descending); public native void sort(@Cast("Nd4jPointer*") PointerPointer extraPointers, - Pointer x, @Cast("Nd4jLong*") LongBuffer xShapeInfo, - Pointer dx, @Cast("Nd4jLong*") LongBuffer dxShapeInfo, + Pointer x, @Cast("const Nd4jLong*") LongBuffer xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") LongBuffer dxShapeInfo, @Cast("bool") boolean descending); public native void sort(@Cast("Nd4jPointer*") PointerPointer extraPointers, - Pointer x, @Cast("Nd4jLong*") long[] xShapeInfo, - Pointer dx, @Cast("Nd4jLong*") long[] dxShapeInfo, + Pointer x, @Cast("const Nd4jLong*") long[] xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") long[] dxShapeInfo, @Cast("bool") boolean descending); public native void sortByKey(@Cast("Nd4jPointer*") PointerPointer extraPointers, - Pointer x, @Cast("Nd4jLong*") LongPointer xShapeInfo, - Pointer dx, @Cast("Nd4jLong*") LongPointer dxShapeInfo, - Pointer y, @Cast("Nd4jLong*") LongPointer yShapeInfo, - Pointer dy, @Cast("Nd4jLong*") LongPointer dyShapeInfo, + Pointer x, @Cast("const Nd4jLong*") LongPointer xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") LongPointer dxShapeInfo, + Pointer y, @Cast("const Nd4jLong*") LongPointer yShapeInfo, + Pointer dy, @Cast("const Nd4jLong*") LongPointer dyShapeInfo, @Cast("bool") boolean descending); public native void sortByKey(@Cast("Nd4jPointer*") PointerPointer 
extraPointers, - Pointer x, @Cast("Nd4jLong*") LongBuffer xShapeInfo, - Pointer dx, @Cast("Nd4jLong*") LongBuffer dxShapeInfo, - Pointer y, @Cast("Nd4jLong*") LongBuffer yShapeInfo, - Pointer dy, @Cast("Nd4jLong*") LongBuffer dyShapeInfo, + Pointer x, @Cast("const Nd4jLong*") LongBuffer xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") LongBuffer dxShapeInfo, + Pointer y, @Cast("const Nd4jLong*") LongBuffer yShapeInfo, + Pointer dy, @Cast("const Nd4jLong*") LongBuffer dyShapeInfo, @Cast("bool") boolean descending); public native void sortByKey(@Cast("Nd4jPointer*") PointerPointer extraPointers, - Pointer x, @Cast("Nd4jLong*") long[] xShapeInfo, - Pointer dx, @Cast("Nd4jLong*") long[] dxShapeInfo, - Pointer y, @Cast("Nd4jLong*") long[] yShapeInfo, - Pointer dy, @Cast("Nd4jLong*") long[] dyShapeInfo, + Pointer x, @Cast("const Nd4jLong*") long[] xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") long[] dxShapeInfo, + Pointer y, @Cast("const Nd4jLong*") long[] yShapeInfo, + Pointer dy, @Cast("const Nd4jLong*") long[] dyShapeInfo, @Cast("bool") boolean descending); public native void sortByValue(@Cast("Nd4jPointer*") PointerPointer extraPointers, - Pointer x, @Cast("Nd4jLong*") LongPointer xShapeInfo, - Pointer dx, @Cast("Nd4jLong*") LongPointer dxShapeInfo, - Pointer y, @Cast("Nd4jLong*") LongPointer yShapeInfo, - Pointer dy, @Cast("Nd4jLong*") LongPointer dyShapeInfo, + Pointer x, @Cast("const Nd4jLong*") LongPointer xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") LongPointer dxShapeInfo, + Pointer y, @Cast("const Nd4jLong*") LongPointer yShapeInfo, + Pointer dy, @Cast("const Nd4jLong*") LongPointer dyShapeInfo, @Cast("bool") boolean descending); public native void sortByValue(@Cast("Nd4jPointer*") PointerPointer extraPointers, - Pointer x, @Cast("Nd4jLong*") LongBuffer xShapeInfo, - Pointer dx, @Cast("Nd4jLong*") LongBuffer dxShapeInfo, - Pointer y, @Cast("Nd4jLong*") LongBuffer yShapeInfo, - Pointer dy, @Cast("Nd4jLong*") LongBuffer dyShapeInfo, + Pointer x, 
@Cast("const Nd4jLong*") LongBuffer xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") LongBuffer dxShapeInfo, + Pointer y, @Cast("const Nd4jLong*") LongBuffer yShapeInfo, + Pointer dy, @Cast("const Nd4jLong*") LongBuffer dyShapeInfo, @Cast("bool") boolean descending); public native void sortByValue(@Cast("Nd4jPointer*") PointerPointer extraPointers, - Pointer x, @Cast("Nd4jLong*") long[] xShapeInfo, - Pointer dx, @Cast("Nd4jLong*") long[] dxShapeInfo, - Pointer y, @Cast("Nd4jLong*") long[] yShapeInfo, - Pointer dy, @Cast("Nd4jLong*") long[] dyShapeInfo, + Pointer x, @Cast("const Nd4jLong*") long[] xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") long[] dxShapeInfo, + Pointer y, @Cast("const Nd4jLong*") long[] yShapeInfo, + Pointer dy, @Cast("const Nd4jLong*") long[] dyShapeInfo, @Cast("bool") boolean descending); public native void sortTad(@Cast("Nd4jPointer*") PointerPointer extraPointers, - Pointer x, @Cast("Nd4jLong*") LongPointer xShapeInfo, - Pointer dx, @Cast("Nd4jLong*") LongPointer dxShapeInfo, + Pointer x, @Cast("const Nd4jLong*") LongPointer xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") LongPointer dxShapeInfo, IntPointer dimension, int dimensionLength, - @Cast("Nd4jLong*") LongPointer tadShapeInfo, - @Cast("Nd4jLong*") LongPointer tadOffsets, + @Cast("const Nd4jLong*") LongPointer tadShapeInfo, + @Cast("const Nd4jLong*") LongPointer tadOffsets, @Cast("bool") boolean descending); public native void sortTad(@Cast("Nd4jPointer*") PointerPointer extraPointers, - Pointer x, @Cast("Nd4jLong*") LongBuffer xShapeInfo, - Pointer dx, @Cast("Nd4jLong*") LongBuffer dxShapeInfo, + Pointer x, @Cast("const Nd4jLong*") LongBuffer xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") LongBuffer dxShapeInfo, IntBuffer dimension, int dimensionLength, - @Cast("Nd4jLong*") LongBuffer tadShapeInfo, - @Cast("Nd4jLong*") LongBuffer tadOffsets, + @Cast("const Nd4jLong*") LongBuffer tadShapeInfo, + @Cast("const Nd4jLong*") LongBuffer tadOffsets, @Cast("bool") boolean 
descending); public native void sortTad(@Cast("Nd4jPointer*") PointerPointer extraPointers, - Pointer x, @Cast("Nd4jLong*") long[] xShapeInfo, - Pointer dx, @Cast("Nd4jLong*") long[] dxShapeInfo, + Pointer x, @Cast("const Nd4jLong*") long[] xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") long[] dxShapeInfo, int[] dimension, int dimensionLength, - @Cast("Nd4jLong*") long[] tadShapeInfo, - @Cast("Nd4jLong*") long[] tadOffsets, + @Cast("const Nd4jLong*") long[] tadShapeInfo, + @Cast("const Nd4jLong*") long[] tadOffsets, @Cast("bool") boolean descending); public native void sortTadByKey(@Cast("Nd4jPointer*") PointerPointer extraPointers, - Pointer x, @Cast("Nd4jLong*") LongPointer xShapeInfo, - Pointer dx, @Cast("Nd4jLong*") LongPointer dxShapeInfo, - Pointer y, @Cast("Nd4jLong*") LongPointer yShapeInfo, - Pointer dy, @Cast("Nd4jLong*") LongPointer dyShapeInfo, + Pointer x, @Cast("const Nd4jLong*") LongPointer xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") LongPointer dxShapeInfo, + Pointer y, @Cast("const Nd4jLong*") LongPointer yShapeInfo, + Pointer dy, @Cast("const Nd4jLong*") LongPointer dyShapeInfo, IntPointer dimension, int dimensionLength, @Cast("bool") boolean descending); public native void sortTadByKey(@Cast("Nd4jPointer*") PointerPointer extraPointers, - Pointer x, @Cast("Nd4jLong*") LongBuffer xShapeInfo, - Pointer dx, @Cast("Nd4jLong*") LongBuffer dxShapeInfo, - Pointer y, @Cast("Nd4jLong*") LongBuffer yShapeInfo, - Pointer dy, @Cast("Nd4jLong*") LongBuffer dyShapeInfo, + Pointer x, @Cast("const Nd4jLong*") LongBuffer xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") LongBuffer dxShapeInfo, + Pointer y, @Cast("const Nd4jLong*") LongBuffer yShapeInfo, + Pointer dy, @Cast("const Nd4jLong*") LongBuffer dyShapeInfo, IntBuffer dimension, int dimensionLength, @Cast("bool") boolean descending); public native void sortTadByKey(@Cast("Nd4jPointer*") PointerPointer extraPointers, - Pointer x, @Cast("Nd4jLong*") long[] xShapeInfo, - Pointer dx, 
@Cast("Nd4jLong*") long[] dxShapeInfo, - Pointer y, @Cast("Nd4jLong*") long[] yShapeInfo, - Pointer dy, @Cast("Nd4jLong*") long[] dyShapeInfo, + Pointer x, @Cast("const Nd4jLong*") long[] xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") long[] dxShapeInfo, + Pointer y, @Cast("const Nd4jLong*") long[] yShapeInfo, + Pointer dy, @Cast("const Nd4jLong*") long[] dyShapeInfo, int[] dimension, int dimensionLength, @Cast("bool") boolean descending); public native void sortTadByValue(@Cast("Nd4jPointer*") PointerPointer extraPointers, - Pointer x, @Cast("Nd4jLong*") LongPointer xShapeInfo, - Pointer dx, @Cast("Nd4jLong*") LongPointer dxShapeInfo, - Pointer y, @Cast("Nd4jLong*") LongPointer yShapeInfo, - Pointer dy, @Cast("Nd4jLong*") LongPointer dyShapeInfo, + Pointer x, @Cast("const Nd4jLong*") LongPointer xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") LongPointer dxShapeInfo, + Pointer y, @Cast("const Nd4jLong*") LongPointer yShapeInfo, + Pointer dy, @Cast("const Nd4jLong*") LongPointer dyShapeInfo, IntPointer dimension, int dimensionLength, @Cast("bool") boolean descending); public native void sortTadByValue(@Cast("Nd4jPointer*") PointerPointer extraPointers, - Pointer x, @Cast("Nd4jLong*") LongBuffer xShapeInfo, - Pointer dx, @Cast("Nd4jLong*") LongBuffer dxShapeInfo, - Pointer y, @Cast("Nd4jLong*") LongBuffer yShapeInfo, - Pointer dy, @Cast("Nd4jLong*") LongBuffer dyShapeInfo, + Pointer x, @Cast("const Nd4jLong*") LongBuffer xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") LongBuffer dxShapeInfo, + Pointer y, @Cast("const Nd4jLong*") LongBuffer yShapeInfo, + Pointer dy, @Cast("const Nd4jLong*") LongBuffer dyShapeInfo, IntBuffer dimension, int dimensionLength, @Cast("bool") boolean descending); public native void sortTadByValue(@Cast("Nd4jPointer*") PointerPointer extraPointers, - Pointer x, @Cast("Nd4jLong*") long[] xShapeInfo, - Pointer dx, @Cast("Nd4jLong*") long[] dxShapeInfo, - Pointer y, @Cast("Nd4jLong*") long[] yShapeInfo, - Pointer dy, 
@Cast("Nd4jLong*") long[] dyShapeInfo, + Pointer x, @Cast("const Nd4jLong*") long[] xShapeInfo, + Pointer dx, @Cast("const Nd4jLong*") long[] dxShapeInfo, + Pointer y, @Cast("const Nd4jLong*") long[] yShapeInfo, + Pointer dy, @Cast("const Nd4jLong*") long[] dyShapeInfo, int[] dimension, int dimensionLength, @Cast("bool") boolean descending); @@ -3002,7 +2978,7 @@ public native OpaqueShapeList calculateOutputShapes2(@Cast("Nd4jPointer*") Point public native OpaqueShapeList calculateOutputShapes2(@Cast("Nd4jPointer*") PointerPointer extraPointers, @Cast("Nd4jLong") long hash, @Cast("Nd4jPointer*") PointerPointer inputBuffers, @Cast("Nd4jPointer*") PointerPointer inputShapes, int numInputShapes, double[] tArgs, int numTArgs, @Cast("Nd4jLong*") long[] iArgs, int numIArgs, @Cast("bool*") boolean[] bArgs, int numBArgs, int[] dArgs, int numDArgs); public native @Cast("Nd4jLong") long getShapeListSize(OpaqueShapeList list); -public native @Cast("Nd4jLong*") LongPointer getShape(OpaqueShapeList list, @Cast("Nd4jLong") long i); +public native @Cast("const Nd4jLong*") LongPointer getShape(OpaqueShapeList list, @Cast("Nd4jLong") long i); public native void deleteShapeList(@Cast("Nd4jPointer") Pointer shapeList); @@ -3018,7 +2994,7 @@ public native OpaqueVariable getVariable(OpaqueVariablesSet set, @Cast("Nd4jLong public native int getVariableId(OpaqueVariable variable); public native int getVariableIndex(OpaqueVariable variable); public native @Cast("char*") String getVariableName(OpaqueVariable variable); -public native @Cast("Nd4jLong*") LongPointer getVariableShape(OpaqueVariable variable); +public native @Cast("const Nd4jLong*") LongPointer getVariableShape(OpaqueVariable variable); public native Pointer getVariableBuffer(OpaqueVariable variable); public native int unregisterGraph(@Cast("Nd4jPointer*") PointerPointer extraPointers, @Cast("Nd4jLong") long graphId); @@ -3037,9 +3013,9 @@ public native void deleteGraphState(@Cast("Nd4jPointer") Pointer state); public native 
void deleteResultWrapper(@Cast("Nd4jPointer") Pointer ptr); -public native int estimateThreshold(@Cast("Nd4jPointer*") PointerPointer extraPointers, @Cast("Nd4jPointer") Pointer x, @Cast("Nd4jLong*") LongPointer xShapeInfo, int N, float threshold); -public native int estimateThreshold(@Cast("Nd4jPointer*") PointerPointer extraPointers, @Cast("Nd4jPointer") Pointer x, @Cast("Nd4jLong*") LongBuffer xShapeInfo, int N, float threshold); -public native int estimateThreshold(@Cast("Nd4jPointer*") PointerPointer extraPointers, @Cast("Nd4jPointer") Pointer x, @Cast("Nd4jLong*") long[] xShapeInfo, int N, float threshold); +public native int estimateThreshold(@Cast("Nd4jPointer*") PointerPointer extraPointers, @Cast("Nd4jPointer") Pointer x, @Cast("const Nd4jLong*") LongPointer xShapeInfo, int N, float threshold); +public native int estimateThreshold(@Cast("Nd4jPointer*") PointerPointer extraPointers, @Cast("Nd4jPointer") Pointer x, @Cast("const Nd4jLong*") LongBuffer xShapeInfo, int N, float threshold); +public native int estimateThreshold(@Cast("Nd4jPointer*") PointerPointer extraPointers, @Cast("Nd4jPointer") Pointer x, @Cast("const Nd4jLong*") long[] xShapeInfo, int N, float threshold); // this method executes op that requires scope to be present: if/while/cond/whatever public native @Cast("Nd4jStatus") int execCustomOpWithScope(@Cast("Nd4jPointer*") PointerPointer extraPointers, @Cast("Nd4jPointer") Pointer state, @Cast("Nd4jLong") long opHash, @Cast("Nd4jLong*") LongPointer scopes, int numScopes, @Cast("Nd4jPointer*") PointerPointer inputBuffers, @Cast("Nd4jPointer*") PointerPointer inputShapes, int numInputs, @Cast("Nd4jPointer*") PointerPointer outputBuffers, @Cast("Nd4jPointer*") PointerPointer outputShapes, int numOutputs); @@ -3054,23 +3030,23 @@ public native @Cast("char*") BytePointer getUtf8StringBuffer(@Cast("Nd4jPointer* public native void deleteUtf8String(@Cast("Nd4jPointer*") PointerPointer extraPointers, @Cast("Nd4jPointer") Pointer ptr); public native 
void scatterUpdate(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opCode, int numOfSubArrs, - Pointer hX, @Cast("Nd4jLong*") LongPointer hXShapeInfo, @Cast("Nd4jLong*") LongPointer hXOffsets, - Pointer dX, @Cast("Nd4jLong*") LongPointer dXShapeInfo, @Cast("Nd4jLong*") LongPointer dXOffsets, - Pointer hY, @Cast("Nd4jLong*") LongPointer hYShapeInfo, @Cast("Nd4jLong*") LongPointer hYOffsets, - Pointer dY, @Cast("Nd4jLong*") LongPointer dYShapeInfo, @Cast("Nd4jLong*") LongPointer dYOffsets, - Pointer hIindexes, @Cast("Nd4jLong*") LongPointer hIndicesShapeInfo, Pointer dIindexes, @Cast("Nd4jLong*") LongPointer dIndicesShapeInfo); + Pointer hX, @Cast("const Nd4jLong*") LongPointer hXShapeInfo, @Cast("const Nd4jLong*") LongPointer hXOffsets, + Pointer dX, @Cast("const Nd4jLong*") LongPointer dXShapeInfo, @Cast("const Nd4jLong*") LongPointer dXOffsets, + Pointer hY, @Cast("const Nd4jLong*") LongPointer hYShapeInfo, @Cast("const Nd4jLong*") LongPointer hYOffsets, + Pointer dY, @Cast("const Nd4jLong*") LongPointer dYShapeInfo, @Cast("const Nd4jLong*") LongPointer dYOffsets, + Pointer hIindexes, @Cast("const Nd4jLong*") LongPointer hIndicesShapeInfo, Pointer dIindexes, @Cast("const Nd4jLong*") LongPointer dIndicesShapeInfo); public native void scatterUpdate(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opCode, int numOfSubArrs, - Pointer hX, @Cast("Nd4jLong*") LongBuffer hXShapeInfo, @Cast("Nd4jLong*") LongBuffer hXOffsets, - Pointer dX, @Cast("Nd4jLong*") LongBuffer dXShapeInfo, @Cast("Nd4jLong*") LongBuffer dXOffsets, - Pointer hY, @Cast("Nd4jLong*") LongBuffer hYShapeInfo, @Cast("Nd4jLong*") LongBuffer hYOffsets, - Pointer dY, @Cast("Nd4jLong*") LongBuffer dYShapeInfo, @Cast("Nd4jLong*") LongBuffer dYOffsets, - Pointer hIindexes, @Cast("Nd4jLong*") LongBuffer hIndicesShapeInfo, Pointer dIindexes, @Cast("Nd4jLong*") LongBuffer dIndicesShapeInfo); + Pointer hX, @Cast("const Nd4jLong*") LongBuffer hXShapeInfo, @Cast("const Nd4jLong*") LongBuffer hXOffsets, 
+ Pointer dX, @Cast("const Nd4jLong*") LongBuffer dXShapeInfo, @Cast("const Nd4jLong*") LongBuffer dXOffsets, + Pointer hY, @Cast("const Nd4jLong*") LongBuffer hYShapeInfo, @Cast("const Nd4jLong*") LongBuffer hYOffsets, + Pointer dY, @Cast("const Nd4jLong*") LongBuffer dYShapeInfo, @Cast("const Nd4jLong*") LongBuffer dYOffsets, + Pointer hIindexes, @Cast("const Nd4jLong*") LongBuffer hIndicesShapeInfo, Pointer dIindexes, @Cast("const Nd4jLong*") LongBuffer dIndicesShapeInfo); public native void scatterUpdate(@Cast("Nd4jPointer*") PointerPointer extraPointers, int opCode, int numOfSubArrs, - Pointer hX, @Cast("Nd4jLong*") long[] hXShapeInfo, @Cast("Nd4jLong*") long[] hXOffsets, - Pointer dX, @Cast("Nd4jLong*") long[] dXShapeInfo, @Cast("Nd4jLong*") long[] dXOffsets, - Pointer hY, @Cast("Nd4jLong*") long[] hYShapeInfo, @Cast("Nd4jLong*") long[] hYOffsets, - Pointer dY, @Cast("Nd4jLong*") long[] dYShapeInfo, @Cast("Nd4jLong*") long[] dYOffsets, - Pointer hIindexes, @Cast("Nd4jLong*") long[] hIndicesShapeInfo, Pointer dIindexes, @Cast("Nd4jLong*") long[] dIndicesShapeInfo); + Pointer hX, @Cast("const Nd4jLong*") long[] hXShapeInfo, @Cast("const Nd4jLong*") long[] hXOffsets, + Pointer dX, @Cast("const Nd4jLong*") long[] dXShapeInfo, @Cast("const Nd4jLong*") long[] dXOffsets, + Pointer hY, @Cast("const Nd4jLong*") long[] hYShapeInfo, @Cast("const Nd4jLong*") long[] hYOffsets, + Pointer dY, @Cast("const Nd4jLong*") long[] dYShapeInfo, @Cast("const Nd4jLong*") long[] dYOffsets, + Pointer hIindexes, @Cast("const Nd4jLong*") long[] hIndicesShapeInfo, Pointer dIindexes, @Cast("const Nd4jLong*") long[] dIndicesShapeInfo); public native void inspectArray(@Cast("Nd4jPointer*") PointerPointer extraPointers, @Cast("Nd4jPointer") Pointer buffer, @Cast("Nd4jLong*") LongPointer shapeInfo, @Cast("Nd4jPointer") Pointer specialBuffer, @Cast("Nd4jLong*") LongPointer specialShapeInfo, @Cast("Nd4jPointer") Pointer debugInfo); public native void inspectArray(@Cast("Nd4jPointer*") 
PointerPointer extraPointers, @Cast("Nd4jPointer") Pointer buffer, @Cast("Nd4jLong*") LongBuffer shapeInfo, @Cast("Nd4jPointer") Pointer specialBuffer, @Cast("Nd4jLong*") LongBuffer specialShapeInfo, @Cast("Nd4jPointer") Pointer debugInfo); @@ -3080,9 +3056,9 @@ public native OpaqueConstantDataBuffer shapeBuffer(int rank, @Cast("Nd4jLong*") public native OpaqueConstantDataBuffer shapeBuffer(int rank, @Cast("Nd4jLong*") LongBuffer shape, @Cast("Nd4jLong*") LongBuffer strides, @Cast("sd::DataType") int dtype, char order, @Cast("Nd4jLong") long ews, @Cast("bool") boolean empty); public native OpaqueConstantDataBuffer shapeBuffer(int rank, @Cast("Nd4jLong*") long[] shape, @Cast("Nd4jLong*") long[] strides, @Cast("sd::DataType") int dtype, char order, @Cast("Nd4jLong") long ews, @Cast("bool") boolean empty); -public native OpaqueConstantDataBuffer constantBufferLong(@Cast("sd::DataType") int dtype, @Cast("Nd4jLong*") LongPointer data, int length); -public native OpaqueConstantDataBuffer constantBufferLong(@Cast("sd::DataType") int dtype, @Cast("Nd4jLong*") LongBuffer data, int length); -public native OpaqueConstantDataBuffer constantBufferLong(@Cast("sd::DataType") int dtype, @Cast("Nd4jLong*") long[] data, int length); +public native OpaqueConstantDataBuffer constantBufferLong(@Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") LongPointer data, int length); +public native OpaqueConstantDataBuffer constantBufferLong(@Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") LongBuffer data, int length); +public native OpaqueConstantDataBuffer constantBufferLong(@Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") long[] data, int length); public native OpaqueConstantDataBuffer constantBufferDouble(@Cast("sd::DataType") int dtype, DoublePointer data, int length); public native OpaqueConstantDataBuffer constantBufferDouble(@Cast("sd::DataType") int dtype, DoubleBuffer data, int length); public native OpaqueConstantDataBuffer 
constantBufferDouble(@Cast("sd::DataType") int dtype, double[] data, int length); @@ -3689,16 +3665,16 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); /** * do not allocate memory, memory for array is passed from outside */ - public NDArray(Pointer buffer, @Cast("Nd4jLong*") LongPointer shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/) { super((Pointer)null); allocate(buffer, shapeInfo, context, isBuffAlloc); } - private native void allocate(Pointer buffer, @Cast("Nd4jLong*") LongPointer shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/); + public NDArray(Pointer buffer, @Cast("Nd4jLong*") LongPointer shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean isBuffAlloc/*=false*/) { super((Pointer)null); allocate(buffer, shapeInfo, context, isBuffAlloc); } + private native void allocate(Pointer buffer, @Cast("Nd4jLong*") LongPointer shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean isBuffAlloc/*=false*/); public NDArray(Pointer buffer, @Cast("Nd4jLong*") LongPointer shapeInfo) { super((Pointer)null); allocate(buffer, shapeInfo); } private native void allocate(Pointer buffer, @Cast("Nd4jLong*") LongPointer shapeInfo); - public NDArray(Pointer buffer, @Cast("Nd4jLong*") LongBuffer shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/) { super((Pointer)null); allocate(buffer, shapeInfo, context, isBuffAlloc); } - private native void allocate(Pointer buffer, @Cast("Nd4jLong*") LongBuffer shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/); + public NDArray(Pointer buffer, @Cast("Nd4jLong*") LongBuffer shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, 
@Cast("bool") boolean isBuffAlloc/*=false*/) { super((Pointer)null); allocate(buffer, shapeInfo, context, isBuffAlloc); } + private native void allocate(Pointer buffer, @Cast("Nd4jLong*") LongBuffer shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean isBuffAlloc/*=false*/); public NDArray(Pointer buffer, @Cast("Nd4jLong*") LongBuffer shapeInfo) { super((Pointer)null); allocate(buffer, shapeInfo); } private native void allocate(Pointer buffer, @Cast("Nd4jLong*") LongBuffer shapeInfo); - public NDArray(Pointer buffer, @Cast("Nd4jLong*") long[] shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/) { super((Pointer)null); allocate(buffer, shapeInfo, context, isBuffAlloc); } - private native void allocate(Pointer buffer, @Cast("Nd4jLong*") long[] shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/); + public NDArray(Pointer buffer, @Cast("Nd4jLong*") long[] shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean isBuffAlloc/*=false*/) { super((Pointer)null); allocate(buffer, shapeInfo, context, isBuffAlloc); } + private native void allocate(Pointer buffer, @Cast("Nd4jLong*") long[] shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean isBuffAlloc/*=false*/); public NDArray(Pointer buffer, @Cast("Nd4jLong*") long[] shapeInfo) { super((Pointer)null); allocate(buffer, shapeInfo); } private native void allocate(Pointer buffer, @Cast("Nd4jLong*") long[] shapeInfo); @@ -3706,18 +3682,18 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); * do not allocate memory, memory for array is passed from outside * we suppose the content of both (device and host) buffers is identical */ - public NDArray(Pointer buffer, Pointer bufferD, @Cast("Nd4jLong*") LongPointer shapeInfo, LaunchContext 
context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/, @Cast("const bool") boolean isBuffDAlloc/*=false*/) { super((Pointer)null); allocate(buffer, bufferD, shapeInfo, context, isBuffAlloc, isBuffDAlloc); } - private native void allocate(Pointer buffer, Pointer bufferD, @Cast("Nd4jLong*") LongPointer shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/, @Cast("const bool") boolean isBuffDAlloc/*=false*/); - public NDArray(Pointer buffer, Pointer bufferD, @Cast("Nd4jLong*") LongPointer shapeInfo) { super((Pointer)null); allocate(buffer, bufferD, shapeInfo); } - private native void allocate(Pointer buffer, Pointer bufferD, @Cast("Nd4jLong*") LongPointer shapeInfo); - public NDArray(Pointer buffer, Pointer bufferD, @Cast("Nd4jLong*") LongBuffer shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/, @Cast("const bool") boolean isBuffDAlloc/*=false*/) { super((Pointer)null); allocate(buffer, bufferD, shapeInfo, context, isBuffAlloc, isBuffDAlloc); } - private native void allocate(Pointer buffer, Pointer bufferD, @Cast("Nd4jLong*") LongBuffer shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/, @Cast("const bool") boolean isBuffDAlloc/*=false*/); - public NDArray(Pointer buffer, Pointer bufferD, @Cast("Nd4jLong*") LongBuffer shapeInfo) { super((Pointer)null); allocate(buffer, bufferD, shapeInfo); } - private native void allocate(Pointer buffer, Pointer bufferD, @Cast("Nd4jLong*") LongBuffer shapeInfo); - public NDArray(Pointer buffer, Pointer bufferD, @Cast("Nd4jLong*") long[] shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/, @Cast("const bool") boolean isBuffDAlloc/*=false*/) { super((Pointer)null); allocate(buffer, bufferD, shapeInfo, 
context, isBuffAlloc, isBuffDAlloc); } - private native void allocate(Pointer buffer, Pointer bufferD, @Cast("Nd4jLong*") long[] shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/, @Cast("const bool") boolean isBuffDAlloc/*=false*/); - public NDArray(Pointer buffer, Pointer bufferD, @Cast("Nd4jLong*") long[] shapeInfo) { super((Pointer)null); allocate(buffer, bufferD, shapeInfo); } - private native void allocate(Pointer buffer, Pointer bufferD, @Cast("Nd4jLong*") long[] shapeInfo); + public NDArray(Pointer buffer, Pointer bufferD, @Cast("const Nd4jLong*") LongPointer shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean isBuffAlloc/*=false*/, @Cast("bool") boolean isBuffDAlloc/*=false*/) { super((Pointer)null); allocate(buffer, bufferD, shapeInfo, context, isBuffAlloc, isBuffDAlloc); } + private native void allocate(Pointer buffer, Pointer bufferD, @Cast("const Nd4jLong*") LongPointer shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean isBuffAlloc/*=false*/, @Cast("bool") boolean isBuffDAlloc/*=false*/); + public NDArray(Pointer buffer, Pointer bufferD, @Cast("const Nd4jLong*") LongPointer shapeInfo) { super((Pointer)null); allocate(buffer, bufferD, shapeInfo); } + private native void allocate(Pointer buffer, Pointer bufferD, @Cast("const Nd4jLong*") LongPointer shapeInfo); + public NDArray(Pointer buffer, Pointer bufferD, @Cast("const Nd4jLong*") LongBuffer shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean isBuffAlloc/*=false*/, @Cast("bool") boolean isBuffDAlloc/*=false*/) { super((Pointer)null); allocate(buffer, bufferD, shapeInfo, context, isBuffAlloc, isBuffDAlloc); } + private native void allocate(Pointer buffer, Pointer bufferD, @Cast("const Nd4jLong*") LongBuffer shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") 
boolean isBuffAlloc/*=false*/, @Cast("bool") boolean isBuffDAlloc/*=false*/); + public NDArray(Pointer buffer, Pointer bufferD, @Cast("const Nd4jLong*") LongBuffer shapeInfo) { super((Pointer)null); allocate(buffer, bufferD, shapeInfo); } + private native void allocate(Pointer buffer, Pointer bufferD, @Cast("const Nd4jLong*") LongBuffer shapeInfo); + public NDArray(Pointer buffer, Pointer bufferD, @Cast("const Nd4jLong*") long[] shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean isBuffAlloc/*=false*/, @Cast("bool") boolean isBuffDAlloc/*=false*/) { super((Pointer)null); allocate(buffer, bufferD, shapeInfo, context, isBuffAlloc, isBuffDAlloc); } + private native void allocate(Pointer buffer, Pointer bufferD, @Cast("const Nd4jLong*") long[] shapeInfo, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean isBuffAlloc/*=false*/, @Cast("bool") boolean isBuffDAlloc/*=false*/); + public NDArray(Pointer buffer, Pointer bufferD, @Cast("const Nd4jLong*") long[] shapeInfo) { super((Pointer)null); allocate(buffer, bufferD, shapeInfo); } + private native void allocate(Pointer buffer, Pointer bufferD, @Cast("const Nd4jLong*") long[] shapeInfo); /** * copy constructor @@ -3739,83 +3715,83 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); /** * constructor creates new NDArray using shape information from "shapeInfo", set all elements in new array to zeros, if copyStrides is true then use stride values from "shapeInfo", else calculate strides independently */ - public NDArray(@Cast("Nd4jLong*") LongPointer shapeInfo, @Cast("const bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean nullify/*=true*/) { super((Pointer)null); allocate(shapeInfo, copyStrides, context, nullify); } - private native void allocate(@Cast("Nd4jLong*") LongPointer shapeInfo, @Cast("const bool") boolean copyStrides/*=false*/, LaunchContext 
context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean nullify/*=true*/); - public NDArray(@Cast("Nd4jLong*") LongPointer shapeInfo) { super((Pointer)null); allocate(shapeInfo); } - private native void allocate(@Cast("Nd4jLong*") LongPointer shapeInfo); - public NDArray(@Cast("Nd4jLong*") LongBuffer shapeInfo, @Cast("const bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean nullify/*=true*/) { super((Pointer)null); allocate(shapeInfo, copyStrides, context, nullify); } - private native void allocate(@Cast("Nd4jLong*") LongBuffer shapeInfo, @Cast("const bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean nullify/*=true*/); - public NDArray(@Cast("Nd4jLong*") LongBuffer shapeInfo) { super((Pointer)null); allocate(shapeInfo); } - private native void allocate(@Cast("Nd4jLong*") LongBuffer shapeInfo); - public NDArray(@Cast("Nd4jLong*") long[] shapeInfo, @Cast("const bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean nullify/*=true*/) { super((Pointer)null); allocate(shapeInfo, copyStrides, context, nullify); } - private native void allocate(@Cast("Nd4jLong*") long[] shapeInfo, @Cast("const bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean nullify/*=true*/); - public NDArray(@Cast("Nd4jLong*") long[] shapeInfo) { super((Pointer)null); allocate(shapeInfo); } - private native void allocate(@Cast("Nd4jLong*") long[] shapeInfo); + public NDArray(@Cast("const Nd4jLong*") LongPointer shapeInfo, @Cast("bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean nullify/*=true*/) { super((Pointer)null); allocate(shapeInfo, copyStrides, context, nullify); } + private native void 
allocate(@Cast("const Nd4jLong*") LongPointer shapeInfo, @Cast("bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean nullify/*=true*/); + public NDArray(@Cast("const Nd4jLong*") LongPointer shapeInfo) { super((Pointer)null); allocate(shapeInfo); } + private native void allocate(@Cast("const Nd4jLong*") LongPointer shapeInfo); + public NDArray(@Cast("const Nd4jLong*") LongBuffer shapeInfo, @Cast("bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean nullify/*=true*/) { super((Pointer)null); allocate(shapeInfo, copyStrides, context, nullify); } + private native void allocate(@Cast("const Nd4jLong*") LongBuffer shapeInfo, @Cast("bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean nullify/*=true*/); + public NDArray(@Cast("const Nd4jLong*") LongBuffer shapeInfo) { super((Pointer)null); allocate(shapeInfo); } + private native void allocate(@Cast("const Nd4jLong*") LongBuffer shapeInfo); + public NDArray(@Cast("const Nd4jLong*") long[] shapeInfo, @Cast("bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean nullify/*=true*/) { super((Pointer)null); allocate(shapeInfo, copyStrides, context, nullify); } + private native void allocate(@Cast("const Nd4jLong*") long[] shapeInfo, @Cast("bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean nullify/*=true*/); + public NDArray(@Cast("const Nd4jLong*") long[] shapeInfo) { super((Pointer)null); allocate(shapeInfo); } + private native void allocate(@Cast("const Nd4jLong*") long[] shapeInfo); /** * constructor creates new NDArray using shape information from "shapeInfo", set all elements in new array to be zeros, if copyStrides is true then use stride values from "shapeInfo", else calculate 
strides independently * set dtype as array type */ - public NDArray(@Cast("Nd4jLong*") LongPointer shapeInfo, @Cast("const sd::DataType") int dtype, @Cast("const bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean nullify/*=true*/) { super((Pointer)null); allocate(shapeInfo, dtype, copyStrides, context, nullify); } - private native void allocate(@Cast("Nd4jLong*") LongPointer shapeInfo, @Cast("const sd::DataType") int dtype, @Cast("const bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean nullify/*=true*/); - public NDArray(@Cast("Nd4jLong*") LongPointer shapeInfo, @Cast("const sd::DataType") int dtype) { super((Pointer)null); allocate(shapeInfo, dtype); } - private native void allocate(@Cast("Nd4jLong*") LongPointer shapeInfo, @Cast("const sd::DataType") int dtype); - public NDArray(@Cast("Nd4jLong*") LongBuffer shapeInfo, @Cast("const sd::DataType") int dtype, @Cast("const bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean nullify/*=true*/) { super((Pointer)null); allocate(shapeInfo, dtype, copyStrides, context, nullify); } - private native void allocate(@Cast("Nd4jLong*") LongBuffer shapeInfo, @Cast("const sd::DataType") int dtype, @Cast("const bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean nullify/*=true*/); - public NDArray(@Cast("Nd4jLong*") LongBuffer shapeInfo, @Cast("const sd::DataType") int dtype) { super((Pointer)null); allocate(shapeInfo, dtype); } - private native void allocate(@Cast("Nd4jLong*") LongBuffer shapeInfo, @Cast("const sd::DataType") int dtype); - public NDArray(@Cast("Nd4jLong*") long[] shapeInfo, @Cast("const sd::DataType") int dtype, @Cast("const bool") boolean copyStrides/*=false*/, LaunchContext 
context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean nullify/*=true*/) { super((Pointer)null); allocate(shapeInfo, dtype, copyStrides, context, nullify); } - private native void allocate(@Cast("Nd4jLong*") long[] shapeInfo, @Cast("const sd::DataType") int dtype, @Cast("const bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean nullify/*=true*/); - public NDArray(@Cast("Nd4jLong*") long[] shapeInfo, @Cast("const sd::DataType") int dtype) { super((Pointer)null); allocate(shapeInfo, dtype); } - private native void allocate(@Cast("Nd4jLong*") long[] shapeInfo, @Cast("const sd::DataType") int dtype); + public NDArray(@Cast("const Nd4jLong*") LongPointer shapeInfo, @Cast("sd::DataType") int dtype, @Cast("bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean nullify/*=true*/) { super((Pointer)null); allocate(shapeInfo, dtype, copyStrides, context, nullify); } + private native void allocate(@Cast("const Nd4jLong*") LongPointer shapeInfo, @Cast("sd::DataType") int dtype, @Cast("bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean nullify/*=true*/); + public NDArray(@Cast("const Nd4jLong*") LongPointer shapeInfo, @Cast("sd::DataType") int dtype) { super((Pointer)null); allocate(shapeInfo, dtype); } + private native void allocate(@Cast("const Nd4jLong*") LongPointer shapeInfo, @Cast("sd::DataType") int dtype); + public NDArray(@Cast("const Nd4jLong*") LongBuffer shapeInfo, @Cast("sd::DataType") int dtype, @Cast("bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean nullify/*=true*/) { super((Pointer)null); allocate(shapeInfo, dtype, copyStrides, context, nullify); } + private native void allocate(@Cast("const Nd4jLong*") LongBuffer shapeInfo, @Cast("sd::DataType") int 
dtype, @Cast("bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean nullify/*=true*/); + public NDArray(@Cast("const Nd4jLong*") LongBuffer shapeInfo, @Cast("sd::DataType") int dtype) { super((Pointer)null); allocate(shapeInfo, dtype); } + private native void allocate(@Cast("const Nd4jLong*") LongBuffer shapeInfo, @Cast("sd::DataType") int dtype); + public NDArray(@Cast("const Nd4jLong*") long[] shapeInfo, @Cast("sd::DataType") int dtype, @Cast("bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean nullify/*=true*/) { super((Pointer)null); allocate(shapeInfo, dtype, copyStrides, context, nullify); } + private native void allocate(@Cast("const Nd4jLong*") long[] shapeInfo, @Cast("sd::DataType") int dtype, @Cast("bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean nullify/*=true*/); + public NDArray(@Cast("const Nd4jLong*") long[] shapeInfo, @Cast("sd::DataType") int dtype) { super((Pointer)null); allocate(shapeInfo, dtype); } + private native void allocate(@Cast("const Nd4jLong*") long[] shapeInfo, @Cast("sd::DataType") int dtype); /** * this constructor creates new array using shape information contained in vector argument */ - public NDArray(byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/) { super((Pointer)null); allocate(order, shape, dtype, context); } - private native void allocate(byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/); - public NDArray(byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape) { super((Pointer)null); allocate(order, shape); } - private native void allocate(byte order, 
@Cast("Nd4jLong*") @StdVector LongPointer shape); - public NDArray(byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/) { super((Pointer)null); allocate(order, shape, dtype, context); } - private native void allocate(byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/); - public NDArray(byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape) { super((Pointer)null); allocate(order, shape); } - private native void allocate(byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape); - public NDArray(byte order, @Cast("Nd4jLong*") @StdVector long[] shape, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/) { super((Pointer)null); allocate(order, shape, dtype, context); } - private native void allocate(byte order, @Cast("Nd4jLong*") @StdVector long[] shape, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/); - public NDArray(byte order, @Cast("Nd4jLong*") @StdVector long[] shape) { super((Pointer)null); allocate(order, shape); } - private native void allocate(byte order, @Cast("Nd4jLong*") @StdVector long[] shape); + public NDArray(char order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/) { super((Pointer)null); allocate(order, shape, dtype, context); } + private native void allocate(char order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/); + public NDArray(char order, @Cast("Nd4jLong*") @StdVector LongPointer shape) { super((Pointer)null); allocate(order, shape); } + private native void 
allocate(char order, @Cast("Nd4jLong*") @StdVector LongPointer shape); + public NDArray(char order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/) { super((Pointer)null); allocate(order, shape, dtype, context); } + private native void allocate(char order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/); + public NDArray(char order, @Cast("Nd4jLong*") @StdVector LongBuffer shape) { super((Pointer)null); allocate(order, shape); } + private native void allocate(char order, @Cast("Nd4jLong*") @StdVector LongBuffer shape); + public NDArray(char order, @Cast("Nd4jLong*") @StdVector long[] shape, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/) { super((Pointer)null); allocate(order, shape, dtype, context); } + private native void allocate(char order, @Cast("Nd4jLong*") @StdVector long[] shape, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/); + public NDArray(char order, @Cast("Nd4jLong*") @StdVector long[] shape) { super((Pointer)null); allocate(order, shape); } + private native void allocate(char order, @Cast("Nd4jLong*") @StdVector long[] shape); /** * This constructor creates new array with elements copied from data and using shape information stored in shape, elements from data will be casted to dtype */ - public NDArray(byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @StdVector DoublePointer data, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/) { super((Pointer)null); allocate(order, shape, data, dtype, context); } - private native void allocate(byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @StdVector DoublePointer data, 
@Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/); - public NDArray(byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @StdVector DoublePointer data) { super((Pointer)null); allocate(order, shape, data); } - private native void allocate(byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @StdVector DoublePointer data); - public NDArray(byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @StdVector DoubleBuffer data, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/) { super((Pointer)null); allocate(order, shape, data, dtype, context); } - private native void allocate(byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @StdVector DoubleBuffer data, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/); - public NDArray(byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @StdVector DoubleBuffer data) { super((Pointer)null); allocate(order, shape, data); } - private native void allocate(byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @StdVector DoubleBuffer data); - public NDArray(byte order, @Cast("Nd4jLong*") @StdVector long[] shape, @StdVector double[] data, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/) { super((Pointer)null); allocate(order, shape, data, dtype, context); } - private native void allocate(byte order, @Cast("Nd4jLong*") @StdVector long[] shape, @StdVector double[] data, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/); - public NDArray(byte order, @Cast("Nd4jLong*") @StdVector long[] shape, @StdVector double[] data) { super((Pointer)null); allocate(order, shape, data); } - private native void allocate(byte order, @Cast("Nd4jLong*") @StdVector long[] shape, @StdVector double[] data); + public 
NDArray(char order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @StdVector DoublePointer data, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/) { super((Pointer)null); allocate(order, shape, data, dtype, context); } + private native void allocate(char order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @StdVector DoublePointer data, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/); + public NDArray(char order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @StdVector DoublePointer data) { super((Pointer)null); allocate(order, shape, data); } + private native void allocate(char order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @StdVector DoublePointer data); + public NDArray(char order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @StdVector DoubleBuffer data, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/) { super((Pointer)null); allocate(order, shape, data, dtype, context); } + private native void allocate(char order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @StdVector DoubleBuffer data, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/); + public NDArray(char order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @StdVector DoubleBuffer data) { super((Pointer)null); allocate(order, shape, data); } + private native void allocate(char order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @StdVector DoubleBuffer data); + public NDArray(char order, @Cast("Nd4jLong*") @StdVector long[] shape, @StdVector double[] data, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/) { super((Pointer)null); allocate(order, shape, data, dtype, context); } + private native void allocate(char order, @Cast("Nd4jLong*") @StdVector long[] shape, @StdVector 
double[] data, @Cast("sd::DataType") int dtype/*=sd::DOUBLE*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/); + public NDArray(char order, @Cast("Nd4jLong*") @StdVector long[] shape, @StdVector double[] data) { super((Pointer)null); allocate(order, shape, data); } + private native void allocate(char order, @Cast("Nd4jLong*") @StdVector long[] shape, @StdVector double[] data); /** * this constructor creates new array using given buffer (without memory allocation) and shape information stored in shape */ - public NDArray(Pointer buffer, byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("sd::DataType") int dtype, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/) { super((Pointer)null); allocate(buffer, order, shape, dtype, context, isBuffAlloc); } - private native void allocate(Pointer buffer, byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("sd::DataType") int dtype, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/); - public NDArray(Pointer buffer, byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("sd::DataType") int dtype) { super((Pointer)null); allocate(buffer, order, shape, dtype); } - private native void allocate(Pointer buffer, byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("sd::DataType") int dtype); - public NDArray(Pointer buffer, byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @Cast("sd::DataType") int dtype, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/) { super((Pointer)null); allocate(buffer, order, shape, dtype, context, isBuffAlloc); } - private native void allocate(Pointer buffer, byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @Cast("sd::DataType") int dtype, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") 
boolean isBuffAlloc/*=false*/); - public NDArray(Pointer buffer, byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @Cast("sd::DataType") int dtype) { super((Pointer)null); allocate(buffer, order, shape, dtype); } - private native void allocate(Pointer buffer, byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @Cast("sd::DataType") int dtype); - public NDArray(Pointer buffer, byte order, @Cast("Nd4jLong*") @StdVector long[] shape, @Cast("sd::DataType") int dtype, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/) { super((Pointer)null); allocate(buffer, order, shape, dtype, context, isBuffAlloc); } - private native void allocate(Pointer buffer, byte order, @Cast("Nd4jLong*") @StdVector long[] shape, @Cast("sd::DataType") int dtype, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/); - public NDArray(Pointer buffer, byte order, @Cast("Nd4jLong*") @StdVector long[] shape, @Cast("sd::DataType") int dtype) { super((Pointer)null); allocate(buffer, order, shape, dtype); } - private native void allocate(Pointer buffer, byte order, @Cast("Nd4jLong*") @StdVector long[] shape, @Cast("sd::DataType") int dtype); + public NDArray(Pointer buffer, char order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("sd::DataType") int dtype, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/) { super((Pointer)null); allocate(buffer, order, shape, dtype, context, isBuffAlloc); } + private native void allocate(Pointer buffer, char order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("sd::DataType") int dtype, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/); + public NDArray(Pointer buffer, char order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("sd::DataType") int dtype) { super((Pointer)null); 
allocate(buffer, order, shape, dtype); } + private native void allocate(Pointer buffer, char order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("sd::DataType") int dtype); + public NDArray(Pointer buffer, char order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @Cast("sd::DataType") int dtype, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/) { super((Pointer)null); allocate(buffer, order, shape, dtype, context, isBuffAlloc); } + private native void allocate(Pointer buffer, char order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @Cast("sd::DataType") int dtype, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/); + public NDArray(Pointer buffer, char order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @Cast("sd::DataType") int dtype) { super((Pointer)null); allocate(buffer, order, shape, dtype); } + private native void allocate(Pointer buffer, char order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @Cast("sd::DataType") int dtype); + public NDArray(Pointer buffer, char order, @Cast("Nd4jLong*") @StdVector long[] shape, @Cast("sd::DataType") int dtype, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/) { super((Pointer)null); allocate(buffer, order, shape, dtype, context, isBuffAlloc); } + private native void allocate(Pointer buffer, char order, @Cast("Nd4jLong*") @StdVector long[] shape, @Cast("sd::DataType") int dtype, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isBuffAlloc/*=false*/); + public NDArray(Pointer buffer, char order, @Cast("Nd4jLong*") @StdVector long[] shape, @Cast("sd::DataType") int dtype) { super((Pointer)null); allocate(buffer, order, shape, dtype); } + private native void allocate(Pointer buffer, char order, @Cast("Nd4jLong*") @StdVector long[] shape, @Cast("sd::DataType") int dtype); /** 
* This method returns new array with the same shape & data type @@ -3834,14 +3810,14 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); * this constructor creates new NDArray with shape matching "other" array, * doesn't copy "other" elements into new array !!! */ - public NDArray(@Const NDArray other, @Cast("const bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/) { super((Pointer)null); allocate(other, copyStrides, context); } - private native void allocate(@Const NDArray other, @Cast("const bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/); + public NDArray(@Const NDArray other, @Cast("bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/) { super((Pointer)null); allocate(other, copyStrides, context); } + private native void allocate(@Const NDArray other, @Cast("bool") boolean copyStrides/*=false*/, LaunchContext context/*=sd::LaunchContext::defaultContext()*/); /** * this constructor creates scalar(and set its value = 0) or empty array depending on bool argument isScalar */ - public NDArray(@Cast("sd::DataType") int dtype, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isScalar/*=true*/) { super((Pointer)null); allocate(dtype, context, isScalar); } - private native void allocate(@Cast("sd::DataType") int dtype, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("const bool") boolean isScalar/*=true*/); + public NDArray(@Cast("sd::DataType") int dtype, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean isScalar/*=true*/) { super((Pointer)null); allocate(dtype, context, isScalar); } + private native void allocate(@Cast("sd::DataType") int dtype, LaunchContext context/*=sd::LaunchContext::defaultContext()*/, @Cast("bool") boolean isScalar/*=true*/); public NDArray(@Cast("sd::DataType") int dtype) { 
super((Pointer)null); allocate(dtype); } private native void allocate(@Cast("sd::DataType") int dtype); @@ -3889,7 +3865,6 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); * @return */ public native Pointer bufferWithOffset(@Cast("Nd4jLong") long offset); - public native Pointer specialBufferWithOffset(@Cast("Nd4jLong") long offset); /** * copy assignment operator @@ -3971,33 +3946,28 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); /** * returns host buffer */ - public native Pointer getBuffer(); public native Pointer buffer(); /** * returns buffer offset (offset is the same for host and device buffers) */ - public native @Cast("Nd4jLong") long getBufferOffset(); public native @Cast("Nd4jLong") long bufferOffset(); /** * if _bufferD==nullptr return _buffer, else return _bufferD */ public native Pointer specialBuffer(); - public native Pointer getSpecialBuffer(); /** * returns device buffer if compilation is for cuda case, otherwise returns host buffer */ - public native Pointer getPlatformBuffer(); public native Pointer platformBuffer(); /** * returns _shapeInfo */ - public native @Cast("Nd4jLong*") LongPointer shapeInfo(); - public native @Cast("Nd4jLong*") LongPointer getShapeInfo(); + public native @Cast("const Nd4jLong*") LongPointer shapeInfo(); /** @@ -4009,12 +3979,9 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); /** * if _shapeInfoD==nullptr return _shapeInfo, else return _shapeInfoD */ - public native @Cast("Nd4jLong*") LongPointer specialShapeInfo(); - public native @Cast("Nd4jLong*") LongPointer getSpecialShapeInfo(); + public native @Cast("const Nd4jLong*") LongPointer specialShapeInfo(); - - public native @Cast("Nd4jLong*") LongPointer platformShapeInfo(); - public native @Cast("Nd4jLong*") LongPointer getPlatformShapeInfo(); + public native @Cast("const Nd4jLong*") LongPointer platformShapeInfo(); /** * permutes (in-place) the dimensions in array according to "dimensions" array @@ -4880,9 
+4847,6 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); ////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////// - - ////////////////////////////////////////////////////////////////////////// @@ -4892,12 +4856,6 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); //////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////// - - -//////////////////////////////////////////////////////////////////////// - - // #if defined(__CUDACC__) //&& defined(BUILD_TESTS) // for CUDA we need stil stuff inline @@ -6282,12 +6240,12 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); public native void setObjectsSize(@Cast("Nd4jLong") long bytes); public native void setTotalSize(@Cast("Nd4jLong") long bytes); - public native void addInputShape(@Cast("Nd4jLong*") LongPointer shapeInfo); - public native void addInputShape(@Cast("Nd4jLong*") LongBuffer shapeInfo); - public native void addInputShape(@Cast("Nd4jLong*") long[] shapeInfo); - public native void addOutputShape(@Cast("Nd4jLong*") LongPointer shapeInfo); - public native void addOutputShape(@Cast("Nd4jLong*") LongBuffer shapeInfo); - public native void addOutputShape(@Cast("Nd4jLong*") long[] shapeInfo); + public native void addInputShape(@Cast("const Nd4jLong*") LongPointer shapeInfo); + public native void addInputShape(@Cast("const Nd4jLong*") LongBuffer shapeInfo); + public native void addInputShape(@Cast("const Nd4jLong*") long[] shapeInfo); + public native void addOutputShape(@Cast("const Nd4jLong*") LongPointer shapeInfo); + public native void addOutputShape(@Cast("const Nd4jLong*") LongBuffer shapeInfo); + public native void addOutputShape(@Cast("const Nd4jLong*") long[] shapeInfo); public native @Cast("Nd4jLong") long getActivationsSize(); public native @Cast("Nd4jLong") long getTemporarySize(); @@ 
-6484,13 +6442,13 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); public native void setInputArray(int index, NDArray array, @Cast("bool") boolean removable/*=false*/); public native void setInputArray(int index, NDArray array); - public native void setInputArray(int index, Pointer buffer, Pointer shapeInfo, Pointer specialBuffer, Pointer specialShapeInfo); - public native void setInputArray(int index, Pointer databuffer, Pointer shapeInfo, Pointer specialShapeInfo); + public native void setInputArray(int index, Pointer buffer, @Const Pointer shapeInfo, Pointer specialBuffer, @Const Pointer specialShapeInfo); + public native void setInputArray(int index, Pointer databuffer, @Const Pointer shapeInfo, @Const Pointer specialShapeInfo); public native void setOutputArray(int index, NDArray array, @Cast("bool") boolean removable/*=false*/); public native void setOutputArray(int index, NDArray array); - public native void setOutputArray(int index, Pointer buffer, Pointer shapeInfo, Pointer specialBuffer, Pointer specialShapeInfo); - public native void setOutputArray(int index, Pointer databuffer, Pointer shapeInfo, Pointer specialShapeInfo); + public native void setOutputArray(int index, Pointer buffer, @Const Pointer shapeInfo, Pointer specialBuffer, @Const Pointer specialShapeInfo); + public native void setOutputArray(int index, Pointer databuffer, @Const Pointer shapeInfo, @Const Pointer specialShapeInfo); public native void setTArguments(DoublePointer arguments, int numberOfArguments); public native void setTArguments(DoubleBuffer arguments, int numberOfArguments); @@ -6832,13 +6790,13 @@ public static final int PREALLOC_SIZE = 33554432; @Namespace("shape") public static native @Cast("bool") boolean shapeEquals(int shape1Rank, @Cast("const Nd4jLong*") LongBuffer shape1, int shape2Rank, @Cast("const Nd4jLong*") LongBuffer shape2); @Namespace("shape") public static native @Cast("bool") boolean shapeEquals(int shape1Rank, @Cast("const Nd4jLong*") 
long[] shape1, int shape2Rank, @Cast("const Nd4jLong*") long[] shape2); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer detachShape(@Cast("Nd4jLong*") LongPointer originalShape); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer detachShape(@Cast("Nd4jLong*") LongBuffer originalShape); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] detachShape(@Cast("Nd4jLong*") long[] originalShape); + @Namespace("shape") public static native @Cast("const Nd4jLong*") LongPointer detachShape(@Cast("const Nd4jLong*") LongPointer originalShape); + @Namespace("shape") public static native @Cast("const Nd4jLong*") LongBuffer detachShape(@Cast("const Nd4jLong*") LongBuffer originalShape); + @Namespace("shape") public static native @Cast("const Nd4jLong*") long[] detachShape(@Cast("const Nd4jLong*") long[] originalShape); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer copyShape(@Cast("Nd4jLong*") LongPointer originalShape); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer copyShape(@Cast("Nd4jLong*") LongBuffer originalShape); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] copyShape(@Cast("Nd4jLong*") long[] originalShape); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer copyShape(@Cast("const Nd4jLong*") LongPointer originalShape); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer copyShape(@Cast("const Nd4jLong*") LongBuffer originalShape); + @Namespace("shape") public static native @Cast("Nd4jLong*") long[] copyShape(@Cast("const Nd4jLong*") long[] originalShape); @Namespace("shape") public static native @Cast("bool") boolean shapeEquals(@Cast("const Nd4jLong*") LongPointer shapeInfo1, @Cast("const Nd4jLong*") LongPointer shapeInfo2); @Namespace("shape") public static native @Cast("bool") boolean shapeEquals(@Cast("const Nd4jLong*") LongBuffer shapeInfo1, @Cast("const Nd4jLong*") LongBuffer 
shapeInfo2); @@ -6848,17 +6806,17 @@ public static final int PREALLOC_SIZE = 33554432; @Namespace("shape") public static native @Cast("bool") boolean shapeEquals(@Cast("const Nd4jLong*") LongBuffer shapeInfo1, @Cast("const Nd4jLong*") LongBuffer shapeInfo2, @Cast("const Nd4jLong*") LongBuffer shapeInfo3); @Namespace("shape") public static native @Cast("bool") boolean shapeEquals(@Cast("const Nd4jLong*") long[] shapeInfo1, @Cast("const Nd4jLong*") long[] shapeInfo2, @Cast("const Nd4jLong*") long[] shapeInfo3); - @Namespace("shape") public static native @Cast("bool") boolean strideEquals(int shape1Rank,@Cast("Nd4jLong*") LongPointer shape1,int shape2Rank,@Cast("Nd4jLong*") LongPointer shape2); - @Namespace("shape") public static native @Cast("bool") boolean strideEquals(int shape1Rank,@Cast("Nd4jLong*") LongBuffer shape1,int shape2Rank,@Cast("Nd4jLong*") LongBuffer shape2); - @Namespace("shape") public static native @Cast("bool") boolean strideEquals(int shape1Rank,@Cast("Nd4jLong*") long[] shape1,int shape2Rank,@Cast("Nd4jLong*") long[] shape2); + @Namespace("shape") public static native @Cast("bool") boolean strideEquals(int shape1Rank,@Cast("const Nd4jLong*") LongPointer shape1,int shape2Rank, @Cast("const Nd4jLong*") LongPointer shape2); + @Namespace("shape") public static native @Cast("bool") boolean strideEquals(int shape1Rank,@Cast("const Nd4jLong*") LongBuffer shape1,int shape2Rank, @Cast("const Nd4jLong*") LongBuffer shape2); + @Namespace("shape") public static native @Cast("bool") boolean strideEquals(int shape1Rank,@Cast("const Nd4jLong*") long[] shape1,int shape2Rank, @Cast("const Nd4jLong*") long[] shape2); - @Namespace("shape") public static native @Cast("bool") boolean strideEquals(@Cast("Nd4jLong*") LongPointer shapeInfo1,@Cast("Nd4jLong*") LongPointer shapeInfo2); - @Namespace("shape") public static native @Cast("bool") boolean strideEquals(@Cast("Nd4jLong*") LongBuffer shapeInfo1,@Cast("Nd4jLong*") LongBuffer shapeInfo2); - @Namespace("shape") 
public static native @Cast("bool") boolean strideEquals(@Cast("Nd4jLong*") long[] shapeInfo1,@Cast("Nd4jLong*") long[] shapeInfo2); + @Namespace("shape") public static native @Cast("bool") boolean strideEquals(@Cast("const Nd4jLong*") LongPointer shapeInfo1, @Cast("const Nd4jLong*") LongPointer shapeInfo2); + @Namespace("shape") public static native @Cast("bool") boolean strideEquals(@Cast("const Nd4jLong*") LongBuffer shapeInfo1, @Cast("const Nd4jLong*") LongBuffer shapeInfo2); + @Namespace("shape") public static native @Cast("bool") boolean strideEquals(@Cast("const Nd4jLong*") long[] shapeInfo1, @Cast("const Nd4jLong*") long[] shapeInfo2); - @Namespace("shape") public static native @Cast("bool") boolean strideEquals(@Cast("Nd4jLong*") LongPointer stride1,int rank1,@Cast("Nd4jLong*") LongPointer stride2,int rank2); - @Namespace("shape") public static native @Cast("bool") boolean strideEquals(@Cast("Nd4jLong*") LongBuffer stride1,int rank1,@Cast("Nd4jLong*") LongBuffer stride2,int rank2); - @Namespace("shape") public static native @Cast("bool") boolean strideEquals(@Cast("Nd4jLong*") long[] stride1,int rank1,@Cast("Nd4jLong*") long[] stride2,int rank2); + @Namespace("shape") public static native @Cast("bool") boolean strideEquals(@Cast("const Nd4jLong*") LongPointer stride1,int rank1, @Cast("const Nd4jLong*") LongPointer stride2, int rank2); + @Namespace("shape") public static native @Cast("bool") boolean strideEquals(@Cast("const Nd4jLong*") LongBuffer stride1,int rank1, @Cast("const Nd4jLong*") LongBuffer stride2, int rank2); + @Namespace("shape") public static native @Cast("bool") boolean strideEquals(@Cast("const Nd4jLong*") long[] stride1,int rank1, @Cast("const Nd4jLong*") long[] stride2, int rank2); @Namespace("shape") public static native @Cast("bool") boolean equalsSoft(@Cast("const Nd4jLong*") LongPointer shapeA, @Cast("const Nd4jLong*") LongPointer shapeB); @Namespace("shape") public static native @Cast("bool") boolean equalsSoft(@Cast("const 
Nd4jLong*") LongBuffer shapeA, @Cast("const Nd4jLong*") LongBuffer shapeB); @@ -6892,9 +6850,9 @@ public static final int PREALLOC_SIZE = 33554432; @Namespace("shape") public static native int tadIndexForLinear(int linearIndex, int tadLength); - @Namespace("shape") public static native @Cast("Nd4jLong") long tadLength(@Cast("Nd4jLong*") LongPointer shapeInfo, IntPointer dimension, int dimensionLength); - @Namespace("shape") public static native @Cast("Nd4jLong") long tadLength(@Cast("Nd4jLong*") LongBuffer shapeInfo, IntBuffer dimension, int dimensionLength); - @Namespace("shape") public static native @Cast("Nd4jLong") long tadLength(@Cast("Nd4jLong*") long[] shapeInfo, int[] dimension, int dimensionLength); + @Namespace("shape") public static native @Cast("Nd4jLong") long tadLength(@Cast("const Nd4jLong*") LongPointer shapeInfo, IntPointer dimension, int dimensionLength); + @Namespace("shape") public static native @Cast("Nd4jLong") long tadLength(@Cast("const Nd4jLong*") LongBuffer shapeInfo, IntBuffer dimension, int dimensionLength); + @Namespace("shape") public static native @Cast("Nd4jLong") long tadLength(@Cast("const Nd4jLong*") long[] shapeInfo, int[] dimension, int dimensionLength); @Namespace("shape") public static native @Cast("bool") boolean canReshape(int oldRank, @Cast("Nd4jLong*") LongPointer oldShape, int newRank, @Cast("Nd4jLong*") LongPointer newShape, @Cast("bool") boolean isFOrder); @Namespace("shape") public static native @Cast("bool") boolean canReshape(int oldRank, @Cast("Nd4jLong*") LongBuffer oldShape, int newRank, @Cast("Nd4jLong*") LongBuffer newShape, @Cast("bool") boolean isFOrder); @@ -6914,25 +6872,25 @@ public static final int PREALLOC_SIZE = 33554432; * Get the shape info buffer * for the given rank and shape. 
*/ - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer shapeBuffer(int rank, @Cast("sd::DataType") int dtype, @Cast("Nd4jLong*") LongPointer shape); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer shapeBuffer(int rank, @Cast("sd::DataType") int dtype, @Cast("Nd4jLong*") LongBuffer shape); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] shapeBuffer(int rank, @Cast("sd::DataType") int dtype, @Cast("Nd4jLong*") long[] shape); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer shapeBuffer(int rank, @Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") LongPointer shape); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer shapeBuffer(int rank, @Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") LongBuffer shape); + @Namespace("shape") public static native @Cast("Nd4jLong*") long[] shapeBuffer(int rank, @Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") long[] shape); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer shapeBuffer(int rank, @Cast("sd::DataType") int dtype, @Cast("Nd4jLong*") LongPointer shape, @Cast("Nd4jLong*") LongPointer buffer); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer shapeBuffer(int rank, @Cast("sd::DataType") int dtype, @Cast("Nd4jLong*") LongBuffer shape, @Cast("Nd4jLong*") LongBuffer buffer); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] shapeBuffer(int rank, @Cast("sd::DataType") int dtype, @Cast("Nd4jLong*") long[] shape, @Cast("Nd4jLong*") long[] buffer); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer shapeBuffer(int rank, @Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") LongPointer shape, @Cast("Nd4jLong*") LongPointer buffer); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer shapeBuffer(int rank, @Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") LongBuffer shape, 
@Cast("Nd4jLong*") LongBuffer buffer); + @Namespace("shape") public static native @Cast("Nd4jLong*") long[] shapeBuffer(int rank, @Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") long[] shape, @Cast("Nd4jLong*") long[] buffer); /** * Get the shape info buffer * for the given rank and shape. */ - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer shapeBufferFortran(int rank, @Cast("sd::DataType") int dtype, @Cast("Nd4jLong*") LongPointer shape); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer shapeBufferFortran(int rank, @Cast("sd::DataType") int dtype, @Cast("Nd4jLong*") LongBuffer shape); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] shapeBufferFortran(int rank, @Cast("sd::DataType") int dtype, @Cast("Nd4jLong*") long[] shape); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer shapeBufferFortran(int rank, @Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") LongPointer shape); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer shapeBufferFortran(int rank, @Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") LongBuffer shape); + @Namespace("shape") public static native @Cast("Nd4jLong*") long[] shapeBufferFortran(int rank, @Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") long[] shape); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer shapeBufferFortran(int rank, @Cast("sd::DataType") int dtype, @Cast("Nd4jLong*") LongPointer shape, @Cast("Nd4jLong*") LongPointer output); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer shapeBufferFortran(int rank, @Cast("sd::DataType") int dtype, @Cast("Nd4jLong*") LongBuffer shape, @Cast("Nd4jLong*") LongBuffer output); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] shapeBufferFortran(int rank, @Cast("sd::DataType") int dtype, @Cast("Nd4jLong*") long[] shape, @Cast("Nd4jLong*") long[] output); + @Namespace("shape") 
public static native @Cast("Nd4jLong*") LongPointer shapeBufferFortran(int rank, @Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") LongPointer shape, @Cast("Nd4jLong*") LongPointer output); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer shapeBufferFortran(int rank, @Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") LongBuffer shape, @Cast("Nd4jLong*") LongBuffer output); + @Namespace("shape") public static native @Cast("Nd4jLong*") long[] shapeBufferFortran(int rank, @Cast("sd::DataType") int dtype, @Cast("const Nd4jLong*") long[] shape, @Cast("Nd4jLong*") long[] output); // #ifdef __CUDACC__ // #endif @@ -6946,13 +6904,13 @@ public static final int PREALLOC_SIZE = 33554432; * @param startNum the start number for the strides * @return the strides for a matrix of n dimensions */ - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer calcStridesFortran(@Cast("Nd4jLong*") LongPointer shape, int rank); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer calcStridesFortran(@Cast("Nd4jLong*") LongBuffer shape, int rank); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] calcStridesFortran(@Cast("Nd4jLong*") long[] shape, int rank); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer calcStridesFortran(@Cast("const Nd4jLong*") LongPointer shape, int rank); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer calcStridesFortran(@Cast("const Nd4jLong*") LongBuffer shape, int rank); + @Namespace("shape") public static native @Cast("Nd4jLong*") long[] calcStridesFortran(@Cast("const Nd4jLong*") long[] shape, int rank); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer calcStridesFortran(@Cast("Nd4jLong*") LongPointer shape, int rank, @Cast("Nd4jLong*") LongPointer ret); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer calcStridesFortran(@Cast("Nd4jLong*") LongBuffer shape, int rank, 
@Cast("Nd4jLong*") LongBuffer ret); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] calcStridesFortran(@Cast("Nd4jLong*") long[] shape, int rank, @Cast("Nd4jLong*") long[] ret); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer calcStridesFortran(@Cast("const Nd4jLong*") LongPointer shape, int rank, @Cast("Nd4jLong*") LongPointer ret); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer calcStridesFortran(@Cast("const Nd4jLong*") LongBuffer shape, int rank, @Cast("Nd4jLong*") LongBuffer ret); + @Namespace("shape") public static native @Cast("Nd4jLong*") long[] calcStridesFortran(@Cast("const Nd4jLong*") long[] shape, int rank, @Cast("Nd4jLong*") long[] ret); /** * Computes the standard packed array strides for a given shape. @@ -6962,13 +6920,13 @@ public static final int PREALLOC_SIZE = 33554432; * @return the strides for a matrix of n dimensions */ - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer calcStrides(@Cast("Nd4jLong*") LongPointer shape, int rank); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer calcStrides(@Cast("Nd4jLong*") LongBuffer shape, int rank); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] calcStrides(@Cast("Nd4jLong*") long[] shape, int rank); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer calcStrides(@Cast("const Nd4jLong*") LongPointer shape, int rank); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer calcStrides(@Cast("const Nd4jLong*") LongBuffer shape, int rank); + @Namespace("shape") public static native @Cast("Nd4jLong*") long[] calcStrides(@Cast("const Nd4jLong*") long[] shape, int rank); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer calcStrides(@Cast("Nd4jLong*") LongPointer shape, int rank, @Cast("Nd4jLong*") LongPointer ret); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer 
calcStrides(@Cast("Nd4jLong*") LongBuffer shape, int rank, @Cast("Nd4jLong*") LongBuffer ret); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] calcStrides(@Cast("Nd4jLong*") long[] shape, int rank, @Cast("Nd4jLong*") long[] ret); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer calcStrides(@Cast("const Nd4jLong*") LongPointer shape, int rank, @Cast("Nd4jLong*") LongPointer ret); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer calcStrides(@Cast("const Nd4jLong*") LongBuffer shape, int rank, @Cast("Nd4jLong*") LongBuffer ret); + @Namespace("shape") public static native @Cast("Nd4jLong*") long[] calcStrides(@Cast("const Nd4jLong*") long[] shape, int rank, @Cast("Nd4jLong*") long[] ret); @Namespace("shape") public static native void updateStrides(@Cast("Nd4jLong*") LongPointer shape, byte order); @Namespace("shape") public static native void updateStrides(@Cast("Nd4jLong*") LongBuffer shape, byte order); @@ -6987,13 +6945,13 @@ public static final int PREALLOC_SIZE = 33554432; * @param startNum the start number for the strides * @return the strides for a matrix of n dimensions */ - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer calcStridesFortran(@Cast("Nd4jLong*") LongPointer shape, int rank, int startNum); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer calcStridesFortran(@Cast("Nd4jLong*") LongBuffer shape, int rank, int startNum); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] calcStridesFortran(@Cast("Nd4jLong*") long[] shape, int rank, int startNum); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer calcStridesFortran(@Cast("const Nd4jLong*") LongPointer shape, int rank, int startNum); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer calcStridesFortran(@Cast("const Nd4jLong*") LongBuffer shape, int rank, int startNum); + @Namespace("shape") public static native 
@Cast("Nd4jLong*") long[] calcStridesFortran(@Cast("const Nd4jLong*") long[] shape, int rank, int startNum); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer calcStridesFortran(@Cast("Nd4jLong*") LongPointer shape, int rank, int startNum, @Cast("Nd4jLong*") LongPointer ret); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer calcStridesFortran(@Cast("Nd4jLong*") LongBuffer shape, int rank, int startNum, @Cast("Nd4jLong*") LongBuffer ret); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] calcStridesFortran(@Cast("Nd4jLong*") long[] shape, int rank, int startNum, @Cast("Nd4jLong*") long[] ret); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer calcStridesFortran(@Cast("const Nd4jLong*") LongPointer shape, int rank, int startNum, @Cast("Nd4jLong*") LongPointer ret); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer calcStridesFortran(@Cast("const Nd4jLong*") LongBuffer shape, int rank, int startNum, @Cast("Nd4jLong*") LongBuffer ret); + @Namespace("shape") public static native @Cast("Nd4jLong*") long[] calcStridesFortran(@Cast("const Nd4jLong*") long[] shape, int rank, int startNum, @Cast("Nd4jLong*") long[] ret); /** * Computes the standard packed array strides for a given shape. 
@@ -7002,13 +6960,13 @@ public static final int PREALLOC_SIZE = 33554432; * @param startNum the start number for the strides * @return the strides for a matrix of n dimensions */ - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer calcStrides(@Cast("Nd4jLong*") LongPointer shape, int rank, int startNum); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer calcStrides(@Cast("Nd4jLong*") LongBuffer shape, int rank, int startNum); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] calcStrides(@Cast("Nd4jLong*") long[] shape, int rank, int startNum); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer calcStrides(@Cast("const Nd4jLong*") LongPointer shape, int rank, int startNum); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer calcStrides(@Cast("const Nd4jLong*") LongBuffer shape, int rank, int startNum); + @Namespace("shape") public static native @Cast("Nd4jLong*") long[] calcStrides(@Cast("const Nd4jLong*") long[] shape, int rank, int startNum); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer calcStrides(@Cast("Nd4jLong*") LongPointer shape, int rank, int startNum, @Cast("Nd4jLong*") LongPointer ret); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer calcStrides(@Cast("Nd4jLong*") LongBuffer shape, int rank, int startNum, @Cast("Nd4jLong*") LongBuffer ret); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] calcStrides(@Cast("Nd4jLong*") long[] shape, int rank, int startNum, @Cast("Nd4jLong*") long[] ret); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer calcStrides(@Cast("const Nd4jLong*") LongPointer shape, int rank, int startNum, @Cast("Nd4jLong*") LongPointer ret); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer calcStrides(@Cast("const Nd4jLong*") LongBuffer shape, int rank, int startNum, @Cast("Nd4jLong*") LongBuffer ret); + 
@Namespace("shape") public static native @Cast("Nd4jLong*") long[] calcStrides(@Cast("const Nd4jLong*") long[] shape, int rank, int startNum, @Cast("Nd4jLong*") long[] ret); /** * @param toCopy the shape to copy @@ -7046,9 +7004,9 @@ public static final int PREALLOC_SIZE = 33554432; * @return 0 if there is no element wise stride the * element wise stride of reshape(1,length) otherwise */ - @Namespace("shape") public static native int computeElementWiseStride(int rank, @Cast("Nd4jLong*") LongPointer shape, @Cast("Nd4jLong*") LongPointer stride, int isFOrder); - @Namespace("shape") public static native int computeElementWiseStride(int rank, @Cast("Nd4jLong*") LongBuffer shape, @Cast("Nd4jLong*") LongBuffer stride, int isFOrder); - @Namespace("shape") public static native int computeElementWiseStride(int rank, @Cast("Nd4jLong*") long[] shape, @Cast("Nd4jLong*") long[] stride, int isFOrder); + @Namespace("shape") public static native int computeElementWiseStride(int rank, @Cast("const Nd4jLong*") LongPointer shape, @Cast("const Nd4jLong*") LongPointer stride, int isFOrder); + @Namespace("shape") public static native int computeElementWiseStride(int rank, @Cast("const Nd4jLong*") LongBuffer shape, @Cast("const Nd4jLong*") LongBuffer stride, int isFOrder); + @Namespace("shape") public static native int computeElementWiseStride(int rank, @Cast("const Nd4jLong*") long[] shape, @Cast("const Nd4jLong*") long[] stride, int isFOrder); /** * Compute the element wise stride @@ -7061,17 +7019,17 @@ public static final int PREALLOC_SIZE = 33554432; * @return 0 if there is no element wise stride the * element wise stride of reshape(1,length) otherwise */ - @Namespace("shape") public static native int computeElementWiseStride(int rank, @Cast("Nd4jLong*") LongPointer shape, @Cast("Nd4jLong*") LongPointer stride, int isFOrder, @Cast("Nd4jLong*") LongPointer dimension, int dimensionLength); - @Namespace("shape") public static native int computeElementWiseStride(int rank, 
@Cast("Nd4jLong*") LongBuffer shape, @Cast("Nd4jLong*") LongBuffer stride, int isFOrder, @Cast("Nd4jLong*") LongBuffer dimension, int dimensionLength); - @Namespace("shape") public static native int computeElementWiseStride(int rank, @Cast("Nd4jLong*") long[] shape, @Cast("Nd4jLong*") long[] stride, int isFOrder, @Cast("Nd4jLong*") long[] dimension, int dimensionLength); + @Namespace("shape") public static native int computeElementWiseStride(int rank, @Cast("const Nd4jLong*") LongPointer shape, @Cast("const Nd4jLong*") LongPointer stride, int isFOrder, @Cast("const Nd4jLong*") LongPointer dimension, int dimensionLength); + @Namespace("shape") public static native int computeElementWiseStride(int rank, @Cast("const Nd4jLong*") LongBuffer shape, @Cast("const Nd4jLong*") LongBuffer stride, int isFOrder, @Cast("const Nd4jLong*") LongBuffer dimension, int dimensionLength); + @Namespace("shape") public static native int computeElementWiseStride(int rank, @Cast("const Nd4jLong*") long[] shape, @Cast("const Nd4jLong*") long[] stride, int isFOrder, @Cast("const Nd4jLong*") long[] dimension, int dimensionLength); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer shapeInfoOnlyShapeAndStride(@Cast("Nd4jLong*") LongPointer shapeInfo, @Cast("Nd4jLong*") LongPointer dimension, int dimensionLength,@Cast("bool") boolean reverseCopyStride); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer shapeInfoOnlyShapeAndStride(@Cast("Nd4jLong*") LongBuffer shapeInfo, @Cast("Nd4jLong*") LongBuffer dimension, int dimensionLength,@Cast("bool") boolean reverseCopyStride); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] shapeInfoOnlyShapeAndStride(@Cast("Nd4jLong*") long[] shapeInfo, @Cast("Nd4jLong*") long[] dimension, int dimensionLength,@Cast("bool") boolean reverseCopyStride); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer shapeInfoOnlyShapeAndStride(@Cast("const Nd4jLong*") LongPointer shapeInfo, 
@Cast("Nd4jLong*") LongPointer dimension, int dimensionLength,@Cast("bool") boolean reverseCopyStride); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer shapeInfoOnlyShapeAndStride(@Cast("const Nd4jLong*") LongBuffer shapeInfo, @Cast("Nd4jLong*") LongBuffer dimension, int dimensionLength,@Cast("bool") boolean reverseCopyStride); + @Namespace("shape") public static native @Cast("Nd4jLong*") long[] shapeInfoOnlyShapeAndStride(@Cast("const Nd4jLong*") long[] shapeInfo, @Cast("Nd4jLong*") long[] dimension, int dimensionLength,@Cast("bool") boolean reverseCopyStride); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer shapeInfoOnlyShapeAndStride(@Cast("Nd4jLong*") LongPointer shapeInfo, @Cast("Nd4jLong*") LongPointer dimension, int dimensionLength,@Cast("bool") boolean reverseCopyStride, @Cast("Nd4jLong*") LongPointer buffer); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer shapeInfoOnlyShapeAndStride(@Cast("Nd4jLong*") LongBuffer shapeInfo, @Cast("Nd4jLong*") LongBuffer dimension, int dimensionLength,@Cast("bool") boolean reverseCopyStride, @Cast("Nd4jLong*") LongBuffer buffer); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] shapeInfoOnlyShapeAndStride(@Cast("Nd4jLong*") long[] shapeInfo, @Cast("Nd4jLong*") long[] dimension, int dimensionLength,@Cast("bool") boolean reverseCopyStride, @Cast("Nd4jLong*") long[] buffer); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer shapeInfoOnlyShapeAndStride(@Cast("const Nd4jLong*") LongPointer shapeInfo, @Cast("Nd4jLong*") LongPointer dimension, int dimensionLength,@Cast("bool") boolean reverseCopyStride, @Cast("Nd4jLong*") LongPointer buffer); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer shapeInfoOnlyShapeAndStride(@Cast("const Nd4jLong*") LongBuffer shapeInfo, @Cast("Nd4jLong*") LongBuffer dimension, int dimensionLength,@Cast("bool") boolean reverseCopyStride, @Cast("Nd4jLong*") 
LongBuffer buffer); + @Namespace("shape") public static native @Cast("Nd4jLong*") long[] shapeInfoOnlyShapeAndStride(@Cast("const Nd4jLong*") long[] shapeInfo, @Cast("Nd4jLong*") long[] dimension, int dimensionLength,@Cast("bool") boolean reverseCopyStride, @Cast("Nd4jLong*") long[] buffer); /** * * @param length @@ -7093,9 +7051,9 @@ public static final int PREALLOC_SIZE = 33554432; */ @Namespace("shape") public static native void doPermuteSwap(int length, @Cast("Nd4jLong**") PointerPointer shape, IntPointer rearrange); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer permuteShapeBuffer(@Cast("Nd4jLong*") LongPointer shapeBuffer, IntPointer rearrange); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer permuteShapeBuffer(@Cast("Nd4jLong*") LongBuffer shapeBuffer, IntBuffer rearrange); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] permuteShapeBuffer(@Cast("Nd4jLong*") long[] shapeBuffer, int[] rearrange); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer permuteShapeBuffer(@Cast("const Nd4jLong*") LongPointer shapeBuffer, IntPointer rearrange); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer permuteShapeBuffer(@Cast("const Nd4jLong*") LongBuffer shapeBuffer, IntBuffer rearrange); + @Namespace("shape") public static native @Cast("Nd4jLong*") long[] permuteShapeBuffer(@Cast("const Nd4jLong*") long[] shapeBuffer, int[] rearrange); @Namespace("shape") public static native void permuteShapeBufferInPlace(@Cast("Nd4jLong*") LongPointer shapeBuffer, IntPointer rearrange, @Cast("Nd4jLong*") LongPointer out); @Namespace("shape") public static native void permuteShapeBufferInPlace(@Cast("Nd4jLong*") LongBuffer shapeBuffer, IntBuffer rearrange, @Cast("Nd4jLong*") LongBuffer out); @@ -7127,9 +7085,9 @@ public static final int PREALLOC_SIZE = 33554432; @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer createPermuteIndexes(int originalRank, 
IntBuffer dimension,int dimensionLength); @Namespace("shape") public static native @Cast("Nd4jLong*") long[] createPermuteIndexes(int originalRank, int[] dimension,int dimensionLength); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer computeResultShape(@Cast("Nd4jLong*") LongPointer originalShapeBuffer, IntPointer dimension,int dimensionLength); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer computeResultShape(@Cast("Nd4jLong*") LongBuffer originalShapeBuffer, IntBuffer dimension,int dimensionLength); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] computeResultShape(@Cast("Nd4jLong*") long[] originalShapeBuffer, int[] dimension,int dimensionLength); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer computeResultShape(@Cast("const Nd4jLong*") LongPointer originalShapeBuffer, IntPointer dimension,int dimensionLength); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer computeResultShape(@Cast("const Nd4jLong*") LongBuffer originalShapeBuffer, IntBuffer dimension,int dimensionLength); + @Namespace("shape") public static native @Cast("Nd4jLong*") long[] computeResultShape(@Cast("const Nd4jLong*") long[] originalShapeBuffer, int[] dimension,int dimensionLength); /** * This method does inplace transpose of given shapeBuffer @@ -7180,9 +7138,9 @@ public static final int PREALLOC_SIZE = 33554432; * @param shape the shape of the array * @param rank the rank of cthe shape */ - @Namespace("shape") public static native int isVector(@Cast("Nd4jLong*") LongPointer shape, int rank); - @Namespace("shape") public static native int isVector(@Cast("Nd4jLong*") LongBuffer shape, int rank); - @Namespace("shape") public static native int isVector(@Cast("Nd4jLong*") long[] shape, int rank); + @Namespace("shape") public static native int isVector(@Cast("const Nd4jLong*") LongPointer shape, int rank); + @Namespace("shape") public static native int isVector(@Cast("const 
Nd4jLong*") LongBuffer shape, int rank); + @Namespace("shape") public static native int isVector(@Cast("const Nd4jLong*") long[] shape, int rank); /** @@ -7201,9 +7159,9 @@ public static final int PREALLOC_SIZE = 33554432; @Namespace("shape") public static native int isVector(@Cast("const Nd4jLong*") LongBuffer shapeInfo); @Namespace("shape") public static native int isVector(@Cast("const Nd4jLong*") long[] shapeInfo); - @Namespace("shape") public static native @Cast("bool") boolean isLikeVector(@Cast("Nd4jLong*") LongPointer shapeInfo, @ByRef IntPointer posOfNonUnityDim); - @Namespace("shape") public static native @Cast("bool") boolean isLikeVector(@Cast("Nd4jLong*") LongBuffer shapeInfo, @ByRef IntBuffer posOfNonUnityDim); - @Namespace("shape") public static native @Cast("bool") boolean isLikeVector(@Cast("Nd4jLong*") long[] shapeInfo, @ByRef int[] posOfNonUnityDim); + @Namespace("shape") public static native @Cast("bool") boolean isLikeVector(@Cast("const Nd4jLong*") LongPointer shapeInfo, @ByRef IntPointer posOfNonUnityDim); + @Namespace("shape") public static native @Cast("bool") boolean isLikeVector(@Cast("const Nd4jLong*") LongBuffer shapeInfo, @ByRef IntBuffer posOfNonUnityDim); + @Namespace("shape") public static native @Cast("bool") boolean isLikeVector(@Cast("const Nd4jLong*") long[] shapeInfo, @ByRef int[] posOfNonUnityDim); @Namespace("shape") public static native @Cast("bool") boolean isCommonVector(@Cast("const Nd4jLong*") LongPointer shapeInfo, @ByRef IntPointer posOfNonUnityDim); @Namespace("shape") public static native @Cast("bool") boolean isCommonVector(@Cast("const Nd4jLong*") LongBuffer shapeInfo, @ByRef IntBuffer posOfNonUnityDim); @@ -7213,9 +7171,9 @@ public static final int PREALLOC_SIZE = 33554432; @Namespace("shape") public static native @Cast("bool") boolean isRowVector(@Cast("const Nd4jLong*") LongBuffer shapeInfo); @Namespace("shape") public static native @Cast("bool") boolean isRowVector(@Cast("const Nd4jLong*") long[] shapeInfo); - 
@Namespace("shape") public static native @Cast("bool") boolean isColumnVector(@Cast("Nd4jLong*") LongPointer shapeInfo); - @Namespace("shape") public static native @Cast("bool") boolean isColumnVector(@Cast("Nd4jLong*") LongBuffer shapeInfo); - @Namespace("shape") public static native @Cast("bool") boolean isColumnVector(@Cast("Nd4jLong*") long[] shapeInfo); + @Namespace("shape") public static native @Cast("bool") boolean isColumnVector(@Cast("const Nd4jLong*") LongPointer shapeInfo); + @Namespace("shape") public static native @Cast("bool") boolean isColumnVector(@Cast("const Nd4jLong*") LongBuffer shapeInfo); + @Namespace("shape") public static native @Cast("bool") boolean isColumnVector(@Cast("const Nd4jLong*") long[] shapeInfo); /** * shape - input inShape is shape only, not shapeInfo @@ -7263,9 +7221,9 @@ public static final int PREALLOC_SIZE = 33554432; * This buffer allocates memory * that must be freed elsewhere. */ - @Namespace("shape") public static native void copyTo(int length, @Cast("Nd4jLong*") LongPointer from, @Cast("Nd4jLong*") LongPointer to, @Cast("Nd4jLong*") LongPointer indexes); - @Namespace("shape") public static native void copyTo(int length, @Cast("Nd4jLong*") LongBuffer from, @Cast("Nd4jLong*") LongBuffer to, @Cast("Nd4jLong*") LongBuffer indexes); - @Namespace("shape") public static native void copyTo(int length, @Cast("Nd4jLong*") long[] from, @Cast("Nd4jLong*") long[] to, @Cast("Nd4jLong*") long[] indexes); + @Namespace("shape") public static native void copyTo(int length, @Cast("const Nd4jLong*") LongPointer from, @Cast("Nd4jLong*") LongPointer to, @Cast("Nd4jLong*") LongPointer indexes); + @Namespace("shape") public static native void copyTo(int length, @Cast("const Nd4jLong*") LongBuffer from, @Cast("Nd4jLong*") LongBuffer to, @Cast("Nd4jLong*") LongBuffer indexes); + @Namespace("shape") public static native void copyTo(int length, @Cast("const Nd4jLong*") long[] from, @Cast("Nd4jLong*") long[] to, @Cast("Nd4jLong*") long[] indexes); 
/** * Permute the given strides @@ -7463,9 +7421,9 @@ public static final int PREALLOC_SIZE = 33554432; * indexes should be the indexes to exclude * indexes length should be the length of indexes */ - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer everyIndexBut(@Cast("Nd4jLong*") LongPointer indexes,int indexesLength,int begin,int end); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer everyIndexBut(@Cast("Nd4jLong*") LongBuffer indexes,int indexesLength,int begin,int end); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] everyIndexBut(@Cast("Nd4jLong*") long[] indexes,int indexesLength,int begin,int end); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer everyIndexBut(@Cast("const Nd4jLong*") LongPointer indexes,int indexesLength,int begin,int end); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer everyIndexBut(@Cast("const Nd4jLong*") LongBuffer indexes,int indexesLength,int begin,int end); + @Namespace("shape") public static native @Cast("Nd4jLong*") long[] everyIndexBut(@Cast("const Nd4jLong*") long[] indexes,int indexesLength,int begin,int end); /** * Computes the offset for accessing @@ -7511,9 +7469,9 @@ public static final int PREALLOC_SIZE = 33554432; * Keep the given indexes * in the data */ - @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer keep(@Cast("Nd4jLong*") LongPointer data, IntPointer index, int indexLength, int dataLength); - @Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer keep(@Cast("Nd4jLong*") LongBuffer data, IntBuffer index, int indexLength, int dataLength); - @Namespace("shape") public static native @Cast("Nd4jLong*") long[] keep(@Cast("Nd4jLong*") long[] data, int[] index, int indexLength, int dataLength); + @Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer keep(@Cast("Nd4jLong*") LongPointer data, @Const IntPointer index, int indexLength, int dataLength); + 
@Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer keep(@Cast("Nd4jLong*") LongBuffer data, @Const IntBuffer index, int indexLength, int dataLength); + @Namespace("shape") public static native @Cast("Nd4jLong*") long[] keep(@Cast("Nd4jLong*") long[] data, @Const int[] index, int indexLength, int dataLength); /** * Generate reverse copy of the data @@ -7551,9 +7509,9 @@ public static final int PREALLOC_SIZE = 33554432; * @return the length per slice of the given shape * along the given dimension */ - @Namespace("shape") public static native @Cast("Nd4jLong") long lengthPerSlice(int rank, @Cast("Nd4jLong*") LongPointer shape, IntPointer dimension, int dimensionLength); - @Namespace("shape") public static native @Cast("Nd4jLong") long lengthPerSlice(int rank, @Cast("Nd4jLong*") LongBuffer shape, IntBuffer dimension, int dimensionLength); - @Namespace("shape") public static native @Cast("Nd4jLong") long lengthPerSlice(int rank, @Cast("Nd4jLong*") long[] shape, int[] dimension, int dimensionLength); + @Namespace("shape") public static native @Cast("Nd4jLong") long lengthPerSlice(int rank, @Cast("const Nd4jLong*") LongPointer shape, @Const IntPointer dimension, int dimensionLength); + @Namespace("shape") public static native @Cast("Nd4jLong") long lengthPerSlice(int rank, @Cast("const Nd4jLong*") LongBuffer shape, @Const IntBuffer dimension, int dimensionLength); + @Namespace("shape") public static native @Cast("Nd4jLong") long lengthPerSlice(int rank, @Cast("const Nd4jLong*") long[] shape, @Const int[] dimension, int dimensionLength); /** * calculates the offset for a tensor @@ -7564,24 +7522,24 @@ public static final int PREALLOC_SIZE = 33554432; */ @Namespace("shape") public static native @Cast("Nd4jLong") long sliceOffsetForTensor(int rank, int index, - @Cast("Nd4jLong*") LongPointer shape, - @Cast("Nd4jLong*") LongPointer tensorShape, + @Cast("const Nd4jLong*") LongPointer shape, + @Cast("const Nd4jLong*") LongPointer tensorShape, int 
tensorShapeLength, - IntPointer dimension, + @Const IntPointer dimension, int dimensionLength); @Namespace("shape") public static native @Cast("Nd4jLong") long sliceOffsetForTensor(int rank, int index, - @Cast("Nd4jLong*") LongBuffer shape, - @Cast("Nd4jLong*") LongBuffer tensorShape, + @Cast("const Nd4jLong*") LongBuffer shape, + @Cast("const Nd4jLong*") LongBuffer tensorShape, int tensorShapeLength, - IntBuffer dimension, + @Const IntBuffer dimension, int dimensionLength); @Namespace("shape") public static native @Cast("Nd4jLong") long sliceOffsetForTensor(int rank, int index, - @Cast("Nd4jLong*") long[] shape, - @Cast("Nd4jLong*") long[] tensorShape, + @Cast("const Nd4jLong*") long[] shape, + @Cast("const Nd4jLong*") long[] tensorShape, int tensorShapeLength, - int[] dimension, + @Const int[] dimension, int dimensionLength); /** @@ -9311,25 +9269,33 @@ public static final int PREALLOC_SIZE = 33554432; return (ShapeList)super.position(position); } - public ShapeList(@Cast("Nd4jLong*") LongPointer shape/*=nullptr*/) { super((Pointer)null); allocate(shape); } - private native void allocate(@Cast("Nd4jLong*") LongPointer shape/*=nullptr*/); + public ShapeList(@Cast("const Nd4jLong*") LongPointer shape/*=nullptr*/) { super((Pointer)null); allocate(shape); } + private native void allocate(@Cast("const Nd4jLong*") LongPointer shape/*=nullptr*/); public ShapeList() { super((Pointer)null); allocate(); } private native void allocate(); - public ShapeList(@Cast("Nd4jLong*") LongBuffer shape/*=nullptr*/) { super((Pointer)null); allocate(shape); } - private native void allocate(@Cast("Nd4jLong*") LongBuffer shape/*=nullptr*/); - public ShapeList(@Cast("Nd4jLong*") long[] shape/*=nullptr*/) { super((Pointer)null); allocate(shape); } - private native void allocate(@Cast("Nd4jLong*") long[] shape/*=nullptr*/); - public ShapeList(@Cast("Nd4jLong**") @StdVector PointerPointer shapes) { super((Pointer)null); allocate(shapes); } - private native void allocate(@Cast("Nd4jLong**") 
@StdVector PointerPointer shapes); + public ShapeList(@Cast("const Nd4jLong*") LongBuffer shape/*=nullptr*/) { super((Pointer)null); allocate(shape); } + private native void allocate(@Cast("const Nd4jLong*") LongBuffer shape/*=nullptr*/); + public ShapeList(@Cast("const Nd4jLong*") long[] shape/*=nullptr*/) { super((Pointer)null); allocate(shape); } + private native void allocate(@Cast("const Nd4jLong*") long[] shape/*=nullptr*/); + public ShapeList(@Cast("const Nd4jLong**") @StdVector PointerPointer shapes, @Cast("bool") boolean isWorkspace) { super((Pointer)null); allocate(shapes, isWorkspace); } + private native void allocate(@Cast("const Nd4jLong**") @StdVector PointerPointer shapes, @Cast("bool") boolean isWorkspace); + public ShapeList(@Cast("const Nd4jLong**") @StdVector @ByPtrPtr LongPointer shapes, @Cast("bool") boolean isWorkspace) { super((Pointer)null); allocate(shapes, isWorkspace); } + private native void allocate(@Cast("const Nd4jLong**") @StdVector @ByPtrPtr LongPointer shapes, @Cast("bool") boolean isWorkspace); + public ShapeList(@Cast("const Nd4jLong**") @StdVector @ByPtrPtr LongBuffer shapes, @Cast("bool") boolean isWorkspace) { super((Pointer)null); allocate(shapes, isWorkspace); } + private native void allocate(@Cast("const Nd4jLong**") @StdVector @ByPtrPtr LongBuffer shapes, @Cast("bool") boolean isWorkspace); + public ShapeList(@Cast("const Nd4jLong**") @StdVector @ByPtrPtr long[] shapes, @Cast("bool") boolean isWorkspace) { super((Pointer)null); allocate(shapes, isWorkspace); } + private native void allocate(@Cast("const Nd4jLong**") @StdVector @ByPtrPtr long[] shapes, @Cast("bool") boolean isWorkspace); + public ShapeList(@Cast("const Nd4jLong**") @StdVector PointerPointer shapes) { super((Pointer)null); allocate(shapes); } + private native void allocate(@Cast("const Nd4jLong**") @StdVector PointerPointer shapes); //ShapeList(bool autoRemovable); - public native @Cast("Nd4jLong**") @StdVector PointerPointer asVector(); + public native 
@Cast("const Nd4jLong**") @StdVector PointerPointer asVector(); public native void destroy(); public native int size(); - public native @Cast("Nd4jLong*") LongPointer at(int idx); - public native void push_back(@Cast("Nd4jLong*") LongPointer shape); - public native void push_back(@Cast("Nd4jLong*") LongBuffer shape); - public native void push_back(@Cast("Nd4jLong*") long[] shape); + public native @Cast("const Nd4jLong*") LongPointer at(int idx); + public native void push_back(@Cast("const Nd4jLong*") LongPointer shape); + public native void push_back(@Cast("const Nd4jLong*") LongBuffer shape); + public native void push_back(@Cast("const Nd4jLong*") long[] shape); /** * PLEASE NOTE: This method should be called ONLY if shapes were generated at workspaces. Otherwise you'll get memory leak @@ -12463,6 +12429,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); // #include // #include // #include +// #include // #include // #include // #include diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/Nd4jTestsC.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/Nd4jTestsC.java index d98c1b3bc..77df067ca 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/Nd4jTestsC.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/Nd4jTestsC.java @@ -8315,6 +8315,85 @@ public class Nd4jTestsC extends BaseNd4jTest { assertArrayEquals(new long[]{bS, oH, oW, oC}, ret[0].shape()); } + @Test + public void testMatmulMethod_8() { + val x = Nd4j.create(DataType.INT8, 3, 5).assign(1); + val y = Nd4j.create(DataType.INT8, 5, 3).assign(1); + val e = Nd4j.create(DataType.INT8, 3, 3).assign(5); + + val z = x.mmul(y); + assertEquals(e, z); + } + + @Test + public void testMatmulMethod_7() { + val x = Nd4j.create(DataType.INT16, 3, 5).assign(1); + val y = Nd4j.create(DataType.INT16, 5, 3).assign(1); + val e = Nd4j.create(DataType.INT16, 3, 3).assign(5); + + val z = x.mmul(y); + assertEquals(e, z); + } + + @Test + public 
void testMatmulMethod_1() { + val x = Nd4j.create(DataType.INT32, 3, 5).assign(1); + val y = Nd4j.create(DataType.INT32, 5, 3).assign(1); + val e = Nd4j.create(DataType.INT32, 3, 3).assign(5); + + val z = x.mmul(y); + assertEquals(e, z); + } + + @Test + public void testMatmulMethod_2() { + val x = Nd4j.create(DataType.INT64, 3, 5).assign(1); + val y = Nd4j.create(DataType.INT64, 5, 3).assign(1); + val e = Nd4j.create(DataType.INT64, 3, 3).assign(5); + + val z = x.mmul(y); + assertEquals(e, z); + } + + @Test + public void testMatmulMethod_6() { + val x = Nd4j.create(DataType.UINT8, 3, 5).assign(1); + val y = Nd4j.create(DataType.UINT8, 5, 3).assign(1); + val e = Nd4j.create(DataType.UINT8, 3, 3).assign(5); + + val z = x.mmul(y); + assertEquals(e, z); + } + + @Test + public void testMatmulMethod_5() { + val x = Nd4j.create(DataType.UINT16, 3, 5).assign(1); + val y = Nd4j.create(DataType.UINT16, 5, 3).assign(1); + val e = Nd4j.create(DataType.UINT16, 3, 3).assign(5); + + val z = x.mmul(y); + assertEquals(e, z); + } + + @Test + public void testMatmulMethod_3() { + val x = Nd4j.create(DataType.UINT32, 3, 5).assign(1); + val y = Nd4j.create(DataType.UINT32, 5, 3).assign(1); + val e = Nd4j.create(DataType.UINT32, 3, 3).assign(5); + + val z = x.mmul(y); + assertEquals(e, z); + } + + @Test + public void testMatmulMethod_4() { + val x = Nd4j.create(DataType.UINT64, 3, 5).assign(1); + val y = Nd4j.create(DataType.UINT64, 5, 3).assign(1); + val e = Nd4j.create(DataType.UINT64, 3, 3).assign(5); + + val z = x.mmul(y); + assertEquals(e, z); + } @Override public char ordering() { diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/compression/CompressionTests.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/compression/CompressionTests.java index 3967a2c95..5742ab53f 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/compression/CompressionTests.java +++ 
b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/compression/CompressionTests.java @@ -17,6 +17,7 @@ package org.nd4j.linalg.compression; import lombok.extern.slf4j.Slf4j; +import lombok.val; import org.junit.Ignore; import org.junit.Test; import org.junit.runner.RunWith; @@ -44,7 +45,6 @@ import static org.junit.Assert.*; /** * @author raver119@gmail.com */ -@Ignore @Slf4j @RunWith(Parameterized.class) public class CompressionTests extends BaseNd4jTest { @@ -140,40 +140,6 @@ public class CompressionTests extends BaseNd4jTest { } - @Test - public void testThresholdCompressionZ() { - INDArray initial = Nd4j.create(1, 16384); - for (int i = 0; i < 96; i++) - initial.putScalar(i * 20, 1.0f); - - - INDArray exp = Nd4j.create(1, 16384); - for (int i = 0; i < 96; i++) - exp.putScalar(i * 20, 0.1f); - - INDArray exp_d = Nd4j.create(1, 16384); - for (int i = 0; i < 96; i++) - exp_d.putScalar(i * 20, 0.9f); - - NDArrayCompressor compressor = Nd4j.getCompressor().getCompressor("THRESHOLD"); - compressor.configure(0.9); - - INDArray compressed = Nd4j.getExecutioner().thresholdEncode(initial, 0.9); - - assertEquals(exp, initial); - - log.info("Compressed length: {}", compressed.data().length()); - // log.info("Compressed: {}", Arrays.toString(compressed.data().asInt())); - - INDArray decompressed = Nd4j.create(1, initial.length()); - Nd4j.getExecutioner().thresholdDecode(compressed, decompressed); - - log.info("Decompressed length: {}", decompressed.length()); - - assertEquals(exp_d, decompressed); - } - - @Ignore @Test public void testThresholdCompression0() { @@ -296,6 +262,23 @@ public class CompressionTests extends BaseNd4jTest { @Test public void testThresholdCompression5() { + INDArray initial = Nd4j.ones(10); + INDArray exp_0 = initial.dup(); + + Nd4j.getExecutioner().commit(); + + //Nd4j.getCompressor().getCompressor("THRESHOLD").configure(1e-3); + INDArray compressed = Nd4j.getExecutioner().thresholdEncode(initial, 1.0f, 3); + + assertEquals(7, 
compressed.data().length()); + + assertNotEquals(exp_0, initial); + + assertEquals(7, initial.sumNumber().doubleValue(), 0.01); + } + + @Test + public void testThresholdCompression5_1() { INDArray initial = Nd4j.ones(1000); INDArray exp_0 = initial.dup(); @@ -435,8 +418,8 @@ public class CompressionTests extends BaseNd4jTest { INDArray exp_0 = Nd4j.create(new float[] {0.0f, -1e-4f, 0.0f, 0.0f, 0.0f, 0.0f}); INDArray exp_1 = Nd4j.create(new float[] {0.0f, -5e-4f, 1e-3f, -1e-3f, 0.0f, 0.0f}); - DataBuffer ib = Nd4j.getDataBufferFactory().createInt(5); - INDArray enc = Nd4j.createArrayFromShapeBuffer(ib, initial.shapeInfoDataBuffer()); + + INDArray enc = Nd4j.create(DataType.INT32, initial.length() / 16 + 5); long elements = Nd4j.getExecutioner().bitmapEncode(initial, enc, 1e-3); log.info("Encoded: {}", Arrays.toString(enc.data().asInt())); @@ -471,7 +454,7 @@ public class CompressionTests extends BaseNd4jTest { @Test public void testBitmapEncoding5() { Nd4j.getRandom().setSeed(119); - INDArray initial = Nd4j.rand(new int[]{1, 10000}, -1, -0.5, Nd4j.getRandom()); + INDArray initial = Nd4j.rand(new int[]{10000}, -1, -0.5, Nd4j.getRandom()); INDArray exp_0 = initial.dup().addi(1e-1); INDArray exp_1 = initial.dup(); @@ -486,7 +469,7 @@ public class CompressionTests extends BaseNd4jTest { @Test public void testBitmapEncoding6() { Nd4j.getRandom().setSeed(119); - INDArray initial = Nd4j.rand(new int[]{1, 100000}, -1, 1, Nd4j.getRandom()); + INDArray initial = Nd4j.rand(new int[]{10000}, -1, 1, Nd4j.getRandom()); INDArray exp_1 = initial.dup(); INDArray enc = Nd4j.getExecutioner().bitmapEncode(initial, 1e-3); @@ -494,6 +477,11 @@ public class CompressionTests extends BaseNd4jTest { Nd4j.getExecutioner().bitmapDecode(enc, initial); + val f0 = exp_1.toFloatVector(); + val f1 = initial.toFloatVector(); + + assertArrayEquals(f0, f1, 1e-5f); + assertEquals(exp_1, initial); } diff --git 
a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/workspace/SpecialWorkspaceTests.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/workspace/SpecialWorkspaceTests.java index aefbafe53..2c98d23b1 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/workspace/SpecialWorkspaceTests.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/workspace/SpecialWorkspaceTests.java @@ -425,6 +425,21 @@ public class SpecialWorkspaceTests extends BaseNd4jTest { Files.delete(tmpFile); } + + @Test + public void testMigrateToWorkspace(){ + val src = Nd4j.createFromArray (1L,2L); + val wsConf = new WorkspaceConfiguration().builder().build(); + Nd4j.getWorkspaceManager().createNewWorkspace(wsConf,"testWS"); + val ws = Nd4j.getWorkspaceManager().getAndActivateWorkspace("testWS"); + + val migrated = src.migrate(); + assertEquals(src.dataType(), migrated.dataType()); + assertEquals(1L, migrated.getLong(0)); + + ws.close(); + } + @Override public char ordering() { return 'c'; diff --git a/nd4j/nd4j-common/src/main/java/org/nd4j/common/util/ThreadUtils.java b/nd4j/nd4j-common/src/main/java/org/nd4j/common/util/ThreadUtils.java new file mode 100644 index 000000000..60313c1d9 --- /dev/null +++ b/nd4j/nd4j-common/src/main/java/org/nd4j/common/util/ThreadUtils.java @@ -0,0 +1,29 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +package org.nd4j.common.util; + +public class ThreadUtils { + + private ThreadUtils(){ } + + public static void uncheckedSleep(long sleepTimeMs){ + try{ + Thread.sleep(sleepTimeMs); + } catch (InterruptedException e){ } + } + +} diff --git a/pom.xml b/pom.xml index b34148656..ab9f80b92 100644 --- a/pom.xml +++ b/pom.xml @@ -297,7 +297,7 @@ 1.18.2 ${numpy.version}-${javacpp-presets.version} - 0.3.9 + 0.3.9-1 2020.1 4.3.0 4.2.2