diff --git a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/CapsnetGradientCheckTest.java b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/CapsnetGradientCheckTest.java
index e69766677..ac0224b7a 100644
--- a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/CapsnetGradientCheckTest.java
+++ b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/CapsnetGradientCheckTest.java
@@ -43,12 +43,6 @@ import java.util.Random;
 
 public class CapsnetGradientCheckTest extends BaseDL4JTest {
 
-    private static final boolean PRINT_RESULTS = true;
-    private static final boolean RETURN_ON_FIRST_FAILURE = false;
-    private static final double DEFAULT_EPS = 1e-6;
-    private static final double DEFAULT_MAX_REL_ERROR = 1e-3;
-    private static final double DEFAULT_MIN_ABS_ERROR = 1e-8;
-
     @Test
     public void testCapsNet() {
diff --git a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/OutputLayerGradientChecks.java b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/OutputLayerGradientChecks.java
index 67fc4c11c..6796b2790 100644
--- a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/OutputLayerGradientChecks.java
+++ b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/OutputLayerGradientChecks.java
@@ -43,10 +43,6 @@ import static org.junit.Assert.assertTrue;
 public class OutputLayerGradientChecks extends BaseDL4JTest {
 
     private static final boolean PRINT_RESULTS = true;
-    private static final boolean RETURN_ON_FIRST_FAILURE = false;
-    private static final double DEFAULT_EPS = 1e-6;
-    private static final double DEFAULT_MAX_REL_ERROR = 1e-3;
-    private static final double DEFAULT_MIN_ABS_ERROR = 1e-8;
 
     static {
         Nd4j.setDataType(DataType.DOUBLE);
diff --git a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/RnnGradientChecks.java b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/RnnGradientChecks.java
index 2980cad7c..afb4fd67c 100644
--- a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/RnnGradientChecks.java
+++ b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/RnnGradientChecks.java
@@ -47,10 +47,6 @@ import static org.junit.Assert.assertTrue;
 public class RnnGradientChecks extends BaseDL4JTest {
 
     private static final boolean PRINT_RESULTS = true;
-    private static final boolean RETURN_ON_FIRST_FAILURE = false;
-    private static final double DEFAULT_EPS = 1e-6;
-    private static final double DEFAULT_MAX_REL_ERROR = 1e-3;
-    private static final double DEFAULT_MIN_ABS_ERROR = 1e-8;
 
     static {
         Nd4j.setDataType(DataType.DOUBLE);
diff --git a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/UtilLayerGradientChecks.java b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/UtilLayerGradientChecks.java
index 2d889a6a1..6482984cb 100644
--- a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/UtilLayerGradientChecks.java
+++ b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/UtilLayerGradientChecks.java
@@ -48,12 +48,6 @@ import static org.junit.Assert.assertTrue;
 
 public class UtilLayerGradientChecks extends BaseDL4JTest {
 
-    private static final boolean PRINT_RESULTS = true;
-    private static final boolean RETURN_ON_FIRST_FAILURE = false;
-    private static final double DEFAULT_EPS = 1e-6;
-    private static final double DEFAULT_MAX_REL_ERROR = 1e-3;
-    private static final double DEFAULT_MIN_ABS_ERROR = 1e-6;
-
     static {
         Nd4j.setDataType(DataType.DOUBLE);
     }
@@ -182,9 +176,9 @@ public class UtilLayerGradientChecks extends BaseDL4JTest {
             MultiLayerNetwork net = new MultiLayerNetwork(conf);
             net.init();
 
-            boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(net).input(input)
-                    .minAbsoluteError(1e-7)
-                    .labels(label).inputMask(inMask));
+            boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(net)
+                    .minAbsoluteError(1e-6)
+                    .input(input).labels(label).inputMask(inMask));
             assertTrue(gradOK);
 
             TestUtils.testModelSerialization(net);
@@ -233,8 +227,9 @@ public class UtilLayerGradientChecks extends BaseDL4JTest {
 
         //Test ComputationGraph equivalent:
         ComputationGraph g = net.toComputationGraph();
-        boolean gradOKCG = GradientCheckUtil.checkGradients(new GradientCheckUtil.GraphConfig().net(g).inputs(new INDArray[]{in})
-                .labels(new INDArray[]{labels}).excludeParams(excludeParams));
+        boolean gradOKCG = GradientCheckUtil.checkGradients(new GradientCheckUtil.GraphConfig().net(g)
+                .minAbsoluteError(1e-6)
+                .inputs(new INDArray[]{in}).labels(new INDArray[]{labels}).excludeParams(excludeParams));
         assertTrue(gradOKCG);
 
         TestUtils.testModelSerialization(g);
diff --git a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/YoloGradientCheckTests.java b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/YoloGradientCheckTests.java
index 147150aa8..a47716740 100644
--- a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/YoloGradientCheckTests.java
+++ b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/YoloGradientCheckTests.java
@@ -56,11 +56,6 @@ import static org.junit.Assert.assertTrue;
  * @author Alex Black
  */
 public class YoloGradientCheckTests extends BaseDL4JTest {
-    private static final boolean PRINT_RESULTS = true;
-    private static final boolean RETURN_ON_FIRST_FAILURE = false;
-    private static final double DEFAULT_EPS = 1e-6;
-    private static final double DEFAULT_MAX_REL_ERROR = 1e-3;
-    private static final double DEFAULT_MIN_ABS_ERROR = 1e-8;
 
     static {
         Nd4j.setDataType(DataType.DOUBLE);
diff --git a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingMaskingTests.java b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingMaskingTests.java
index 957d22a08..aebf23673 100644
--- a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingMaskingTests.java
+++ b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingMaskingTests.java
@@ -21,14 +21,13 @@ import org.deeplearning4j.nn.conf.ConvolutionMode;
 import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
 import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
 import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
-import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
-import org.deeplearning4j.nn.conf.layers.GravesLSTM;
-import org.deeplearning4j.nn.conf.layers.OutputLayer;
-import org.deeplearning4j.nn.conf.layers.PoolingType;
+import org.deeplearning4j.nn.conf.layers.*;
+import org.deeplearning4j.nn.conf.layers.GlobalPoolingLayer;
 import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
 import org.deeplearning4j.nn.weights.WeightInit;
 import org.junit.Test;
 import org.nd4j.linalg.activations.Activation;
+import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.ndarray.INDArray;
 import org.nd4j.linalg.api.ops.impl.broadcast.BroadcastMulOp;
 import org.nd4j.linalg.factory.Nd4j;
@@ -416,4 +415,53 @@ public class GlobalPoolingMaskingTests extends BaseDL4JTest {
             }
         }
     }
+
+    @Test
+    public void testMaskLayerDataTypes(){
+
+        for(DataType dt : new DataType[]{DataType.FLOAT16, DataType.BFLOAT16, DataType.FLOAT, DataType.DOUBLE,
+                DataType.INT8, DataType.INT16, DataType.INT32, DataType.INT64,
+                DataType.UINT8, DataType.UINT16, DataType.UINT32, DataType.UINT64}){
+            INDArray mask = Nd4j.rand(DataType.FLOAT, 2, 10).addi(0.3).castTo(dt);
+
+            for(DataType networkDtype : new DataType[]{DataType.FLOAT16, DataType.BFLOAT16, DataType.FLOAT, DataType.DOUBLE}){
+
+                INDArray in = Nd4j.rand(networkDtype, 2, 5, 10);
+                INDArray label1 = Nd4j.rand(networkDtype, 2, 5);
+                INDArray label2 = Nd4j.rand(networkDtype, 2, 5, 10);
+
+                for(PoolingType pt : PoolingType.values()) {
+                    //System.out.println("Net: " + networkDtype + ", mask: " + dt + ", pt=" + pt);
+
+                    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
+                            .list()
+                            .layer(new GlobalPoolingLayer(pt))
+                            .layer(new OutputLayer.Builder().nIn(5).nOut(5).activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).build())
+                            .build();
+
+                    MultiLayerNetwork net = new MultiLayerNetwork(conf);
+                    net.init();
+
+                    net.output(in, false, mask, null);
+                    net.output(in, false, mask, null);
+
+
+                    MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder()
+
+                            .list()
+                            .layer(new RnnOutputLayer.Builder().nIn(5).nOut(5).activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).build())
+                            .build();
+
+                    MultiLayerNetwork net2 = new MultiLayerNetwork(conf2);
+                    net2.init();
+
+                    net2.output(in, false, mask, mask);
+                    net2.output(in, false, mask, mask);
+
+                    net.fit(in, label1, mask, null);
+                    net2.fit(in, label2, mask, mask);
+                }
+            }
+        }
+    }
 }
diff --git a/deeplearning4j/deeplearning4j-core/src/test/resources/logback-test.xml b/deeplearning4j/deeplearning4j-core/src/test/resources/logback-test.xml
index c6f89b60a..69246755b 100644
--- a/deeplearning4j/deeplearning4j-core/src/test/resources/logback-test.xml
+++ b/deeplearning4j/deeplearning4j-core/src/test/resources/logback-test.xml
@@ -19,8 +19,8 @@
     <appender name="FILE" class="ch.qos.logback.core.FileAppender">
         <file>logs/application.log</file>
         <encoder>
-            <pattern>%date - [%level] - from %logger in %thread
-                %n%message%n%xException%n</pattern>
+            <pattern>%logger{15} - %message%n%xException{5}
+            </pattern>
         </encoder>
     </appender>
 
diff --git a/deeplearning4j/deeplearning4j-cuda/src/test/resources/logback.xml b/deeplearning4j/deeplearning4j-cuda/src/test/resources/logback.xml
index c6f89b60a..69246755b 100644
--- a/deeplearning4j/deeplearning4j-cuda/src/test/resources/logback.xml
+++ b/deeplearning4j/deeplearning4j-cuda/src/test/resources/logback.xml
@@ -19,8 +19,8 @@
     <appender name="FILE" class="ch.qos.logback.core.FileAppender">
         <file>logs/application.log</file>
         <encoder>
-            <pattern>%date - [%level] - from %logger in %thread
-                %n%message%n%xException%n</pattern>
+            <pattern>%logger{15} - %message%n%xException{5}
+            </pattern>
         </encoder>
     </appender>
 
diff --git a/deeplearning4j/deeplearning4j-nearestneighbors-parent/deeplearning4j-nearestneighbor-server/src/test/resources/logback.xml b/deeplearning4j/deeplearning4j-nearestneighbors-parent/deeplearning4j-nearestneighbor-server/src/test/resources/logback.xml
index 7953c2712..7d49481af 100644
--- a/deeplearning4j/deeplearning4j-nearestneighbors-parent/deeplearning4j-nearestneighbor-server/src/test/resources/logback.xml
+++ b/deeplearning4j/deeplearning4j-nearestneighbors-parent/deeplearning4j-nearestneighbor-server/src/test/resources/logback.xml
@@ -19,8 +19,8 @@
    <appender name="FILE" class="ch.qos.logback.core.FileAppender">
        <file>logs/application.log</file>
        <encoder>
-            <pattern>%date - [%level] - from %logger in %thread
-                %n%message%n%xException%n</pattern>
+            <pattern>%logger{15} - %message%n%xException{5}
+            </pattern>
        </encoder>
    </appender>
 
diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnOutputLayer.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnOutputLayer.java
index 27cc0f9df..93f77ad67 100644
--- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnOutputLayer.java
+++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnOutputLayer.java
@@ -131,7 +131,7 @@ public class RnnOutputLayer extends BaseOutputLayer<org.deeplearning4j.nn.conf.layers.RnnOutputLayer>
@@ -19,8 +19,8 @@
    <appender name="FILE" class="ch.qos.logback.core.FileAppender">
        <file>logs/application.log</file>
        <encoder>
-            <pattern>%date - [%level] - from %logger in %thread
-                %n%message%n%xException%n</pattern>
+            <pattern>%logger{15} - %message%n%xException{5}
+            </pattern>
        </encoder>
    </appender>
 
diff --git a/deeplearning4j/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper-parameter-server/src/test/resources/logback.xml b/deeplearning4j/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper-parameter-server/src/test/resources/logback.xml
index f1ffbc8ac..f6b823056 100644
--- a/deeplearning4j/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper-parameter-server/src/test/resources/logback.xml
+++ b/deeplearning4j/deeplearning4j-scaleout/deeplearning4j-scaleout-parallelwrapper-parameter-server/src/test/resources/logback.xml
@@ -21,8 +21,8 @@
    <appender name="FILE" class="ch.qos.logback.core.FileAppender">
        <file>logs/application.log</file>
        <encoder>
-            <pattern>%date - [%level] - from %logger in %thread
-                %n%message%n%xException%n</pattern>
+            <pattern>%logger{15} - %message%n%xException{5}
+            </pattern>
        </encoder>
    </appender>
 
diff --git a/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/test/resources/logback.xml b/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/test/resources/logback.xml
index 9dec22fae..4d94f2516 100644
--- a/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/test/resources/logback.xml
+++ b/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-nlp-java8/src/test/resources/logback.xml
@@ -21,8 +21,8 @@
    <appender name="FILE" class="ch.qos.logback.core.FileAppender">
        <file>logs/application.log</file>
        <encoder>
-            <pattern>%date - [%level] - from %logger in %thread
-                %n%message%n%xException%n</pattern>
+            <pattern>%logger{15} - %message%n%xException{5}
+            </pattern>
        </encoder>
    </appender>
 
diff --git a/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/test/resources/logback.xml b/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/test/resources/logback.xml
index 9dec22fae..4d94f2516 100644
--- a/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/test/resources/logback.xml
+++ b/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-nlp/src/test/resources/logback.xml
@@ -21,8 +21,8 @@
    <appender name="FILE" class="ch.qos.logback.core.FileAppender">
        <file>logs/application.log</file>
        <encoder>
-            <pattern>%date - [%level] - from %logger in %thread
-                %n%message%n%xException%n</pattern>
+            <pattern>%logger{15} - %message%n%xException{5}
+            </pattern>
        </encoder>
    </appender>
 
diff --git a/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/test/resources/logback.xml b/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/test/resources/logback.xml
index 9dec22fae..4d94f2516 100644
--- a/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/test/resources/logback.xml
+++ b/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/test/resources/logback.xml
@@ -21,8 +21,8 @@
    <appender name="FILE" class="ch.qos.logback.core.FileAppender">
        <file>logs/application.log</file>
        <encoder>
-            <pattern>%date - [%level] - from %logger in %thread
-                %n%message%n%xException%n</pattern>
+            <pattern>%logger{15} - %message%n%xException{5}
+            </pattern>
        </encoder>
    </appender>
 
diff --git a/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/resources/logback.xml b/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/resources/logback.xml
index 9dec22fae..4d94f2516 100644
--- a/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/resources/logback.xml
+++ b/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/resources/logback.xml
@@ -21,8 +21,8 @@
    <appender name="FILE" class="ch.qos.logback.core.FileAppender">
        <file>logs/application.log</file>
        <encoder>
-            <pattern>%date - [%level] - from %logger in %thread
-                %n%message%n%xException%n</pattern>
+            <pattern>%logger{15} - %message%n%xException{5}
+            </pattern>
        </encoder>
    </appender>
 
diff --git a/deeplearning4j/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/test/resources/logback.xml b/deeplearning4j/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/test/resources/logback.xml
index 2c204cafa..9baf66a0d 100644
--- a/deeplearning4j/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/test/resources/logback.xml
+++ b/deeplearning4j/deeplearning4j-ui-parent/deeplearning4j-ui-model/src/test/resources/logback.xml
@@ -21,8 +21,8 @@
    <appender name="FILE" class="ch.qos.logback.core.FileAppender">
        <file>logs/application.log</file>
        <encoder>
-            <pattern>%date - [%level] - from %logger in %thread
-                %n%message%n%xException%n</pattern>
+            <pattern>%logger{15} - %message%n%xException{5}
+            </pattern>
        </encoder>
    </appender>
 
diff --git a/deeplearning4j/deeplearning4j-ui-parent/deeplearning4j-ui-standalone/src/main/resources/logback.xml b/deeplearning4j/deeplearning4j-ui-parent/deeplearning4j-ui-standalone/src/main/resources/logback.xml
index 1753f88dc..2283bdc50 100644
--- a/deeplearning4j/deeplearning4j-ui-parent/deeplearning4j-ui-standalone/src/main/resources/logback.xml
+++ b/deeplearning4j/deeplearning4j-ui-parent/deeplearning4j-ui-standalone/src/main/resources/logback.xml
@@ -21,8 +21,8 @@
    <appender name="FILE" class="ch.qos.logback.core.FileAppender">
        <file>logs/application.log</file>
        <encoder>
-            <pattern>%date - [%level] - from %logger in %thread
-                %n%message%n%xException%n</pattern>
+            <pattern>%logger{15} - %message%n%xException{5}
+            </pattern>
        </encoder>
    </appender>
 
diff --git a/deeplearning4j/deeplearning4j-ui-parent/deeplearning4j-ui/src/test/resources/logback.xml b/deeplearning4j/deeplearning4j-ui-parent/deeplearning4j-ui/src/test/resources/logback.xml
index 2c204cafa..9baf66a0d 100644
--- a/deeplearning4j/deeplearning4j-ui-parent/deeplearning4j-ui/src/test/resources/logback.xml
+++ b/deeplearning4j/deeplearning4j-ui-parent/deeplearning4j-ui/src/test/resources/logback.xml
@@ -21,8 +21,8 @@
    <appender name="FILE" class="ch.qos.logback.core.FileAppender">
        <file>logs/application.log</file>
        <encoder>
-            <pattern>%date - [%level] - from %logger in %thread
-                %n%message%n%xException%n</pattern>
+            <pattern>%logger{15} - %message%n%xException{5}
+            </pattern>
        </encoder>
    </appender>
 
diff --git a/deeplearning4j/deeplearning4j-ui-parent/deeplearning4j-vertx/src/test/resources/logback.xml b/deeplearning4j/deeplearning4j-ui-parent/deeplearning4j-vertx/src/test/resources/logback.xml
index 2c204cafa..9baf66a0d 100644
--- a/deeplearning4j/deeplearning4j-ui-parent/deeplearning4j-vertx/src/test/resources/logback.xml
+++ b/deeplearning4j/deeplearning4j-ui-parent/deeplearning4j-vertx/src/test/resources/logback.xml
@@ -21,8 +21,8 @@
    <appender name="FILE" class="ch.qos.logback.core.FileAppender">
        <file>logs/application.log</file>
        <encoder>
-            <pattern>%date - [%level] - from %logger in %thread
-                %n%message%n%xException%n</pattern>
+            <pattern>%logger{15} - %message%n%xException{5}
+            </pattern>
        </encoder>
    </appender>
 
diff --git a/deeplearning4j/dl4j-integration-tests/src/test/resources/logback-test.xml b/deeplearning4j/dl4j-integration-tests/src/test/resources/logback-test.xml
index c6f89b60a..69246755b 100644
--- a/deeplearning4j/dl4j-integration-tests/src/test/resources/logback-test.xml
+++ b/deeplearning4j/dl4j-integration-tests/src/test/resources/logback-test.xml
@@ -19,8 +19,8 @@
    <appender name="FILE" class="ch.qos.logback.core.FileAppender">
        <file>logs/application.log</file>
        <encoder>
-            <pattern>%date - [%level] - from %logger in %thread
-                %n%message%n%xException%n</pattern>
+            <pattern>%logger{15} - %message%n%xException{5}
+            </pattern>
        </encoder>
    </appender>
 
diff --git a/libnd4j/CMakeLists.txt b/libnd4j/CMakeLists.txt
index c82b0b217..cf9d4ff88 100755
--- a/libnd4j/CMakeLists.txt
+++ b/libnd4j/CMakeLists.txt
@@ -5,7 +5,7 @@ option(NATIVE "Optimize for build machine (might not work on others)" OFF)
 set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})
 #ensure we create lib files
 set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS OFF)
-
+option(CHECK_VECTORIZATION "checks for vectorization" OFF)
 option(BUILD_TESTS "Build tests" OFF)
 option(FLATBUFFERS_BUILD_FLATC "Enable the build of the flatbuffers compiler" OFF)
 set(FLATBUFFERS_BUILD_FLATC "OFF" CACHE STRING "Hack to disable flatc build" FORCE)
diff --git a/libnd4j/CMakeLists.txt.mkldnn.in b/libnd4j/CMakeLists.txt.mkldnn.in
index 3069d9efe..3de36dfde 100644
--- a/libnd4j/CMakeLists.txt.mkldnn.in
+++ b/libnd4j/CMakeLists.txt.mkldnn.in
@@ -5,7 +5,7 @@ project(mkldnn-download NONE)
 include(ExternalProject)
 ExternalProject_Add(mkldnn
     GIT_REPOSITORY https://github.com/intel/mkl-dnn.git
-    GIT_TAG v1.1.2
+    GIT_TAG v1.1.3
     SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/mkldnn-src"
     BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/mkldnn-build"
     CONFIGURE_COMMAND ""
diff --git a/libnd4j/README.md b/libnd4j/README.md
index 9cea1b597..ec17c6227 100644
--- a/libnd4j/README.md
+++ b/libnd4j/README.md
@@ -17,8 +17,11 @@ There's few additional arguments for `buildnativeoperations.sh` script you could
 -b release OR -b debug // enables/desables debug builds. release is considered by default
 -j XX // this argument defines how many threads will be used to binaries on your box. i.e. -j 8
 -cc XX// CUDA-only argument, builds only binaries for target GPU architecture. use this for fast builds
+--check-vectorization // auto-vectorization report for developers. (Currently, only GCC is supported)
 ```
 
+[More about the AutoVectorization report](auto_vectorization/AutoVectorization.md)
+
 You can find the compute capability for your card [on the NVIDIA website here](https://developer.nvidia.com/cuda-gpus).
 
 For example, a GTX 1080 has compute capability 6.1, for which you would use ```-cc 61``` (note no decimal point).
diff --git a/libnd4j/auto_vectorization/AutoVectorization.md b/libnd4j/auto_vectorization/AutoVectorization.md
new file mode 100644
index 000000000..61b98febe
--- /dev/null
+++ b/libnd4j/auto_vectorization/AutoVectorization.md
@@ -0,0 +1,49 @@
+# Auto-vectorization Report
+
+This tool turns compiler output from the auto-vectorization process into a human-friendly report. It is intended to help developers investigate the obstacles the compiler faced during auto-vectorization.
+
+## Usage
+Add the ```--check-vectorization``` option to the **release** build to get the auto-vectorization report:
+```./buildnativeoperations.sh -a native -j 28 --check-vectorization```
+It will output ```vecmiss.html``` inside the blasbuild/cpu folder.
+
+## Report Format
+Each file name contains info about the optimization attempts for its source code lines.
+Each line number is also expandable (⇲) and contains distinct failure notes.
+Click on a line number to see the source code.
+
+| file name | total successful attempts | total failed attempts | ⇲ |
+|---|---|---|--|
+| line number | successful attempts | failed attempts | ⇲ |
+| - failure reasons |
+| line number | successful attempts | failed attempts | ⇲ |
+
+##### Requirements
+- GCC (Currently, only GCC is supported)
+- python3
+
+### Detailed report with the `-fsave-optimization-record` option
+If you want more detailed information (for now, the report includes the functions in which failures occurred), you should use a newer toolchain (GCC > 9), as newer GCC versions provide the `-fsave-optimization-record` option.
+`buildnativeoperations.sh` will detect that option via CMake and switch to the more detailed version.
+Please note that this option is still experimental; the compiler may fail with an error while outputting some json.gz files.
+In that case, try to exclude those files from the build.
+Also note that the internal structure of the `-fsave-optimization-record` json.gz output may change in the future.
+
+It outputs two files, **vecmiss_fsave.html** and **vecmiss_fsave.html.js**, so JavaScript must be enabled in your browser to see the report details.
+
+##### Requirements for the detailed report
+- GCC version > 9
+- python3
+- Cython (python3)
+- json (python3)
+- gzip (python3)
+- c++filt
+
+Internally, we use Cython to speed up json.gz file processing (bigGzipJson.pyx), because json.gz files can take a lot of memory when loaded whole.
+
+If you want to use bigGzipJson outside of `buildnativeoperations.sh` and CMake, you can compile it manually with this command in the auto_vectorization folder:
+`python3 cython_setup.py build_ext --inplace`
+
+json.gz files can also be processed outside of `buildnativeoperations.sh`: call `python3 auto_vect.py --fsave` in the base source folder where the json.gz files exist.
diff --git a/libnd4j/auto_vectorization/auto_vect.py b/libnd4j/auto_vectorization/auto_vect.py
new file mode 100644
index 000000000..f98dc7422
--- /dev/null
+++ b/libnd4j/auto_vectorization/auto_vect.py
@@ -0,0 +1,546 @@
+'''
+@author : Abdelrauf rauf@konduit.ai
+'''
+import re
+import sys
+import os
+import subprocess
+import fnmatch
+import json
+import gzip
+try:
+    from bigGzipJson import json_gzip_extract_objects
+except ImportError:
+    pass
+from pathlib import Path
+from multiprocessing import Pool, Manager, cpu_count
+import traceback
+import html
+
+# Matches compiler diagnostics of the form "file:line:column: message"
+mtch = re.compile(r"[^/]*([^:]+)\:(\d+)\:(\d+)\:(.*)")
+replace_msg = re.compile(r"(\d+)?\.?(\d+)?_?\d+\.?(\d+)?")
+progress_msg = re.compile(r"\s{0,4}\[\s{0,2}\d+\%\]")
+file_dir_strip = str(Path(os.getcwd()))
+pp_index = file_dir_strip.rfind("libnd4j")
+if pp_index >= 0:
+    file_dir_strip = file_dir_strip[:pp_index + len("libnd4j")]
+BASE_URL = "https://github.com/eclipse/deeplearning4j/tree/master/libnd4j/"
+if BASE_URL.endswith("/") == False:
+    BASE_URL = BASE_URL + "/"
+#print(file_dir_strip)
+
+class info:
+    def __repr__(self):
+        return str(self.__dict__)
+
+FSAVE_IGNORE_EXTERNALS = True
+
+def get_cxx_filt_result(strx):
+    if len(strx) < 1:
+        return ""
+    res = subprocess.Popen(["c++filt", "-i", strx], stdout=subprocess.PIPE).communicate()[0]
+    res = res.decode('utf-8')
+    #replace some long names to reduce size
+    res = res.replace("unsigned long long", "uLL")
+    res = res.replace("unsigned long int", "uL")
+    res = res.replace("unsigned long", "uL")
+    res = res.replace("unsigned int", "ui")
+    res = res.replace("unsigned char", "uchar")
+    res = res.replace("unsigned short", "ushort")
+    res = res.replace("long long", "LL")
+    res = res.replace(", ", ",")
+    return res.strip()
+
+
+def internal_glob(dir, match):
+    listx = []
+    for root, dirnames, filenames in os.walk(dir):
+        for filename in fnmatch.filter(filenames, match):
+            listx.append(os.path.join(root, filename))
+    return listx
+
+def get_obj_json_gz(filename):
+    with gzip.GzipFile(filename, 'r') as f:
+        return json.loads(f.read().decode('utf-8'))[-1]
+
+
+def get_msg(msg):
+    msg = msg.lower().strip()
+    if "note: not vectorized:" in msg:
+        msg = replace_msg.sub("_numb", msg.replace("note: not vectorized:", ""))
+        return (0, 1, msg.strip())
+    elif "loop vectorized" in msg:
+        return (1, 0, None)
+    # elif msg.startswith("missed")==False:
+    #     msg = replace_msg.sub("_numb", msg)
+    #     return (0, 0, msg.strip())
+    return None
+
+
+class File_Info:
+    '''
+    Holds information about vectorized and missed-vectorization lines for one file
+    '''
+
+    def __init__(self):
+        self.infos = {}
+        self.total_opted = 0
+        self.total_missed = 0
+        self.external = False
+
+    def add_line(self, line_pos):
+        if line_pos not in self.infos:
+            v = info()
+            v.optimized = 0
+            v.missed = 0
+            v.miss_details = set()
+            self.infos[line_pos] = v
+            return v
+        else:
+            return self.infos[line_pos]
+
+    def add_line_fsave(self, line_pos):
+        if line_pos not in self.infos:
+            v = info()
+            v.optimized = 0
+            v.missed = 0
+            v.miss_details2 = dict()
+            self.infos[line_pos] = v
+            return v
+        else:
+            return self.infos[line_pos]
+
+    def add_fsave(self, line_pos, success, msg, function, inline_fns=''):
+        v = self.add_line_fsave(line_pos)
+        if success and "loop vectorized" in msg:
+            v.optimized += 1
+            self.total_opted += 1
+        elif success == False and "not vectorized:" in msg:
+            #reduce this msg
+            msg = msg.replace("not vectorized:", "")
+            v.missed += 1
+            self.total_missed += 1
+            msg = sys.intern(msg)
+            if msg in v.miss_details2:
+                ls = v.miss_details2.get(msg)
+                ls.add(function)
+            else:
+                ls = set()
+                v.miss_details2[msg] = ls
+                ls.add(function)
+        return self
+
+    def add(self, line_pos, msg_x):
+        v = self.add_line(line_pos)
+        if msg_x is not None:
+            v.optimized += msg_x[0]
+            v.missed += msg_x[1]
+            self.total_opted += msg_x[0]
+            self.total_missed += msg_x[1]
+            if msg_x[2] is not None:
+                v.miss_details.add(msg_x[2])
+        return self
+
+    def __repr__(self):
+        return str(self.__dict__)
+
+
+def process_gzip_json_mp(args):
+    process_gzip_json_new(*args)
+
+# Producer: streams optimization records from one json.gz file and dispatches
+# them to per-file consumer queues (the same file always hashes to the same queue).
+def process_gzip_json_new(json_gz_fname, list_Queue):
+    gz_name = Path(json_gz_fname).stem
+    #print("::--open and process {0}".format(gz_name))
+    queue_count = len(list_Queue)
+    #print(queue_count)
+    q = list_Queue[0]
+    old_fname = ''
+    total_c = 0
+    for x in json_gzip_extract_objects(json_gz_fname, 'message', 'vectorized'):
+        external_source = True
+        if len(x['message']) > 0 and 'location' in x:
+            line = int(x['location']['line'])
+            file_name = x['location']['file'].strip()
+            if file_dir_strip in file_name:
+                file_name = file_name.replace(file_dir_strip, './')
+                external_source = False
+            msg = x['message'][0]
+            success = x['kind'] == 'success'
+            func = '' if 'function' not in x else x['function']
+
+            if file_name != old_fname:
+                #send our info to the right consumer
+                queue_ind = hash(file_name) % queue_count
+                #print("queue index {0}".format(queue_ind))
+                q = list_Queue[queue_ind]
+                old_fname = file_name
+            total_c += 1
+            #print("pp {0} {1}".format(q, (file_name, line, success, msg, func, external_source)))
+            if FSAVE_IGNORE_EXTERNALS == True and external_source == True:
+                continue
+            q.put((file_name, line, success, msg, func, external_source))
+    print("::finished {0:60s} :{1:8d}".format(gz_name, total_c))
+
+def consume_processed_mp(args):
+    return consume_processed_new(*args)
+
+# Consumer: drains one queue, aggregates the records into File_Info objects,
+# and generates a partial HTML report once the producers signal the end.
+def consume_processed_new(list_Queue, c_index):
+
+    info_ = dict()
+    func_list = dict()
+    last_func_index = 0
+    q = list_Queue[c_index]
+    print("::consumer {0}".format(c_index))
+    total_c = 0
+    r_c = 0
+    while True:
+        #print("try to get new from {0}".format(index))
+        obj = q.get()
+        #print("cc {0} {1}".format(q, obj))
+        if obj == None:
+            break #we received the end
+        file_name, line, success, msg, func, external_source = obj
+        try:
+            #get function index
+            func_index = -1
+            if func in func_list:
+                func_index = func_list[func]
+            else:
+                func_list[func] = last_func_index
+                func_index = last_func_index
+                last_func_index += 1
+
+            if file_name in info_:
+                info_[file_name].add_fsave(line, success, msg, func_index)
+            else:
+                info_[file_name] = File_Info().add_fsave(line, success, msg, func_index)
+                info_[file_name].external = external_source
+            total_c += 1
+            if total_c - r_c > 10000:
+                r_c = total_c
+                print("::consumer {0:2d} :{1:10d}".format(c_index, total_c))
+        except Exception as e:
+            print(traceback.format_exc())
+            break
+
+    print("::consumer {0:2d} :{1:10d}".format(c_index, total_c))
+    #write to temp file
+    wr_fname = "vecmiss_fsave{0}.html".format(str(c_index) if len(list_Queue) > 1 else '')
+    print("generate report for consumer {0} {1}".format(c_index, len(info_)))
+    try:
+        uniq_ind = str(c_index) + '_' if len(list_Queue) > 1 else ''
+        generate_report(wr_fname, info_, only_body=False, unique_id_prefix=uniq_ind, fsave_format=True, function_list=func_list)
+        print(" consumer {0} saved output into {1}".format(c_index, wr_fname))
+    except Exception as e:
+        print(traceback.format_exc())
+
+
+def obtain_info_from(input_):
+    info_ = dict()
+    for line in input_:
+        x = mtch.match(line)
+        external_source = True
+        if x:
+            file_name = x.group(1).strip()
+            if file_dir_strip in file_name:
+                file_name = file_name.replace(file_dir_strip, '')
+                external_source = False
+            line_number = int(x.group(2))
+            msg = x.group(4).lower()
+            msg = msg.replace(file_dir_strip, './')
+            msg_x = get_msg(msg)
+            if msg_x is None:
+                continue
+            if file_name in info_:
+                #ignore col_number
+                info_[file_name].add(line_number, msg_x)
+            else:
+                #print("{0} {1}".format(file_name, external_source))
+                info_[file_name] = File_Info().add(line_number, msg_x)
+                info_[file_name].external = external_source
+        elif progress_msg.match(line):
+            #actually we redirect only stderr, so this should not happen
+            print("__" + line.strip())
+        elif "error" in line or "Error" in line:
+            print("****" + line.strip())
+    return info_
+
+
+# NOTE: the HTML/CSS template string literals in the next three functions were
+# lost in this excerpt; only the surrounding code structure is shown.
+def custom_style(fsave):
+    st = ''''''
+
+def header(fsave=False):
+    strx = '\n\n\n\nAuto-Vectorization\n'
+    strx += ''.format(BASE_URL)
+    strx += custom_style(fsave)
+    strx += '\n\n\n'
+    return strx
+
+def footer():
+    return '\n'
+
+
+def get_compressed_indices(set_a):
+    a_len = len(set_a)
+    if a_len <= 1:
+        if a_len < 1:
+            return ''
+        return str(set_a)[1:-1]
+    #we sorted and only saved the differences
+    # 1,14,15,19 --> 1,13,1,4  (10 bytes => 8 bytes)
+    list_sorted = sorted(list(set_a))
+    last = list_sorted[0]
+    str_x = str(list_sorted[0])
+    for i in range(1, a_len):
+        str_x += ',' + str(list_sorted[i] - last)
+        last = list_sorted[i]
+    return str_x
+
+
+def get_content(k, v, unique_id_prefix='', fsave_format=False):
+    inner_str = ''
+    content = ''
+    inc_id = 0
+    for fk, fv in sorted(v.infos.items()):
+        if fsave_format == True:
+            inner_str += '{0} {1} {2}
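
The `get_compressed_indices` helper above delta-encodes a sorted set of function indices to shrink the generated report payload; per its own comment, `1,14,15,19` becomes `1,13,1,4`. Below is a minimal standalone sketch of that scheme. The encoder mirrors the logic shown in `auto_vect.py`; the decoder is an assumption for illustration only, since the actual decoding happens in the generated `vecmiss_fsave.html.js`, which is not part of this diff.

```python
# Sketch of the delta-encoding used by get_compressed_indices in auto_vect.py.
# decompress_indices is a hypothetical inverse, not code from the repository.

def compress_indices(index_set):
    # Sort, then store the first value followed by successive differences:
    # {1, 14, 15, 19} -> "1,13,1,4"
    values = sorted(index_set)
    if not values:
        return ''
    parts = [str(values[0])]
    for prev, cur in zip(values, values[1:]):
        parts.append(str(cur - prev))
    return ','.join(parts)

def decompress_indices(text):
    # Cumulative sum restores the original indices from the deltas.
    if not text:
        return []
    out, acc = [], 0
    for part in text.split(','):
        acc += int(part)
        out.append(acc)
    return out

if __name__ == '__main__':
    encoded = compress_indices({1, 14, 15, 19})
    assert encoded == "1,13,1,4"
    assert decompress_indices(encoded) == [1, 14, 15, 19]
```

Delta-encoding keeps the per-line function lists compact: consecutive function indices are common in the report data, and short decimal differences take fewer bytes than the full index values.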