From 7543fe9222dc8ecbf8d5a69097921cf5bb8e2f9d Mon Sep 17 00:00:00 2001 From: agibsonccc Date: Sat, 20 Mar 2021 09:54:34 +0900 Subject: [PATCH] Update ADR statuses, add gpu self hosted configuration similar to cpu --- .../run-cpu-integration-tests-self-hosted.yml | 2 +- .../run-gpu-integration-tests-self-hosted.yml | 38 +++++++++++ ADRs/0002-ONNX_Runtime.md | 2 +- ADRs/0003-Import_IR.md | 2 +- ADRs/0003-NdArray_Strides_ArmCompute.md | 3 +- ADRs/0004-Mapping_IR.md | 2 +- ADRs/0005-Interpreter.md | 2 +- ADRs/0006 - Test architecture.md | 64 +++++++++++++++++++ 8 files changed, 108 insertions(+), 7 deletions(-) create mode 100644 .github/workflows/run-gpu-integration-tests-self-hosted.yml create mode 100644 ADRs/0006 - Test architecture.md diff --git a/.github/workflows/run-cpu-integration-tests-self-hosted.yml b/.github/workflows/run-cpu-integration-tests-self-hosted.yml index f39af5b73..a59e2bfd9 100644 --- a/.github/workflows/run-cpu-integration-tests-self-hosted.yml +++ b/.github/workflows/run-cpu-integration-tests-self-hosted.yml @@ -34,5 +34,5 @@ jobs: cmake --version protoc --version export OMP_NUM_THREADS=1 - mvn -DskipTestResourceEnforcement=true -Ptestresources -Pintegration-tests -Pnd4j-tests-cpu clean test -rf :rl4j-core + mvn -DskipTestResourceEnforcement=true -Ptestresources -Pintegration-tests -Pnd4j-tests-cpu clean test diff --git a/.github/workflows/run-gpu-integration-tests-self-hosted.yml b/.github/workflows/run-gpu-integration-tests-self-hosted.yml new file mode 100644 index 000000000..25fe29250 --- /dev/null +++ b/.github/workflows/run-gpu-integration-tests-self-hosted.yml @@ -0,0 +1,38 @@ +on: + workflow_dispatch: +jobs: + # Gate job: wait up to one minute (timeout-minutes: 1) for a previous run to complete, polling every 10 seconds; aborted if it is not done by then + pre-ci: + runs-on: self-hosted + timeout-minutes: 1 + steps: + - name: 'Block Concurrent Executions' + uses: softprops/turnstyle@v1 + with: + poll-interval-seconds: 10 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + linux-x86_64: + needs: 
pre-ci + runs-on: [self-hosted] + steps: + - uses: AutoModality/action-clean@v1 + - name: Cancel Previous Runs + uses: styfle/cancel-workflow-action@0.8.0 + with: + access_token: ${{ github.token }} + - uses: actions/checkout@v2 + - uses: ./.github/actions/download-dl4j-test-resources-linux + - name: Run gpu tests + shell: bash + env: + DEBIAN_FRONTEND: noninteractive + run: | + export PATH="/opt/protobuf/bin:/usr/local/cuda-11.2/bin:$PATH" + nvcc --version + mvn --version + cmake --version + protoc --version + export OMP_NUM_THREADS=1 + mvn -DskipTestResourceEnforcement=true -Ptestresources -Pintegration-tests -Pnd4j-tests-cuda clean test + diff --git a/ADRs/0002-ONNX_Runtime.md b/ADRs/0002-ONNX_Runtime.md index bb22d0cec..c3b843652 100644 --- a/ADRs/0002-ONNX_Runtime.md +++ b/ADRs/0002-ONNX_Runtime.md @@ -1,7 +1,7 @@ # Onnx runtime module ## Status -Proposed +Implemented Proposed by: Adam Gibson (23-09-2020) diff --git a/ADRs/0003-Import_IR.md b/ADRs/0003-Import_IR.md index eef7a789a..7f471b97d 100644 --- a/ADRs/0003-Import_IR.md +++ b/ADRs/0003-Import_IR.md @@ -2,7 +2,7 @@ ## Status -Proposed +Implemented Proposed by: Adam Gibson (28-09-2020) diff --git a/ADRs/0003-NdArray_Strides_ArmCompute.md b/ADRs/0003-NdArray_Strides_ArmCompute.md index 02e3b2a34..0fb153d68 100644 --- a/ADRs/0003-NdArray_Strides_ArmCompute.md +++ b/ADRs/0003-NdArray_Strides_ArmCompute.md @@ -1,9 +1,8 @@ - # Libnd4j NdArray padded buffers, strides for Arm_Compute Library wrapper ## Status -PROPOSED +Implemented Proposed by: Abdelrauf (23/09/2020) diff --git a/ADRs/0004-Mapping_IR.md b/ADRs/0004-Mapping_IR.md index b62eba532..ab6b64a74 100644 --- a/ADRs/0004-Mapping_IR.md +++ b/ADRs/0004-Mapping_IR.md @@ -1,7 +1,7 @@ # Import IR ## Status -Proposed +Implemented Proposed by: Adam Gibson (28-09-2020) diff --git a/ADRs/0005-Interpreter.md b/ADRs/0005-Interpreter.md index 6e2cc44d1..db57fbf79 100644 --- a/ADRs/0005-Interpreter.md +++ b/ADRs/0005-Interpreter.md @@ -1,7 +1,7 @@ # Interpreter ## 
Status -Proposed +Rejected Proposed by: Adam Gibson (28-09-2020) diff --git a/ADRs/0006 - Test architecture.md b/ADRs/0006 - Test architecture.md new file mode 100644 index 000000000..4703fa5da --- /dev/null +++ b/ADRs/0006 - Test architecture.md @@ -0,0 +1,64 @@ +# JUnit 5 tag usage + +## Status +Proposed + +Proposed by: Adam Gibson (21-03-2021) + +Discussed with: N/A + +## Context +DL4J was a JUnit 4 based code base for testing. +It's now based on JUnit 5's Jupiter API, which has support for [Tags](https://junit.org/junit5/docs/5.0.1/api/org/junit/jupiter/api/Tag.html). + +DL4J's code base has a number of different kinds of tests that fall into several categories: +1. Long and flaky tests involving distributed systems (spark, parameter server) +2. Code that requires large downloads, but runs quickly +3. Quick tests that test basic functionality +4. Comprehensive integration tests that test several parts of a code base + +Due to the variety of behaviors across different tests, it's hard to tell what's actually needed +for running and validating whether changes work against such a complex test base. + +Much of the time, most of the tests aren't related to a given change. +Oftentimes, quick sanity checks are all that's needed in order to make sure a change works. + +A common set of tags is used to filter which tests need to run and when. +This allows us to retain complex integration tests and run them on a set schedule +to catch regressions while allowing a defined subset of tests to run for a quick feedback loop. + + + + +## Decision + +A few kinds of tags exist: +1. Time based: long-time, short-time +2. Network based: has-download +3. Distributed systems: spark, multi-threaded +4. Functional cross-cutting concerns: multi module tests, similar functionality (excludes time based) +5. Platform specific tests that can vary on different hardware: cpu, gpu +6. JVM crash: Tests with native code can crash the JVM for tests. 
It's useful to be able to turn those off when debugging: jvm-crash + + + +## Consequences +### Advantages +* Ability to sort through and filter tests based on different running environments + +* Ability to reason about test suites as a whole dynamically across modules + +* Avoid the need to define test suites + +* Ability to define groups of tags based on profiles + +* Ability to dynamically filter tests from the Maven command line + + +### Disadvantages + +* Documentation and maintenance burden: contributors need to know which tags do what + +* Test maintenance for newcomers who may not know how to tag tests + +