diff --git a/.github/workflows/tensorrt.yml b/.github/workflows/tensorrt.yml
index 36f9281418d..4657c02ed19 100644
--- a/.github/workflows/tensorrt.yml
+++ b/.github/workflows/tensorrt.yml
@@ -24,6 +24,7 @@ on:
     paths:
       - backends/nvidia/tensorrt/**
       - examples/nvidia/tensorrt/**
+      - .github/workflows/tensorrt.yml
   workflow_dispatch:
   schedule:
     # Run daily at 3 AM UTC (after CUDA workflow at 2 AM)
@@ -61,7 +62,6 @@ jobs:
         echo "::endgroup::"
 
         echo "::group::Build TensorRT Backend"
-        # Build with TensorRT support
         cmake -S . -B cmake-out \
           -DCMAKE_BUILD_TYPE=Release \
          -DEXECUTORCH_BUILD_TENSORRT=ON \
@@ -87,7 +87,7 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        model: [add, mul, linear]
+        model: [add, add_mul, conv1d, dl3, edsr, efficient_sam, emformer_join, emformer_transcribe, ic3, ic4, linear, mul, mv2, mv3, resnet18, resnet50, sdpa, softmax, w2l]
    with:
      timeout: 60
      runner: linux.g5.4xlarge.nvidia.gpu
@@ -129,7 +129,7 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        model: [add, mul, linear]
+        model: [add, add_mul, conv1d, dl3, edsr, efficient_sam, emformer_join, emformer_transcribe, ic3, ic4, linear, mul, mv2, mv3, resnet18, resnet50, sdpa, softmax, w2l]
    with:
      timeout: 60
      runner: linux.g5.4xlarge.nvidia.gpu
@@ -202,16 +202,47 @@ jobs:
         echo "::endgroup::"
 
         echo "::group::Run TensorRT Backend Unit Tests"
-        # Run all test_*.py files in the backend test directory.
         # The -o "addopts=" override prevents pytest.ini from injecting
         # flags that would run unrelated test suites.
-        python -m pytest backends/nvidia/tensorrt/test/ -v -o "addopts="
+        python -m pytest backends/nvidia/tensorrt/test/ \
+          -v -o "addopts="
         echo "::endgroup::"
+
+  # ---- Export correctness tests ----
+  # Exports all supported models and verifies numerical correctness
+  # against eager PyTorch on GPU via the ExportCorrectnessTest class.
+
+  test-export:
+    name: test-export
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    with:
+      timeout: 120
+      runner: linux.g5.4xlarge.nvidia.gpu
+      gpu-arch-type: cuda
+      gpu-arch-version: "12.6"
+      use-custom-docker-registry: false
+      submodules: recursive
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      script: |
+        set -eux
+
+        echo "::group::Install TensorRT"
+        pip install tensorrt onnx
+        echo "::endgroup::"
+
+        echo "::group::Install ExecuTorch"
+        PYTHON_EXECUTABLE=python ./install_executorch.sh
+        echo "::endgroup::"
+
+        echo "::group::Export all models and verify correctness"
+        python -m pytest examples/nvidia/tensorrt/tests/test_export.py -v -o "addopts="
+        echo "::endgroup::"
 
   # Summary job to check all tests passed
   check-all-tensorrt-tests:
-    needs: [test-tensorrt-build, test-models-tensorrt-python, test-models-tensorrt-cpp, unittest-tensorrt]
-    # All four jobs must succeed for the overall check to pass.
+    needs: [test-tensorrt-build, test-models-tensorrt-python, test-models-tensorrt-cpp, unittest-tensorrt, test-export]
     runs-on: ubuntu-latest
     if: always()
     steps:
@@ -233,4 +264,8 @@
           echo "ERROR: TensorRT unit tests failed!"
           exit 1
         fi
+        if [[ "${{ needs.test-export.result }}" != "success" ]]; then
+          echo "ERROR: TensorRT export correctness tests failed!"
+          exit 1
+        fi
         echo "SUCCESS: All TensorRT backend tests passed!"
diff --git a/backends/nvidia/tensorrt/README.md b/backends/nvidia/tensorrt/README.md
index 4c7d4e4e46e..471e07d4b86 100644
--- a/backends/nvidia/tensorrt/README.md
+++ b/backends/nvidia/tensorrt/README.md
@@ -238,6 +238,20 @@ The TensorRT delegate uses a custom binary blob format:
 - cuDNN 8.x
 - PyTorch 2.x with CUDA support (for export)
 
+### Correctness Tests
+
+```bash
+# Run all correctness tests
+python -m pytest examples/nvidia/tensorrt/tests/test_export.py -v
+
+# Run a single model's test
+python -m pytest examples/nvidia/tensorrt/tests/test_export.py -v -k test_mv3
+```
+
+Each test exports a model with TensorRT, runs inference via ExecuTorch
+pybindings, and compares outputs against eager PyTorch (atol=1e-3, rtol=1e-3)
+across 3 random seeds.
+
 ## Troubleshooting
 
 | Issue | Fix |