Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -807,6 +807,12 @@ if(EXECUTORCH_BUILD_PTHREADPOOL AND EXECUTORCH_BUILD_CPUINFO)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/threadpool)
endif()

# TensorRT examples (benchmark, etc.) need extension_module and extension_tensor,
# so they must be included after those targets are defined above.
# Guarded so that builds without -DEXECUTORCH_BUILD_TENSORRT=ON never descend
# into examples/nvidia at all.
if(EXECUTORCH_BUILD_TENSORRT)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/examples/nvidia)
endif()

if(EXECUTORCH_BUILD_KERNELS_TORCHAO)
if(NOT TARGET cpuinfo)
message(
Expand Down
41 changes: 41 additions & 0 deletions examples/nvidia/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# NVIDIA Backend Examples
#
# This CMakeLists.txt includes the TensorRT examples subdirectory.
#
# Supported platforms:
# - Linux x86_64 with NVIDIA GPU (devgpu, workstations)
# - NVIDIA Jetson (Orin Nano, AGX Orin, etc.)
#
# Build instructions:
# cmake .. -DEXECUTORCH_BUILD_TENSORRT=ON
# cmake --build . --target benchmark

cmake_minimum_required(VERSION 3.19)

# Only C++ is compiled in this subtree; listing LANGUAGES avoids probing for
# a C compiler that is never used.
project(nvidia_examples LANGUAGES CXX)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# Prefer -std=c++17 over -std=gnu++17 so the examples stay portable.
set(CMAKE_CXX_EXTENSIONS OFF)

# Ensure compile_commands.json is generated for tooling
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Source root directory for executorch. When this directory is entered via
# add_subdirectory() from the top-level build, EXECUTORCH_ROOT is normally
# already set; the fallback supports configuring this directory standalone.
if(NOT EXECUTORCH_ROOT)
  set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..)
endif()

# Include utility CMake scripts from ExecuTorch
include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)

# Find CUDA. The tensorrt/ subdirectory links CUDA::cudart unconditionally
# whenever EXECUTORCH_BUILD_TENSORRT is ON, so require the toolkit up front
# and fail with a clear message instead of a late "target CUDA::cudart not
# found" error at generate time. Without the TensorRT examples enabled the
# lookup stays optional and quiet.
if(EXECUTORCH_BUILD_TENSORRT)
  find_package(CUDAToolkit REQUIRED)
else()
  find_package(CUDAToolkit QUIET)
endif()

# Add TensorRT examples subdirectory
add_subdirectory(tensorrt)
80 changes: 80 additions & 0 deletions examples/nvidia/tensorrt/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# TensorRT Examples - Benchmark runner
#
# Build:
# cmake -DEXECUTORCH_BUILD_TENSORRT=ON ...
# cmake --build . --target benchmark
#
# Usage:
# ./benchmark                     # all .pte/.onnx in current dir
# ./benchmark -m mv3              # mv3 .pte and .onnx in current dir
# ./benchmark -d /tmp/trt -n 200  # all models in /tmp/trt, 200 iterations

cmake_minimum_required(VERSION 3.19)

# Fallback for standalone configuration; the top-level build normally sets
# EXECUTORCH_ROOT before entering this directory.
if(NOT EXECUTORCH_ROOT)
  set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../..)
endif()

# Parent of the executorch checkout, so that #include <executorch/...>
# resolves from the examples' sources.
set(COMMON_INCLUDE_DIRS ${EXECUTORCH_ROOT}/..)

if(EXECUTORCH_BUILD_TENSORRT)
  # Defines CMAKE_INSTALL_BINDIR used by install() below; without this the
  # variable may be empty and the install destination would be invalid.
  include(GNUInstallDirs)

  # CUDA runtime is mandatory for the TensorRT benchmark. Locate it here if
  # the parent scope has not already provided the imported target, so the
  # unconditional CUDA::cudart link below cannot fail cryptically.
  if(NOT TARGET CUDA::cudart)
    find_package(CUDAToolkit REQUIRED)
  endif()

  # nvonnxparser is optional (only linked when found); search the TensorRT
  # install first, then the common system library directories.
  find_library(NVONNXPARSER_LIBRARY nvonnxparser
    HINTS ${TENSORRT_HOME}/lib ${TENSORRT_HOME}/lib64
          /usr/lib /usr/lib/x86_64-linux-gnu /usr/lib/aarch64-linux-gnu
  )

  add_executable(benchmark ${CMAKE_CURRENT_SOURCE_DIR}/benchmark.cpp)

  # PRIVATE: an executable has no consumers, so its include paths are purely
  # a build requirement of this target.
  target_include_directories(
    benchmark
    PRIVATE $<BUILD_INTERFACE:${EXECUTORCH_ROOT}>
            $<BUILD_INTERFACE:${COMMON_INCLUDE_DIRS}>
  )

  # extension_module builds as extension_module_static in OSS CMake; accept
  # either target name and fail with actionable guidance when neither exists.
  if(TARGET extension_module_static)
    set(_extension_module extension_module_static)
  elseif(TARGET extension_module)
    set(_extension_module extension_module)
  else()
    message(FATAL_ERROR
      "extension_module not found. Build with -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON")
  endif()

  if(NOT TARGET extension_tensor)
    message(FATAL_ERROR
      "extension_tensor not found. Build with -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON")
  endif()

  target_link_libraries(
    benchmark
    PRIVATE executorch
            ${_extension_module}
            extension_tensor
            portable_kernels
  )

  # Force every object of the backend archive into the binary so its static
  # backend registration runs, even though nothing references it directly.
  target_link_options(
    benchmark
    PRIVATE
    "SHELL:LINKER:--whole-archive $<TARGET_FILE:tensorrt_backend> LINKER:--no-whole-archive"
  )
  target_link_libraries(benchmark PRIVATE CUDA::cudart)
  # TENSORRT_LIBRARY is presumably set by the TensorRT backend's own CMake —
  # TODO(review): confirm; linked only when available, as is nvonnxparser.
  if(TENSORRT_LIBRARY)
    target_link_libraries(benchmark PRIVATE ${TENSORRT_LIBRARY})
  endif()
  if(NVONNXPARSER_LIBRARY)
    target_link_libraries(benchmark PRIVATE ${NVONNXPARSER_LIBRARY})
  endif()
  # Needed because $<TARGET_FILE:...> inside link options does not create a
  # build-order dependency edge the way target_link_libraries() would.
  add_dependencies(benchmark tensorrt_backend)

  # The benchmark uses RTTI and exceptions even if the surrounding build
  # disables them globally.
  target_compile_options(benchmark PRIVATE -frtti -fexceptions)

  install(TARGETS benchmark DESTINATION ${CMAKE_INSTALL_BINDIR})
endif()
34 changes: 28 additions & 6 deletions examples/nvidia/tensorrt/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ Export a supported model to ExecuTorch format with TensorRT delegation:
# Export the add model
python -m executorch.examples.nvidia.tensorrt.export -m add

# Export with validation test
python -m executorch.examples.nvidia.tensorrt.export -m add --test
# Export all supported models to a directory
python -m executorch.examples.nvidia.tensorrt.export -o /tmp/trt

# Export to a specific directory
python -m executorch.examples.nvidia.tensorrt.export -m add -o ./output
Expand Down Expand Up @@ -59,6 +59,7 @@ python -m executorch.examples.nvidia.tensorrt.export --help

- `export.py` - Main export script for converting models to TensorRT format
- `runner.py` - Python utilities for running and testing exported models
- `benchmark.cpp` - C++ benchmark runner for performance measurement
- `tensorrt_executor_runner.cpp` - C++ executor runner for TensorRT models
- `__init__.py` - Package initialization

Expand All @@ -85,13 +86,31 @@ python -m executorch.examples.nvidia.tensorrt.export -m add
--help Show help message
```

### Validation Testing
## Benchmarking

The `--test` flag runs the exported model through the ExecuTorch runtime
and compares outputs against the PyTorch reference model:
Export models then benchmark with the C++ runner:

```bash
python -m executorch.examples.nvidia.tensorrt.export -m add --test
# Step 1: Export models
python -m executorch.examples.nvidia.tensorrt.export -o /tmp/trt

# Step 2: Benchmark all exported models
./benchmark -d /tmp/trt

# Benchmark a specific model
./benchmark -d /tmp/trt -m mv3

# Benchmark with custom timed-iteration and warmup counts
./benchmark -d /tmp/trt -n 200 -w 5
```

**Benchmark Options:**
```
-d, --model_dir DIR Directory with .pte files (default: current dir)
-m, --model_name NAME Run only NAME_tensorrt.pte from the directory
-n, --num_executions N Number of timed iterations (default: 100)
-w, --warmup N Number of warmup runs (default: 3)
-v, --verbose Enable verbose logging
```

## Adding New Models
Expand All @@ -109,7 +128,10 @@ To add support for a new model:
examples/nvidia/tensorrt/
├── export.py # CLI export script using MODEL_NAME_TO_MODEL registry
├── runner.py # Python runtime utilities for testing
├── benchmark.cpp # C++ benchmark runner binary
├── tensorrt_executor_runner.cpp # C++ executor runner binary
├── tests/ # Correctness tests
│ └── test_export.py # Export + inference verification
├── __init__.py # Package exports
└── README.md # This file
```
Expand Down
Loading
Loading