Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions backends/nvidia/tensorrt/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,17 @@ Download and install from the
| Linux | x86_64 | pip or NVIDIA installer |
| Linux (Jetson) | aarch64 | Pre-installed via JetPack |
| Windows | x86_64 | NVIDIA installer |

## Configuration Options

`TensorRTCompileSpec` supports the following options:

| Option | Type | Default | Description |
|--------|------|---------|-------------|
| `workspace_size` | int | 1GB | TensorRT builder workspace size |
| `precision` | TensorRTPrecision | FP32 | Inference precision (FP32, FP16, INT8, BF16) |
| `strict_type_constraints` | bool | False | Enforce strict type constraints |
| `max_batch_size` | int | 1 | Maximum batch size |
| `device_id` | int | 0 | CUDA device ID |
| `dla_core` | int | -1 | DLA core ID (-1 = disabled) |
| `allow_gpu_fallback` | bool | True | Allow GPU fallback when using DLA |
2 changes: 2 additions & 0 deletions backends/nvidia/tensorrt/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,11 @@
sys.path.append(_pkg_dir)

from executorch.backends.nvidia.tensorrt.backend import TensorRTBackend
from executorch.backends.nvidia.tensorrt.compile_spec import TensorRTCompileSpec
from executorch.backends.nvidia.tensorrt.partitioner import TensorRTPartitioner

# Public API of the TensorRT backend package; governs `from ... import *`.
__all__ = [
    "TensorRTBackend",
    "TensorRTCompileSpec",
    "TensorRTPartitioner",
]
122 changes: 122 additions & 0 deletions backends/nvidia/tensorrt/compile_spec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""Compile specification for TensorRT backend."""

import json
from dataclasses import dataclass, fields
from enum import Enum
from typing import List, Optional

from executorch.exir.backend.compile_spec_schema import CompileSpec


# Key used to identify TensorRT compile specs in the CompileSpec list
TENSORRT_COMPILE_SPEC_KEY = "tensorrt_compile_spec"


class TensorRTPrecision(Enum):
    """Supported precision modes for TensorRT.

    The string values are the serialized form used in the JSON payload
    produced by ``TensorRTCompileSpec.to_compile_specs`` and parsed back
    by ``TensorRTCompileSpec.from_compile_specs``.
    """

    FP32 = "fp32"  # single-precision float (default)
    FP16 = "fp16"  # half-precision float
    INT8 = "int8"  # 8-bit integer
    BF16 = "bf16"  # bfloat16


@dataclass
class TensorRTCompileSpec:
    """Configuration options for TensorRT compilation.

    This dataclass holds all the configuration options needed to compile
    a model with TensorRT. It can be serialized to/from CompileSpec format
    for use with the ExecuTorch backend interface.

    Attributes:
        workspace_size: Maximum GPU memory (in bytes) that TensorRT can use
            for temporary storage during engine building. Default is 1GB.
        precision: Target precision for the TensorRT engine.
        strict_type_constraints: If True, TensorRT will strictly follow the
            specified precision. If False, it may use higher precision where
            beneficial for accuracy.
        max_batch_size: Maximum batch size the engine will be optimized for.
        device_id: CUDA device ID to use for building the engine.
        dla_core: DLA (Deep Learning Accelerator) core to use, if available.
            Set to -1 to disable DLA.
        allow_gpu_fallback: If True, allows falling back to GPU for layers
            that cannot run on DLA.
    """

    workspace_size: int = 1 << 30  # 1GB default
    precision: TensorRTPrecision = TensorRTPrecision.FP32
    strict_type_constraints: bool = False
    max_batch_size: int = 1
    device_id: int = 0
    dla_core: int = -1  # -1 means DLA is disabled
    allow_gpu_fallback: bool = True

    def to_compile_specs(self) -> List[CompileSpec]:
        """Serialize this TensorRTCompileSpec to a list of CompileSpec.

        The options are encoded as one JSON payload stored under
        TENSORRT_COMPILE_SPEC_KEY so ``from_compile_specs`` can locate it.

        Returns:
            List containing a single CompileSpec with the serialized options.
        """
        options = {
            "workspace_size": self.workspace_size,
            # Enum members are not JSON-serializable; store the string value.
            "precision": self.precision.value,
            "strict_type_constraints": self.strict_type_constraints,
            "max_batch_size": self.max_batch_size,
            "device_id": self.device_id,
            "dla_core": self.dla_core,
            "allow_gpu_fallback": self.allow_gpu_fallback,
        }
        return [
            CompileSpec(
                key=TENSORRT_COMPILE_SPEC_KEY,
                value=json.dumps(options).encode("utf-8"),
            )
        ]

    @classmethod
    def from_compile_specs(
        cls, compile_specs: List[CompileSpec]
    ) -> Optional["TensorRTCompileSpec"]:
        """Deserialize a TensorRTCompileSpec from a list of CompileSpec.

        Args:
            compile_specs: List of CompileSpec to search for TensorRT options.

        Returns:
            TensorRTCompileSpec if found, None otherwise.
        """
        for spec in compile_specs:
            if spec.key != TENSORRT_COMPILE_SPEC_KEY:
                continue
            # spec.value may already be a str depending on the producer.
            raw = (
                spec.value.decode("utf-8")
                if isinstance(spec.value, (bytes, bytearray))
                else spec.value
            )
            options = json.loads(raw)
            # Keep the dataclass field defaults as the single source of
            # truth: pass through only keys that are actual fields and let
            # the dataclass supply defaults for anything missing. This
            # avoids re-stating every default here (where they could
            # silently drift from the field definitions above), and makes
            # unknown keys written by a newer serializer a no-op rather
            # than a TypeError from cls(**kwargs).
            field_names = {f.name for f in fields(cls)}
            kwargs = {k: v for k, v in options.items() if k in field_names}
            if "precision" in kwargs:
                # Stored as the enum's string value; convert back.
                kwargs["precision"] = TensorRTPrecision(kwargs["precision"])
            return cls(**kwargs)
        return None

    def __repr__(self) -> str:
        # Compact repr: only the options most relevant when debugging.
        return (
            f"TensorRTCompileSpec("
            f"workspace_size={self.workspace_size}, "
            f"precision={self.precision.value}, "
            f"max_batch_size={self.max_batch_size})"
        )
11 changes: 11 additions & 0 deletions backends/nvidia/tensorrt/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,14 @@ def define_common_targets():
"//deeplearning/trt/python:py_tensorrt",
],
)

# Library target for the TensorRT compile spec. Depends only on the
# CompileSpec schema, so it can be used without pulling in the full backend.
runtime.python_library(
    name = "compile_spec",
    srcs = [
        "compile_spec.py",
    ],
    visibility = ["PUBLIC"],
    deps = [
        "//executorch/exir/backend:compile_spec_schema",
    ],
)
11 changes: 10 additions & 1 deletion backends/nvidia/tensorrt/test/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,13 @@ def define_common_targets():
The directory containing this targets.bzl file should also contain both
TARGETS and BUCK files that call this function.
"""
pass

# Unit tests for the TensorRT compile spec (de)serialization helpers.
runtime.python_test(
    name = "test_compile_spec",
    srcs = [
        "test_compile_spec.py",
    ],
    deps = [
        "//executorch/backends/nvidia/tensorrt:compile_spec",
    ],
)
105 changes: 105 additions & 0 deletions backends/nvidia/tensorrt/test/test_compile_spec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""Tests for TensorRT compile spec."""

import unittest


class CompileSpecTest(unittest.TestCase):
    """Tests for TensorRTCompileSpec functionality.

    Imports are kept inside each test so collection does not fail when the
    TensorRT package is unavailable in the test environment.
    """

    def test_default_values(self) -> None:
        """A freshly constructed spec exposes the documented defaults."""
        from executorch.backends.nvidia.tensorrt.compile_spec import (
            TensorRTCompileSpec,
            TensorRTPrecision,
        )

        spec = TensorRTCompileSpec()
        self.assertEqual(spec.workspace_size, 1 << 30)  # 1GB
        self.assertEqual(spec.precision, TensorRTPrecision.FP32)
        self.assertFalse(spec.strict_type_constraints)
        self.assertEqual(spec.max_batch_size, 1)
        self.assertEqual(spec.device_id, 0)
        self.assertEqual(spec.dla_core, -1)
        self.assertTrue(spec.allow_gpu_fallback)

    def test_custom_values(self) -> None:
        """Constructor keyword arguments override every default."""
        from executorch.backends.nvidia.tensorrt.compile_spec import (
            TensorRTCompileSpec,
            TensorRTPrecision,
        )

        spec = TensorRTCompileSpec(
            workspace_size=2 << 30,  # 2GB
            precision=TensorRTPrecision.FP16,
            strict_type_constraints=True,
            max_batch_size=8,
            device_id=1,
            dla_core=0,
            allow_gpu_fallback=False,
        )
        self.assertEqual(spec.workspace_size, 2 << 30)
        self.assertEqual(spec.precision, TensorRTPrecision.FP16)
        self.assertTrue(spec.strict_type_constraints)
        self.assertEqual(spec.max_batch_size, 8)
        self.assertEqual(spec.device_id, 1)
        self.assertEqual(spec.dla_core, 0)
        self.assertFalse(spec.allow_gpu_fallback)

    def test_serialization_roundtrip(self) -> None:
        """A default spec survives to_compile_specs/from_compile_specs."""
        from executorch.backends.nvidia.tensorrt.compile_spec import (
            TensorRTCompileSpec,
        )

        original = TensorRTCompileSpec()
        restored = TensorRTCompileSpec.from_compile_specs(
            original.to_compile_specs()
        )

        self.assertIsNotNone(restored)
        # Dataclass equality compares every field, so this catches a
        # serialization bug in any option, not just a sampled subset.
        self.assertEqual(original, restored)

    def test_serialization_roundtrip_custom(self) -> None:
        """Non-default values for every field survive a round trip."""
        from executorch.backends.nvidia.tensorrt.compile_spec import (
            TensorRTCompileSpec,
            TensorRTPrecision,
        )

        # Every field set away from its default so the roundtrip cannot
        # pass by accidentally reconstructing defaults.
        original = TensorRTCompileSpec(
            workspace_size=512 << 20,  # 512MB
            precision=TensorRTPrecision.INT8,
            strict_type_constraints=True,
            max_batch_size=16,
            device_id=1,
            dla_core=0,
            allow_gpu_fallback=False,
        )
        restored = TensorRTCompileSpec.from_compile_specs(
            original.to_compile_specs()
        )

        self.assertIsNotNone(restored)
        # Full-field comparison via dataclass __eq__ instead of spot checks.
        self.assertEqual(original, restored)

    def test_from_empty_compile_specs(self) -> None:
        """An empty spec list yields None rather than raising."""
        from executorch.backends.nvidia.tensorrt.compile_spec import (
            TensorRTCompileSpec,
        )

        result = TensorRTCompileSpec.from_compile_specs([])
        self.assertIsNone(result)

    def test_compile_spec_key(self) -> None:
        """Serialization emits exactly one CompileSpec under the shared key."""
        from executorch.backends.nvidia.tensorrt.compile_spec import (
            TENSORRT_COMPILE_SPEC_KEY,
            TensorRTCompileSpec,
        )

        spec = TensorRTCompileSpec()
        serialized = spec.to_compile_specs()

        self.assertEqual(len(serialized), 1)
        self.assertEqual(serialized[0].key, TENSORRT_COMPILE_SPEC_KEY)
Loading