Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions backends/nvidia/tensorrt/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,17 @@ Download and install from the
| Linux | x86_64 | pip or NVIDIA installer |
| Linux (Jetson) | aarch64 | Pre-installed via JetPack |
| Windows | x86_64 | NVIDIA installer |

## Configuration Options

`TensorRTCompileSpec` supports the following options:

| Option | Type | Default | Description |
|--------|------|---------|-------------|
| `workspace_size` | int | 1GB | TensorRT builder workspace size |
| `precision` | TensorRTPrecision | FP32 | Inference precision (FP32, FP16, INT8, BF16) |
| `strict_type_constraints` | bool | False | Enforce strict type constraints |
| `max_batch_size` | int | 1 | Maximum batch size |
| `device_id` | int | 0 | CUDA device ID |
| `dla_core` | int | -1 | DLA core ID (-1 = disabled) |
| `allow_gpu_fallback` | bool | True | Allow GPU fallback when using DLA |
2 changes: 2 additions & 0 deletions backends/nvidia/tensorrt/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,11 @@
sys.path.append(_pkg_dir)

from executorch.backends.nvidia.tensorrt.backend import TensorRTBackend
from executorch.backends.nvidia.tensorrt.compile_spec import TensorRTCompileSpec
from executorch.backends.nvidia.tensorrt.partitioner import TensorRTPartitioner

# Public API of the TensorRT backend package; governs `from ... import *`.
__all__ = [
    "TensorRTBackend",
    "TensorRTCompileSpec",
    "TensorRTPartitioner",
]
122 changes: 122 additions & 0 deletions backends/nvidia/tensorrt/compile_spec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""Compile specification for TensorRT backend."""

import json
from dataclasses import dataclass, fields
from enum import Enum
from typing import List, Optional

from executorch.exir.backend.compile_spec_schema import CompileSpec


# Key used to identify TensorRT compile specs in the CompileSpec list
TENSORRT_COMPILE_SPEC_KEY = "tensorrt_compile_spec"


class TensorRTPrecision(Enum):
    """Supported precision modes for TensorRT.

    The string values are the serialized form used in the JSON payload
    produced by ``TensorRTCompileSpec.to_compile_specs`` and parsed back
    by ``TensorRTCompileSpec.from_compile_specs``.
    """

    FP32 = "fp32"  # single-precision float (default)
    FP16 = "fp16"  # half-precision float
    INT8 = "int8"  # 8-bit integer
    BF16 = "bf16"  # bfloat16


@dataclass
class TensorRTCompileSpec:
    """Configuration options for TensorRT compilation.

    This dataclass holds all the configuration options needed to compile
    a model with TensorRT. It can be serialized to/from CompileSpec format
    for use with the ExecuTorch backend interface.

    Attributes:
        workspace_size: Maximum GPU memory (in bytes) that TensorRT can use
            for temporary storage during engine building. Default is 1GB.
        precision: Target precision for the TensorRT engine.
        strict_type_constraints: If True, TensorRT will strictly follow the
            specified precision. If False, it may use higher precision where
            beneficial for accuracy.
        max_batch_size: Maximum batch size the engine will be optimized for.
        device_id: CUDA device ID to use for building the engine.
        dla_core: DLA (Deep Learning Accelerator) core to use, if available.
            Set to -1 to disable DLA.
        allow_gpu_fallback: If True, allows falling back to GPU for layers
            that cannot run on DLA.
    """

    workspace_size: int = 1 << 30  # 1GB default
    precision: TensorRTPrecision = TensorRTPrecision.FP32
    strict_type_constraints: bool = False
    max_batch_size: int = 1
    device_id: int = 0
    dla_core: int = -1  # -1 means DLA is disabled
    allow_gpu_fallback: bool = True

    def to_compile_specs(self) -> List[CompileSpec]:
        """Serialize this TensorRTCompileSpec to a list of CompileSpec.

        The options are encoded as one JSON payload stored under
        TENSORRT_COMPILE_SPEC_KEY so ``from_compile_specs`` can locate it.

        Returns:
            List containing a single CompileSpec with the serialized options.
        """
        options = {
            "workspace_size": self.workspace_size,
            # Enum members are not JSON-serializable; store the string value.
            "precision": self.precision.value,
            "strict_type_constraints": self.strict_type_constraints,
            "max_batch_size": self.max_batch_size,
            "device_id": self.device_id,
            "dla_core": self.dla_core,
            "allow_gpu_fallback": self.allow_gpu_fallback,
        }
        return [
            CompileSpec(
                key=TENSORRT_COMPILE_SPEC_KEY,
                value=json.dumps(options).encode("utf-8"),
            )
        ]

    @classmethod
    def from_compile_specs(
        cls, compile_specs: List[CompileSpec]
    ) -> Optional["TensorRTCompileSpec"]:
        """Deserialize a TensorRTCompileSpec from a list of CompileSpec.

        Args:
            compile_specs: List of CompileSpec to search for TensorRT options.

        Returns:
            TensorRTCompileSpec if found, None otherwise.
        """
        for spec in compile_specs:
            if spec.key != TENSORRT_COMPILE_SPEC_KEY:
                continue
            # spec.value may already be a str depending on the producer.
            raw = (
                spec.value.decode("utf-8")
                if isinstance(spec.value, (bytes, bytearray))
                else spec.value
            )
            options = json.loads(raw)
            # Keep the dataclass field defaults as the single source of
            # truth: pass through only keys that are actual fields and let
            # the dataclass supply defaults for anything missing. This
            # avoids re-stating every default here (where they could
            # silently drift from the field definitions above), and makes
            # unknown keys written by a newer serializer a no-op rather
            # than a TypeError from cls(**kwargs).
            field_names = {f.name for f in fields(cls)}
            kwargs = {k: v for k, v in options.items() if k in field_names}
            if "precision" in kwargs:
                # Stored as the enum's string value; convert back.
                kwargs["precision"] = TensorRTPrecision(kwargs["precision"])
            return cls(**kwargs)
        return None

    def __repr__(self) -> str:
        # Compact repr: only the options most relevant when debugging.
        return (
            f"TensorRTCompileSpec("
            f"workspace_size={self.workspace_size}, "
            f"precision={self.precision.value}, "
            f"max_batch_size={self.max_batch_size})"
        )
11 changes: 11 additions & 0 deletions backends/nvidia/tensorrt/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,14 @@ def define_common_targets():
"//deeplearning/trt/python:py_tensorrt",
],
)

# Library target for the TensorRT compile spec. Depends only on the
# CompileSpec schema, so it can be used without pulling in the full backend.
runtime.python_library(
    name = "compile_spec",
    srcs = [
        "compile_spec.py",
    ],
    visibility = ["PUBLIC"],
    deps = [
        "//executorch/exir/backend:compile_spec_schema",
    ],
)
11 changes: 10 additions & 1 deletion backends/nvidia/tensorrt/test/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,13 @@ def define_common_targets():
The directory containing this targets.bzl file should also contain both
TARGETS and BUCK files that call this function.
"""
pass

# Unit tests for the TensorRT compile spec (de)serialization helpers.
runtime.python_test(
    name = "test_compile_spec",
    srcs = [
        "test_compile_spec.py",
    ],
    deps = [
        "//executorch/backends/nvidia/tensorrt:compile_spec",
    ],
)
105 changes: 105 additions & 0 deletions backends/nvidia/tensorrt/test/test_compile_spec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""Tests for TensorRT compile spec."""

import unittest


class CompileSpecTest(unittest.TestCase):
    """Tests for TensorRTCompileSpec functionality.

    Imports are kept inside each test so collection does not fail when the
    TensorRT package is unavailable in the test environment.
    """

    def test_default_values(self) -> None:
        """A freshly constructed spec exposes the documented defaults."""
        from executorch.backends.nvidia.tensorrt.compile_spec import (
            TensorRTCompileSpec,
            TensorRTPrecision,
        )

        spec = TensorRTCompileSpec()
        self.assertEqual(spec.workspace_size, 1 << 30)  # 1GB
        self.assertEqual(spec.precision, TensorRTPrecision.FP32)
        self.assertFalse(spec.strict_type_constraints)
        self.assertEqual(spec.max_batch_size, 1)
        self.assertEqual(spec.device_id, 0)
        self.assertEqual(spec.dla_core, -1)
        self.assertTrue(spec.allow_gpu_fallback)

    def test_custom_values(self) -> None:
        """Constructor keyword arguments override every default."""
        from executorch.backends.nvidia.tensorrt.compile_spec import (
            TensorRTCompileSpec,
            TensorRTPrecision,
        )

        spec = TensorRTCompileSpec(
            workspace_size=2 << 30,  # 2GB
            precision=TensorRTPrecision.FP16,
            strict_type_constraints=True,
            max_batch_size=8,
            device_id=1,
            dla_core=0,
            allow_gpu_fallback=False,
        )
        self.assertEqual(spec.workspace_size, 2 << 30)
        self.assertEqual(spec.precision, TensorRTPrecision.FP16)
        self.assertTrue(spec.strict_type_constraints)
        self.assertEqual(spec.max_batch_size, 8)
        self.assertEqual(spec.device_id, 1)
        self.assertEqual(spec.dla_core, 0)
        self.assertFalse(spec.allow_gpu_fallback)

    def test_serialization_roundtrip(self) -> None:
        """A default spec survives to_compile_specs/from_compile_specs."""
        from executorch.backends.nvidia.tensorrt.compile_spec import (
            TensorRTCompileSpec,
        )

        original = TensorRTCompileSpec()
        restored = TensorRTCompileSpec.from_compile_specs(
            original.to_compile_specs()
        )

        self.assertIsNotNone(restored)
        # Dataclass equality compares every field, so this catches a
        # serialization bug in any option, not just a sampled subset.
        self.assertEqual(original, restored)

    def test_serialization_roundtrip_custom(self) -> None:
        """Non-default values for every field survive a round trip."""
        from executorch.backends.nvidia.tensorrt.compile_spec import (
            TensorRTCompileSpec,
            TensorRTPrecision,
        )

        # Every field set away from its default so the roundtrip cannot
        # pass by accidentally reconstructing defaults.
        original = TensorRTCompileSpec(
            workspace_size=512 << 20,  # 512MB
            precision=TensorRTPrecision.INT8,
            strict_type_constraints=True,
            max_batch_size=16,
            device_id=1,
            dla_core=0,
            allow_gpu_fallback=False,
        )
        restored = TensorRTCompileSpec.from_compile_specs(
            original.to_compile_specs()
        )

        self.assertIsNotNone(restored)
        # Full-field comparison via dataclass __eq__ instead of spot checks.
        self.assertEqual(original, restored)

    def test_from_empty_compile_specs(self) -> None:
        """An empty spec list yields None rather than raising."""
        from executorch.backends.nvidia.tensorrt.compile_spec import (
            TensorRTCompileSpec,
        )

        result = TensorRTCompileSpec.from_compile_specs([])
        self.assertIsNone(result)

    def test_compile_spec_key(self) -> None:
        """Serialization emits exactly one CompileSpec under the shared key."""
        from executorch.backends.nvidia.tensorrt.compile_spec import (
            TENSORRT_COMPILE_SPEC_KEY,
            TensorRTCompileSpec,
        )

        spec = TensorRTCompileSpec()
        serialized = spec.to_compile_specs()

        self.assertEqual(len(serialized), 1)
        self.assertEqual(serialized[0].key, TENSORRT_COMPILE_SPEC_KEY)
Loading