diff --git a/backends/nvidia/tensorrt/README.md b/backends/nvidia/tensorrt/README.md
index bbb5dc01cc2..38e9c52de86 100644
--- a/backends/nvidia/tensorrt/README.md
+++ b/backends/nvidia/tensorrt/README.md
@@ -52,3 +52,17 @@ Download and install from the
 | Linux | x86_64 | pip or NVIDIA installer |
 | Linux (Jetson) | aarch64 | Pre-installed via JetPack |
 | Windows | x86_64 | NVIDIA installer |
+
+## Configuration Options
+
+`TensorRTCompileSpec` supports the following options:
+
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `workspace_size` | int | 1GB | TensorRT builder workspace size in bytes |
+| `precision` | TensorRTPrecision | FP32 | Inference precision (FP32, FP16, INT8, BF16) |
+| `strict_type_constraints` | bool | False | Enforce strict type constraints |
+| `max_batch_size` | int | 1 | Maximum batch size |
+| `device_id` | int | 0 | CUDA device ID |
+| `dla_core` | int | -1 | DLA core ID (-1 = disabled) |
+| `allow_gpu_fallback` | bool | True | Allow GPU fallback when using DLA |
diff --git a/backends/nvidia/tensorrt/__init__.py b/backends/nvidia/tensorrt/__init__.py
index 8d6d111f375..740e5582e69 100644
--- a/backends/nvidia/tensorrt/__init__.py
+++ b/backends/nvidia/tensorrt/__init__.py
@@ -27,9 +27,11 @@ sys.path.append(_pkg_dir)
 
 
 from executorch.backends.nvidia.tensorrt.backend import TensorRTBackend
+from executorch.backends.nvidia.tensorrt.compile_spec import TensorRTCompileSpec
 from executorch.backends.nvidia.tensorrt.partitioner import TensorRTPartitioner
 
 __all__ = [
     "TensorRTBackend",
+    "TensorRTCompileSpec",
     "TensorRTPartitioner",
 ]
diff --git a/backends/nvidia/tensorrt/compile_spec.py b/backends/nvidia/tensorrt/compile_spec.py
new file mode 100644
index 00000000000..ec780b1e4ad
--- /dev/null
+++ b/backends/nvidia/tensorrt/compile_spec.py
@@ -0,0 +1,139 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Compile specification for TensorRT backend."""
+
+import json
+from dataclasses import asdict, dataclass, fields
+from enum import Enum
+from typing import List, Optional
+
+from executorch.exir.backend.compile_spec_schema import CompileSpec
+
+
+# Key used to identify TensorRT compile specs in the CompileSpec list
+TENSORRT_COMPILE_SPEC_KEY = "tensorrt_compile_spec"
+
+
+class TensorRTPrecision(Enum):
+    """Supported precision modes for TensorRT."""
+
+    FP32 = "fp32"
+    FP16 = "fp16"
+    INT8 = "int8"
+    BF16 = "bf16"
+
+
+@dataclass
+class TensorRTCompileSpec:
+    """Configuration options for TensorRT compilation.
+
+    This dataclass holds all the configuration options needed to compile
+    a model with TensorRT. It can be serialized to/from CompileSpec format
+    for use with the ExecuTorch backend interface.
+
+    Attributes:
+        workspace_size: Maximum GPU memory (in bytes) that TensorRT can use
+            for temporary storage during engine building. Default is 1GB.
+        precision: Target precision for the TensorRT engine. Default is FP32.
+        strict_type_constraints: If True, TensorRT will strictly follow the
+            specified precision. If False, it may use higher precision where
+            beneficial for accuracy.
+        max_batch_size: Maximum batch size the engine will be optimized for.
+        device_id: CUDA device ID to use for building the engine.
+        dla_core: DLA (Deep Learning Accelerator) core to use, if available.
+            Set to -1 to disable DLA.
+        allow_gpu_fallback: If True, allows falling back to GPU for layers
+            that cannot run on DLA.
+    """
+
+    workspace_size: int = 1 << 30  # 1GB default
+    precision: TensorRTPrecision = TensorRTPrecision.FP32
+    strict_type_constraints: bool = False
+    max_batch_size: int = 1
+    device_id: int = 0
+    dla_core: int = -1  # -1 means DLA is disabled
+    allow_gpu_fallback: bool = True
+
+    def to_compile_specs(self) -> List[CompileSpec]:
+        """Serialize this TensorRTCompileSpec to a list of CompileSpec.
+
+        The options are encoded as a UTF-8 JSON object keyed by field name,
+        with ``precision`` stored as its string value (e.g. ``"fp16"``).
+
+        Returns:
+            List containing a single CompileSpec with the serialized options.
+        """
+        # asdict() follows the field declarations, so an option added to the
+        # dataclass is serialized without having to update this method.
+        options = asdict(self)
+        # Enum members are not JSON serializable; store the string value.
+        options["precision"] = self.precision.value
+        return [
+            CompileSpec(
+                key=TENSORRT_COMPILE_SPEC_KEY,
+                value=json.dumps(options).encode("utf-8"),
+            )
+        ]
+
+    @classmethod
+    def from_compile_specs(
+        cls, compile_specs: List[CompileSpec]
+    ) -> Optional["TensorRTCompileSpec"]:
+        """Deserialize a TensorRTCompileSpec from a list of CompileSpec.
+
+        The first entry keyed by TENSORRT_COMPILE_SPEC_KEY is used. Options
+        missing from its payload keep the dataclass defaults, and payload
+        keys that are not fields of this class are ignored.
+
+        Args:
+            compile_specs: List of CompileSpec to search for TensorRT options.
+
+        Returns:
+            TensorRTCompileSpec if found, None otherwise.
+
+        Raises:
+            ValueError: If the payload is not a valid JSON object or names
+                an unknown precision.
+        """
+        for spec in compile_specs:
+            if spec.key != TENSORRT_COMPILE_SPEC_KEY:
+                continue
+            value = (
+                spec.value.decode("utf-8")
+                if isinstance(spec.value, (bytes, bytearray))
+                else spec.value
+            )
+            options = json.loads(value)
+            if not isinstance(options, dict):
+                raise ValueError(
+                    f"Expected a JSON object for {TENSORRT_COMPILE_SPEC_KEY}, "
+                    f"got {type(options).__name__}"
+                )
+            # Forward only the keys that are present so the defaults declared
+            # on the dataclass stay the single source of truth.
+            kwargs = {
+                field.name: options[field.name]
+                for field in fields(cls)
+                if field.name in options
+            }
+            if "precision" in kwargs:
+                kwargs["precision"] = TensorRTPrecision(kwargs["precision"])
+            return cls(**kwargs)
+        return None
+
+    def __repr__(self) -> str:
+        """Return a debug string listing every option, precision by value."""
+        return (
+            "TensorRTCompileSpec("
+            f"workspace_size={self.workspace_size}, "
+            f"precision={self.precision.value}, "
+            f"strict_type_constraints={self.strict_type_constraints}, "
+            f"max_batch_size={self.max_batch_size}, "
+            f"device_id={self.device_id}, "
+            f"dla_core={self.dla_core}, "
+            f"allow_gpu_fallback={self.allow_gpu_fallback})"
+        )
diff --git a/backends/nvidia/tensorrt/targets.bzl b/backends/nvidia/tensorrt/targets.bzl
index f946174dcea..695b55798f6 100644
--- a/backends/nvidia/tensorrt/targets.bzl
+++ b/backends/nvidia/tensorrt/targets.bzl
@@ -52,3 +52,14 @@ def define_common_targets():
             "//deeplearning/trt/python:py_tensorrt",
         ],
     )
+
+    runtime.python_library(
+        name = "compile_spec",
+        srcs = [
+            "compile_spec.py",
+        ],
+        visibility = ["PUBLIC"],
+        deps = [
+            "//executorch/exir/backend:compile_spec_schema",
+        ],
+    )
diff --git a/backends/nvidia/tensorrt/test/targets.bzl b/backends/nvidia/tensorrt/test/targets.bzl
index 4a2adabb33e..e82fe69c900 100644
--- a/backends/nvidia/tensorrt/test/targets.bzl
+++ b/backends/nvidia/tensorrt/test/targets.bzl
@@ -6,4 +6,14 @@ def define_common_targets():
     The directory containing this targets.bzl file should also contain both
     TARGETS and BUCK files that call this function.
     """
-    pass
+
+    runtime.python_test(
+        name = "test_compile_spec",
+        srcs = [
+            "test_compile_spec.py",
+        ],
+        deps = [
+            "//executorch/backends/nvidia/tensorrt:compile_spec",
+            "//executorch/exir/backend:compile_spec_schema",
+        ],
+    )
diff --git a/backends/nvidia/tensorrt/test/test_compile_spec.py b/backends/nvidia/tensorrt/test/test_compile_spec.py
new file mode 100644
index 00000000000..5e61e1706e9
--- /dev/null
+++ b/backends/nvidia/tensorrt/test/test_compile_spec.py
@@ -0,0 +1,154 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Tests for TensorRT compile spec."""
+
+import json
+import unittest
+
+
+class CompileSpecTest(unittest.TestCase):
+    """Tests for TensorRTCompileSpec functionality."""
+
+    def test_default_values(self) -> None:
+        from executorch.backends.nvidia.tensorrt.compile_spec import (
+            TensorRTCompileSpec,
+            TensorRTPrecision,
+        )
+
+        spec = TensorRTCompileSpec()
+        self.assertEqual(spec.workspace_size, 1 << 30)  # 1GB
+        self.assertEqual(spec.precision, TensorRTPrecision.FP32)
+        self.assertFalse(spec.strict_type_constraints)
+        self.assertEqual(spec.max_batch_size, 1)
+        self.assertEqual(spec.device_id, 0)
+        self.assertEqual(spec.dla_core, -1)
+        self.assertTrue(spec.allow_gpu_fallback)
+
+    def test_custom_values(self) -> None:
+        from executorch.backends.nvidia.tensorrt.compile_spec import (
+            TensorRTCompileSpec,
+            TensorRTPrecision,
+        )
+
+        spec = TensorRTCompileSpec(
+            workspace_size=2 << 30,  # 2GB
+            precision=TensorRTPrecision.FP16,
+            strict_type_constraints=True,
+            max_batch_size=8,
+            device_id=1,
+            dla_core=0,
+            allow_gpu_fallback=False,
+        )
+        self.assertEqual(spec.workspace_size, 2 << 30)
+        self.assertEqual(spec.precision, TensorRTPrecision.FP16)
+        self.assertTrue(spec.strict_type_constraints)
+        self.assertEqual(spec.max_batch_size, 8)
+        self.assertEqual(spec.device_id, 1)
+        self.assertEqual(spec.dla_core, 0)
+        self.assertFalse(spec.allow_gpu_fallback)
+
+    def test_serialization_roundtrip(self) -> None:
+        from executorch.backends.nvidia.tensorrt.compile_spec import (
+            TensorRTCompileSpec,
+        )
+
+        # Test with default values
+        original = TensorRTCompileSpec()
+        serialized = original.to_compile_specs()
+        restored = TensorRTCompileSpec.from_compile_specs(serialized)
+
+        # Dataclass equality compares every field, not just a sample.
+        self.assertEqual(original, restored)
+
+    def test_serialization_roundtrip_custom(self) -> None:
+        from executorch.backends.nvidia.tensorrt.compile_spec import (
+            TensorRTCompileSpec,
+            TensorRTPrecision,
+        )
+
+        # Every field differs from its default so that a value dropped while
+        # serializing cannot be masked by the fallback used when loading.
+        original = TensorRTCompileSpec(
+            workspace_size=512 << 20,  # 512MB
+            precision=TensorRTPrecision.INT8,
+            strict_type_constraints=True,
+            max_batch_size=16,
+            device_id=1,
+            dla_core=0,
+            allow_gpu_fallback=False,
+        )
+        serialized = original.to_compile_specs()
+        restored = TensorRTCompileSpec.from_compile_specs(serialized)
+
+        self.assertEqual(original, restored)
+
+    def test_serialization_roundtrip_all_precisions(self) -> None:
+        from executorch.backends.nvidia.tensorrt.compile_spec import (
+            TensorRTCompileSpec,
+            TensorRTPrecision,
+        )
+
+        for precision in TensorRTPrecision:
+            with self.subTest(precision=precision):
+                original = TensorRTCompileSpec(precision=precision)
+                restored = TensorRTCompileSpec.from_compile_specs(
+                    original.to_compile_specs()
+                )
+                self.assertEqual(original, restored)
+
+    def test_from_partial_payload_uses_defaults(self) -> None:
+        from executorch.backends.nvidia.tensorrt.compile_spec import (
+            TENSORRT_COMPILE_SPEC_KEY,
+            TensorRTCompileSpec,
+            TensorRTPrecision,
+        )
+        from executorch.exir.backend.compile_spec_schema import CompileSpec
+
+        # Unrelated entries and unknown option names must both be skipped.
+        payload = json.dumps({"precision": "bf16", "unknown_option": 1})
+        restored = TensorRTCompileSpec.from_compile_specs(
+            [
+                CompileSpec(key="unrelated", value=b""),
+                CompileSpec(
+                    key=TENSORRT_COMPILE_SPEC_KEY,
+                    value=payload.encode("utf-8"),
+                ),
+            ]
+        )
+
+        self.assertEqual(
+            restored, TensorRTCompileSpec(precision=TensorRTPrecision.BF16)
+        )
+
+    def test_from_empty_compile_specs(self) -> None:
+        from executorch.backends.nvidia.tensorrt.compile_spec import (
+            TensorRTCompileSpec,
+        )
+
+        result = TensorRTCompileSpec.from_compile_specs([])
+        self.assertIsNone(result)
+
+    def test_compile_spec_key(self) -> None:
+        from executorch.backends.nvidia.tensorrt.compile_spec import (
+            TENSORRT_COMPILE_SPEC_KEY,
+            TensorRTCompileSpec,
+        )
+
+        spec = TensorRTCompileSpec()
+        serialized = spec.to_compile_specs()
+
+        self.assertEqual(len(serialized), 1)
+        self.assertEqual(serialized[0].key, TENSORRT_COMPILE_SPEC_KEY)
+
+    def test_repr_lists_all_options(self) -> None:
+        from executorch.backends.nvidia.tensorrt.compile_spec import (
+            TensorRTCompileSpec,
+        )
+
+        text = repr(TensorRTCompileSpec(device_id=3, dla_core=1))
+        self.assertIn("device_id=3", text)
+        self.assertIn("dla_core=1", text)