diff --git a/backends/nvidia/tensorrt/converters/__init__.py b/backends/nvidia/tensorrt/converters/__init__.py index 71c279176b5..b6390896467 100644 --- a/backends/nvidia/tensorrt/converters/__init__.py +++ b/backends/nvidia/tensorrt/converters/__init__.py @@ -8,3 +8,8 @@ # Import converters to trigger registration via @converter decorator from executorch.backends.nvidia.tensorrt.converters import add # noqa: F401 +from executorch.backends.nvidia.tensorrt.converters import div # noqa: F401 +from executorch.backends.nvidia.tensorrt.converters import mm # noqa: F401 +from executorch.backends.nvidia.tensorrt.converters import mul # noqa: F401 +from executorch.backends.nvidia.tensorrt.converters import relu # noqa: F401 +from executorch.backends.nvidia.tensorrt.converters import sub # noqa: F401 diff --git a/backends/nvidia/tensorrt/converters/targets.bzl b/backends/nvidia/tensorrt/converters/targets.bzl index f77076ab334..c12d72ef352 100644 --- a/backends/nvidia/tensorrt/converters/targets.bzl +++ b/backends/nvidia/tensorrt/converters/targets.bzl @@ -12,6 +12,11 @@ def define_common_targets(): srcs = [ "__init__.py", "add.py", + "div.py", + "mm.py", + "mul.py", + "relu.py", + "sub.py", ], visibility = ["PUBLIC"], deps = [ diff --git a/backends/nvidia/tensorrt/partitioner/operator_support.py b/backends/nvidia/tensorrt/partitioner/operator_support.py index 7d784a8b6c4..d565e2e3998 100644 --- a/backends/nvidia/tensorrt/partitioner/operator_support.py +++ b/backends/nvidia/tensorrt/partitioner/operator_support.py @@ -22,10 +22,19 @@ class TensorRTOperatorSupport(OperatorSupportBase): 3. Its output dtype is in SUPPORTED_DTYPES """ - # Operations that have TensorRT converters. - # Format: "op_name.overload" (e.g., "add.Tensor") + # Operations that have TensorRT converters (sorted alphabetically). SUPPORTED_OPS: Set[str] = { "add.Tensor", + "add_.Tensor", + "div.Tensor", + "div.Tensor_mode", + "mm.default", + "mul.Scalar", + "mul.Tensor", + "mul_.Tensor", + "relu.default", + "relu_.default", + "sub.Tensor", } # Glue operations that don't compute but are needed to keep partitions connected. @@ -86,8 +95,15 @@ def _get_op_name(self, node: torch.fx.Node) -> str: if hasattr(target, "_schema"): schema = target._schema base_name = schema.name.replace("::", ".") - if hasattr(schema, "overload_name") and schema.overload_name: - return f"{base_name}.{schema.overload_name}" + # Note: For the "default" overload, overload_name is an empty string "", + # so we need to check for that and use "default" as the overload name. + if hasattr(schema, "overload_name"): + overload_name = schema.overload_name + if overload_name: + return f"{base_name}.{overload_name}" + else: + # Empty overload_name means "default" overload + return f"{base_name}.default" return base_name # Callable with module info (e.g., operator.getitem) diff --git a/examples/nvidia/tensorrt/export.py b/examples/nvidia/tensorrt/export.py index 096057cc39d..627cfe203b2 100644 --- a/examples/nvidia/tensorrt/export.py +++ b/examples/nvidia/tensorrt/export.py @@ -36,6 +36,7 @@ "add", "add_mul", "mul", + "softmax", } diff --git a/examples/nvidia/tensorrt/tests/test_export.py b/examples/nvidia/tensorrt/tests/test_export.py index 291e725bba9..76441f96d59 100644 --- a/examples/nvidia/tensorrt/tests/test_export.py +++ b/examples/nvidia/tensorrt/tests/test_export.py @@ -95,3 +95,6 @@ def test_add_bf16(self) -> None: exec_prog = edge.to_executorch() self.assertIsNotNone(exec_prog) logger.info("PASS: add model exported with BF16 precision") + + def test_softmax(self) -> None: + _export_and_verify("softmax")