From f3508a1458c0ad55fe509d8d6a7cc4ef6e5cfa89 Mon Sep 17 00:00:00 2001
From: JewelRoam <2752594773@qq.com>
Date: Tue, 27 Jan 2026 19:44:40 +0800
Subject: [PATCH 1/6] Add BaseRunner and BaseBackend

---
 graph_net_bench/torch/backends/protocol.py   | 31 +++++++++++++++++
 graph_net_bench/torch/runners/base_runner.py | 35 ++++++++++++++++++++
 2 files changed, 66 insertions(+)
 create mode 100644 graph_net_bench/torch/backends/protocol.py
 create mode 100644 graph_net_bench/torch/runners/base_runner.py

diff --git a/graph_net_bench/torch/backends/protocol.py b/graph_net_bench/torch/backends/protocol.py
new file mode 100644
index 000000000..4322564b2
--- /dev/null
+++ b/graph_net_bench/torch/backends/protocol.py
@@ -0,0 +1,31 @@
+from abc import ABC, abstractmethod
+from typing import Any, Dict
+
+
+class ExecuteResult:
+    """Result of a single backend execution."""
+
+    def __init__(self, outputs: Any, metrics: Dict[str, float]):
+        self.outputs = outputs
+        self.metrics = metrics
+
+
+class BaseBackend(ABC):
+    """Base class for all backends."""
+
+    def __init__(self, model_path: str, config: Dict[str, Any]):
+        self.model_path = model_path
+        self.config = config
+
+    @abstractmethod
+    def execute(self) -> ExecuteResult:
+        """Execute a single inference.
+
+        Returns:
+            ExecuteResult containing outputs and metrics
+        """
+        pass
+
+    def cleanup(self) -> None:
+        """Release resources. Override if needed."""
+        pass
diff --git a/graph_net_bench/torch/runners/base_runner.py b/graph_net_bench/torch/runners/base_runner.py
new file mode 100644
index 000000000..8f0b38cbb
--- /dev/null
+++ b/graph_net_bench/torch/runners/base_runner.py
@@ -0,0 +1,35 @@
+from abc import ABC, abstractmethod
+from typing import Any, Dict
+
+from ..backends.protocol import BaseBackend
+
+
+class BenchmarkResult:
+    """Aggregated result of a benchmark run."""
+
+    def __init__(self, outputs: Any, metrics: Dict[str, Any]):
+        self.outputs = outputs
+        self.metrics = metrics
+
+
+class BaseRunner(ABC):
+    """Base class for all runners."""
+
+    @abstractmethod
+    def load_backend(
+        self, backend_spec: Dict[str, Any], model_path: str
+    ) -> BaseBackend:
+        """Load and initialize a backend instance."""
+        pass
+
+    @abstractmethod
+    def cleanup_backend(self, backend: BaseBackend) -> None:
+        """Cleanup backend resources."""
+        pass
+
+    @abstractmethod
+    def run_benchmark(
+        self, backend_spec: Dict[str, Any], model_path: str
+    ) -> BenchmarkResult:
+        """Run the full benchmark (warmup + trials) and return aggregated result."""
+        pass

From 1a346f10201c9c037b4c8c57193bae7add24380c Mon Sep 17 00:00:00 2001
From: JewelRoam <2752594773@qq.com>
Date: Tue, 27 Jan 2026 19:46:40 +0800
Subject: [PATCH 2/6] Adjust config format and related unit test for
 eval_backend_perf

---
 graph_net_bench/torch/eval_backend_diff.py | 42 +++++++++++++++-------
 test/eval_backend_diff_test.sh             | 26 ++++++++++----
 2 files changed, 50 insertions(+), 18 deletions(-)

diff --git a/graph_net_bench/torch/eval_backend_diff.py b/graph_net_bench/torch/eval_backend_diff.py
index cfa171dc6..2afbce88d 100755
--- a/graph_net_bench/torch/eval_backend_diff.py
+++ b/graph_net_bench/torch/eval_backend_diff.py
@@ -189,20 +189,37 @@ def eval_multi_models(args, model_path_prefix=None, use_model_list=False):
             print(f"- {model_path}", file=sys.stderr, flush=True)
 
 
+def _build_args_for_perf(
+    config: dict, model_path: str, output_path: str
+) -> types.SimpleNamespace:
+    """
+    TODO: Remove or rework this args-building helper once eval_backend_perf
+    has been refactored to the new runner-backend structure.
+    """
+    backend_config = config.get("backend_config", {})
+    return types.SimpleNamespace(
+        model_path=model_path,
+        output_path=output_path,
+        compiler=backend_config.get("compiler", ""),
+        device=backend_config.get("device", "cuda"),
+        seed=backend_config.get("seed", 123),
+        warmup=config.get("warmup", 3),
+        trials=config.get("trials", 5),
+        log_prompt=backend_config.get("log_prompt", "graph-net-bench-log"),
+        model_path_prefix=backend_config.get("model_path_prefix"),
+        backend_config=backend_config.get("backend_config"),
+    )
+
+
 def eval_single_model(args):
+    ref_config = test_compiler_util.convert_to_dict(args.reference_config)
+    target_config = test_compiler_util.convert_to_dict(args.target_config)
+
     ref_dir = "/tmp/eval_perf_diff/reference"
     target_dir = "/tmp/eval_perf_diff/target"
 
-    ref_args = types.SimpleNamespace(
-        model_path=args.model_path,
-        output_path=ref_dir,
-        **test_compiler_util.convert_to_dict(args.reference_config),
-    )
-    target_args = types.SimpleNamespace(
-        model_path=args.model_path,
-        output_path=target_dir,
-        **test_compiler_util.convert_to_dict(args.target_config),
-    )
+    ref_args = _build_args_for_perf(ref_config, args.model_path, ref_dir)
+    target_args = _build_args_for_perf(target_config, args.model_path, target_dir)
 
     eval_single_model_with_single_backend(ref_args)
     eval_single_model_with_single_backend(target_args)
@@ -230,11 +247,12 @@ def eval_single_model(args):
 
 def main(args):
     ref_config = test_compiler_util.convert_to_dict(args.reference_config)
-    model_path_prefix = ref_config.get("model_path_prefix")
+    backend_config = ref_config.get("backend_config", {})
+    model_path_prefix = backend_config.get("model_path_prefix")
 
     if args.model_path_list and model_path_prefix:
         eval_multi_models(args, model_path_prefix, use_model_list=True)
-    elif os.path.isdir(args.model_path):
+    elif args.model_path and os.path.isdir(args.model_path):
         if path_utils.is_single_model_dir(args.model_path):
             eval_single_model(args)
         else:
diff --git a/test/eval_backend_diff_test.sh b/test/eval_backend_diff_test.sh
index 1eaca5ecd..5a38f1d38 100755
--- a/test/eval_backend_diff_test.sh
+++ b/test/eval_backend_diff_test.sh
@@ -10,21 +10,35 @@ python3 -m graph_net_bench.torch.eval_backend_diff \
     --model-path-list $model_list \
     --reference-config $(base64 -w 0 <<EOF
 {
-    "compiler": "nope",
-    "device": "cuda",
+    "runner_type": "local",
+    "backend_path": "graph_net_bench.torch.backend.nope_backend",
+    "backend_class": "NopeBackend",
     "warmup": 1,
     "trials": 1,
-    "model_path_prefix": "$AI4C_ROOT"
+    "backend_config": {
+        "compiler": "nope",
+        "device": "cuda",
+        "seed": 123,
+        "log_prompt": "graph-net-bench-log",
+        "model_path_prefix": "$AI4C_ROOT"
+    }
 }
 EOF
 ) \
     --target-config $(base64 -w 0 <<EOF
 {
-    "compiler": "nope",
-    "device": "cuda",
+    "runner_type": "local",
+    "backend_path": "graph_net_bench.torch.backend.nope_backend",
+    "backend_class": "NopeBackend",
     "warmup": 1,
     "trials": 1,
-    "model_path_prefix": "$AI4C_ROOT"
+    "backend_config": {
+        "compiler": "nope",
+        "device": "cuda",
+        "seed": 123,
+        "log_prompt": "graph-net-bench-log",
+        "model_path_prefix": "$AI4C_ROOT"
+    }
 }
 EOF
 ) 2>&1 | tee "$OUTPUT_PATH/validation.log"

From 0e4478e0370613ee4e707bf435de86a53e20dee0 Mon Sep 17 00:00:00 2001
From: JewelRoam <2752594773@qq.com>
Date: Tue, 27 Jan 2026 19:57:32 +0800
Subject: [PATCH 3/6] Rename backends/protocol.py to base_backend.py

---
 graph_net_bench/torch/backends/{protocol.py => base_backend.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename graph_net_bench/torch/backends/{protocol.py => base_backend.py} (100%)

diff --git a/graph_net_bench/torch/backends/protocol.py b/graph_net_bench/torch/backends/base_backend.py
similarity index 100%
rename from graph_net_bench/torch/backends/protocol.py
rename to graph_net_bench/torch/backends/base_backend.py

From 475c483a61d8da1f4b832fe0c9fd8cd9e53c0723 Mon Sep 17 00:00:00 2001
From: JewelRoam <2752594773@qq.com>
Date: Wed, 28 Jan 2026 17:29:49 +0800
Subject: [PATCH 4/6] Add BaseBackend.warmup and update base_runner import

---
 graph_net_bench/torch/backends/base_backend.py | 12 +++++++++++-
 graph_net_bench/torch/runners/base_runner.py   |  2 +-
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/graph_net_bench/torch/backends/base_backend.py b/graph_net_bench/torch/backends/base_backend.py
index 4322564b2..431385570 100644
--- a/graph_net_bench/torch/backends/base_backend.py
+++ b/graph_net_bench/torch/backends/base_backend.py
@@ -22,10 +22,20 @@ def execute(self) -> ExecuteResult:
         """Execute a single inference.
 
         Returns:
-            ExecuteResult containing outputs and metrics
+            ExecuteResult containing outputs and metrics.
+            Timing in metrics must include device synchronization.
         """
         pass
 
+    def warmup(self, num_warmup: int) -> None:
+        """Warmup runs before benchmark.
+
+        Default implementation executes num_warmup times.
+        Override for custom warmup logic.
+        """
+        for _ in range(num_warmup):
+            self.execute()
+
     def cleanup(self) -> None:
         """Release resources. Override if needed."""
         pass
diff --git a/graph_net_bench/torch/runners/base_runner.py b/graph_net_bench/torch/runners/base_runner.py
index 8f0b38cbb..1d73fc04c 100644
--- a/graph_net_bench/torch/runners/base_runner.py
+++ b/graph_net_bench/torch/runners/base_runner.py
@@ -1,7 +1,7 @@
 from abc import ABC, abstractmethod
 from typing import Any, Dict
 
-from ..backends.protocol import BaseBackend
+from ..backends.base_backend import BaseBackend
 
 
 class BenchmarkResult:

From 3725b7ee55a266717c22a178ad87e18c247d0d29 Mon Sep 17 00:00:00 2001
From: JewelRoam <2752594773@qq.com>
Date: Mon, 9 Feb 2026 10:29:26 +0800
Subject: [PATCH 5/6] Remove BaseRunner and BaseBackend

---
 .../torch/backends/base_backend.py            | 41 -------------------
 graph_net_bench/torch/runners/base_runner.py  | 35 ----------------
 2 files changed, 76 deletions(-)
 delete mode 100644 graph_net_bench/torch/backends/base_backend.py
 delete mode 100644 graph_net_bench/torch/runners/base_runner.py

diff --git a/graph_net_bench/torch/backends/base_backend.py b/graph_net_bench/torch/backends/base_backend.py
deleted file mode 100644
index 431385570..000000000
--- a/graph_net_bench/torch/backends/base_backend.py
+++ /dev/null
@@ -1,41 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import Any, Dict
-
-
-class ExecuteResult:
-    """Result of a single backend execution."""
-
-    def __init__(self, outputs: Any, metrics: Dict[str, float]):
-        self.outputs = outputs
-        self.metrics = metrics
-
-
-class BaseBackend(ABC):
-    """Base class for all backends."""
-
-    def __init__(self, model_path: str, config: Dict[str, Any]):
-        self.model_path = model_path
-        self.config = config
-
-    @abstractmethod
-    def execute(self) -> ExecuteResult:
-        """Execute a single inference.
-
-        Returns:
-            ExecuteResult containing outputs and metrics.
-            Timing in metrics must include device synchronization.
-        """
-        pass
-
-    def warmup(self, num_warmup: int) -> None:
-        """Warmup runs before benchmark.
-
-        Default implementation executes num_warmup times.
-        Override for custom warmup logic.
-        """
-        for _ in range(num_warmup):
-            self.execute()
-
-    def cleanup(self) -> None:
-        """Release resources. Override if needed."""
-        pass
diff --git a/graph_net_bench/torch/runners/base_runner.py b/graph_net_bench/torch/runners/base_runner.py
deleted file mode 100644
index 1d73fc04c..000000000
--- a/graph_net_bench/torch/runners/base_runner.py
+++ /dev/null
@@ -1,35 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import Any, Dict
-
-from ..backends.base_backend import BaseBackend
-
-
-class BenchmarkResult:
-    """Aggregated result of a benchmark run."""
-
-    def __init__(self, outputs: Any, metrics: Dict[str, Any]):
-        self.outputs = outputs
-        self.metrics = metrics
-
-
-class BaseRunner(ABC):
-    """Base class for all runners."""
-
-    @abstractmethod
-    def load_backend(
-        self, backend_spec: Dict[str, Any], model_path: str
-    ) -> BaseBackend:
-        """Load and initialize a backend instance."""
-        pass
-
-    @abstractmethod
-    def cleanup_backend(self, backend: BaseBackend) -> None:
-        """Cleanup backend resources."""
-        pass
-
-    @abstractmethod
-    def run_benchmark(
-        self, backend_spec: Dict[str, Any], model_path: str
-    ) -> BenchmarkResult:
-        """Run the full benchmark (warmup + trials) and return aggregated result."""
-        pass

From fa1c43f017ba3d8c724fab57494e02ec464ecbd2 Mon Sep 17 00:00:00 2001
From: JewelRoam <2752594773@qq.com>
Date: Mon, 9 Feb 2026 16:19:01 +0800
Subject: [PATCH 6/6] Modify auto fault bisearcher to use
 graph_net_bench.torch.eval_backend_diff

---
 .../fault_locator/torch/compiler_evaluator.py | 40 ++++++++++++++-----
 graph_net_bench/torch/eval_backend_diff.py    |  2 +-
 2 files changed, 32 insertions(+), 10 deletions(-)

diff --git a/graph_net/fault_locator/torch/compiler_evaluator.py b/graph_net/fault_locator/torch/compiler_evaluator.py
index b70056b74..bf282b07b 100644
--- a/graph_net/fault_locator/torch/compiler_evaluator.py
+++ b/graph_net/fault_locator/torch/compiler_evaluator.py
@@ -51,21 +51,43 @@ def _prepare_workspace(self, tmp_dir: Path, rel_model_path: str) -> Path:
 
     def _execute_benchmark(self, allow_list_path: Path, log_file: Path):
         """
-        Invokes the torch.test_compiler module and redirects output to a log file.
+        Invokes the torch.eval_backend_diff module and redirects output to a log file.
         Uses sys.executable to ensure the same Python environment is used.
         """
+        import base64
+        import json
+
+        ref_config = {
+            "backend_config": {
+                "compiler": "nope",
+                "device": self.config["device"],
+                "model_path_prefix": self.config["model_path_prefix"],
+            }
+        }
+
+        target_config = {
+            "backend_config": {
+                "compiler": self.config["compiler"],
+                "device": self.config["device"],
+                "model_path_prefix": self.config["model_path_prefix"],
+            }
+        }
+
+        ref_config_b64 = base64.b64encode(json.dumps(ref_config).encode()).decode()
+        target_config_b64 = base64.b64encode(
+            json.dumps(target_config).encode()
+        ).decode()
+
         cmd = [
             sys.executable,
             "-m",
-            "graph_net_bench.torch.test_compiler",
-            "--model-path-prefix",
-            f"{self.config['model_path_prefix']}/",
-            "--allow-list",
+            "graph_net_bench.torch.eval_backend_diff",
+            "--model-path-list",
             str(allow_list_path),
-            "--compiler",
-            self.config["compiler"],
-            "--device",
-            self.config["device"],
+            "--reference-config",
+            ref_config_b64,
+            "--target-config",
+            target_config_b64,
         ]
         print(" ".join(cmd))
         with log_file.open("w") as f:
diff --git a/graph_net_bench/torch/eval_backend_diff.py b/graph_net_bench/torch/eval_backend_diff.py
index 2afbce88d..5e2c89391 100755
--- a/graph_net_bench/torch/eval_backend_diff.py
+++ b/graph_net_bench/torch/eval_backend_diff.py
@@ -154,7 +154,7 @@ def eval_multi_models(args, model_path_prefix=None, use_model_list=False):
     failed_samples = []
     for sample_idx, model_path in enumerate(model_paths):
         print(
-            f"[{sample_idx}] {module_name}, model_path: {model_path}",
+            f"[{sample_idx}][Processing] {module_name}, model_path: {model_path}",
             file=sys.stderr,
             flush=True,
         )