microsoft · haoranpb · May 28, 2026 · May 28, 2026 · May 28, 2026
diff --git a/.github/workflows/claude-evaluation.yml b/.github/workflows/claude-evaluation.yml
@@ -33,6 +33,11 @@ on:
         required: false
         default: false
         type: boolean
+      al-lsp:
+        description: "Enable AL LSP server"
+        required: false
+        default: false
+        type: boolean
       repeat:
         description: "Number of times to run sequentially (ignored for test runs)"
         required: false
@@ -100,9 +105,9 @@ jobs:
           node-version: 24
 
       - name: Install AL Tool
-        if: ${{ inputs.al-mcp }}
+        if: ${{ inputs.al-mcp || inputs.al-lsp }}
         run: |
-          dotnet tool install -g Microsoft.Dynamics.BusinessCentral.Development.Tools --version 17.0.33.55542
+          dotnet tool install -g Microsoft.Dynamics.BusinessCentral.Development.Tools --version 18.0.36.64936-beta
           echo "$HOME\.dotnet\tools" >> $env:GITHUB_PATH
 
       - name: Install Claude Code
@@ -120,7 +125,8 @@ jobs:
             --category "${{ inputs.category }}" `
             --repo-path "${{ steps.setup-env.outputs.repo_path }}" `
             --output-dir "${{ env.EVALUATION_RESULTS_DIR }}" `
-            ${{ inputs.al-mcp && '--al-mcp' || '' }}
+            ${{ inputs.al-mcp && '--al-mcp' || '' }} `
+            ${{ inputs.al-lsp && '--al-lsp' || '' }}
 
       - name: Upload evaluation results
         uses: actions/upload-artifact@v6
@@ -155,4 +161,4 @@ jobs:
       workflow-file: claude-evaluation.yml
       repeat: ${{ inputs.repeat }}
       workflow-inputs: |
-        {"model": "${{ inputs.model }}", "category": "${{ inputs.category }}", "test-run": "${{ inputs.test-run }}", "al-mcp": "${{ inputs.al-mcp }}"}
+        {"model": "${{ inputs.model }}", "category": "${{ inputs.category }}", "test-run": "${{ inputs.test-run }}", "al-mcp": "${{ inputs.al-mcp }}", "al-lsp": "${{ inputs.al-lsp }}"}
diff --git a/EXPERIMENT.md b/EXPERIMENT.md
@@ -77,7 +77,7 @@ Trigger the evaluation workflow from the **Actions** tab:
 
 - **Workflow:** `Evaluation with GitHub Copilot` or `Evaluation with Claude Code`
 - **`test-run`:** `true` (default — runs 4 entries, ~10 min)
-- **`model`**, **`category`**, **`al-mcp`**: as needed
+- **`model`**, **`category`**, **`al-mcp`**, **`al-lsp`**: as needed
 
 This catches configuration mistakes cheaply. Do not skip it.
 

diff --git a/src/bcbench/agent/claude/agent.py b/src/bcbench/agent/claude/agent.py
@@ -6,7 +6,7 @@
 import yaml
 
 from bcbench.agent.claude.metrics import parse_metrics
-from bcbench.agent.shared import build_mcp_config, build_prompt, parse_tool_usage_from_hooks
+from bcbench.agent.shared import build_al_lsp_plugin, build_mcp_config, build_prompt, parse_tool_usage_from_hooks
 from bcbench.config import get_config
 from bcbench.dataset import BaseDatasetEntry
 from bcbench.exceptions import AgentError, AgentTimeoutError
@@ -19,7 +19,14 @@
 
 
 def run_claude_code(
-    entry: BaseDatasetEntry, model: str, category: EvaluationCategory, repo_path: Path, output_dir: Path, al_mcp: bool = False, container_name: str = "bcbench"
+    entry: BaseDatasetEntry,
+    model: str,
+    category: EvaluationCategory,
+    repo_path: Path,
+    output_dir: Path,
+    al_mcp: bool = False,
+    al_lsp: bool = False,
+    container_name: str = "bcbench",
 ) -> tuple[AgentMetrics | None, ExperimentConfiguration]:
     """Run Claude Code on a single dataset entry.
 
@@ -33,12 +40,14 @@ def run_claude_code(
 
     prompt: str = build_prompt(entry, repo_path, claude_config, category, al_mcp=al_mcp)
     mcp_config_json, mcp_server_names = build_mcp_config(claude_config, entry, repo_path, al_mcp=al_mcp, container_name=container_name)
+    lsp_plugin_dir: Path | None = build_al_lsp_plugin(entry, category, repo_path, AgentType.CLAUDE, al_lsp=al_lsp, container_name=container_name)
     instructions_enabled: bool = setup_instructions_from_config(claude_config, entry, repo_path, agent_type=AgentType.CLAUDE)
     skills_enabled: bool = setup_agent_skills(claude_config, entry, repo_path, agent_type=AgentType.CLAUDE)
     custom_agent: str | None = setup_custom_agent(claude_config, entry, repo_path, agent_type=AgentType.CLAUDE)
     tool_log_path: Path = setup_hooks(repo_path, AgentType.CLAUDE, output_dir)
     config = ExperimentConfiguration(
         mcp_servers=mcp_server_names,
+        al_lsp_enabled=lsp_plugin_dir is not None,
         custom_instructions=instructions_enabled,
         skills_enabled=skills_enabled,
         custom_agent=custom_agent,
@@ -65,6 +74,8 @@ def run_claude_code(
         ]
         if mcp_config_json:
             cmd_args.append(f"--mcp-config={mcp_config_json}")
+        if lsp_plugin_dir is not None:
+            cmd_args.append(f"--plugin-dir={lsp_plugin_dir}")
         if custom_agent:
             cmd_args.append(f"--agent={custom_agent}")
         cmd_args.extend(

diff --git a/src/bcbench/agent/copilot/agent.py b/src/bcbench/agent/copilot/agent.py
@@ -8,7 +8,7 @@
 import yaml
 
 from bcbench.agent.copilot.metrics import parse_metrics
-from bcbench.agent.shared import build_lsp_config, build_mcp_config, build_prompt, parse_tool_usage_from_hooks
+from bcbench.agent.shared import build_al_lsp_plugin, build_mcp_config, build_prompt, parse_tool_usage_from_hooks
 from bcbench.config import get_config
 from bcbench.dataset import BaseDatasetEntry
 from bcbench.exceptions import AgentError, AgentTimeoutError
@@ -42,14 +42,14 @@ def run_copilot_agent(
 
     prompt: str = build_prompt(entry, repo_path, copilot_config, category, al_mcp=al_mcp)
     mcp_config_json, mcp_server_names = build_mcp_config(copilot_config, entry, repo_path, al_mcp=al_mcp, container_name=container_name)
-    al_lsp_enabled: bool = build_lsp_config(entry, category, repo_path, al_lsp=al_lsp, container_name=container_name)
+    lsp_plugin_dir: Path | None = build_al_lsp_plugin(entry, category, repo_path, AgentType.COPILOT, al_lsp=al_lsp, container_name=container_name)
     instructions_enabled: bool = setup_instructions_from_config(copilot_config, entry, repo_path, agent_type=AgentType.COPILOT)
     skills_enabled: bool = setup_agent_skills(copilot_config, entry, repo_path, agent_type=AgentType.COPILOT)
     custom_agent: str | None = setup_custom_agent(copilot_config, entry, repo_path, agent_type=AgentType.COPILOT)
     tool_log_path: Path = setup_hooks(repo_path, AgentType.COPILOT, output_dir)
     config = ExperimentConfiguration(
         mcp_servers=mcp_server_names,
-        al_lsp_enabled=al_lsp_enabled,
+        al_lsp_enabled=lsp_plugin_dir is not None,
         custom_instructions=instructions_enabled,
         skills_enabled=skills_enabled,
         custom_agent=custom_agent,
@@ -76,6 +76,8 @@ def run_copilot_agent(
             cmd_args.append("--no-custom-instructions")
         if mcp_config_json:
             cmd_args.append(f"--additional-mcp-config={mcp_config_json}")
+        if lsp_plugin_dir is not None:
+            cmd_args.append(f"--plugin-dir={lsp_plugin_dir}")
         if custom_agent:
             cmd_args.append(f"--agent={custom_agent}")
 

diff --git a/src/bcbench/agent/shared/__init__.py b/src/bcbench/agent/shared/__init__.py
@@ -1,8 +1,8 @@
 """Shared code for CLI-based agents (Claude, Copilot)."""
 
 from bcbench.agent.shared.hooks_parser import parse_tool_usage_from_hooks
-from bcbench.agent.shared.lsp import build_lsp_config
+from bcbench.agent.shared.lsp import build_al_lsp_plugin
 from bcbench.agent.shared.mcp import build_mcp_config
 from bcbench.agent.shared.prompt import build_prompt
 
-__all__ = ["build_lsp_config", "build_mcp_config", "build_prompt", "parse_tool_usage_from_hooks"]
+__all__ = ["build_al_lsp_plugin", "build_mcp_config", "build_prompt", "parse_tool_usage_from_hooks"]
diff --git a/src/bcbench/agent/shared/lsp.py b/src/bcbench/agent/shared/lsp.py
@@ -1,4 +1,5 @@
 import json
+import shutil
 from pathlib import Path
 
 from bcbench.agent.shared.altool_paths import (
@@ -10,11 +11,16 @@
 from bcbench.dataset import BaseDatasetEntry
 from bcbench.exceptions import AgentError
 from bcbench.logger import get_logger
-from bcbench.types import EvaluationCategory
+from bcbench.types import AgentType, EvaluationCategory
 
 logger = get_logger(__name__)
 
-_AL_LSP_RELATIVE_PATH = Path(".github") / "lsp.json"
+# Per-task plugin folder location. Both Copilot CLI and Claude Code accept
+# `--plugin-dir <path>` for ad-hoc plugin loading and both look for the
+# manifest under `.claude-plugin/plugin.json`, so a single neutral path works
+# for either agent. Lives under `.bcbench/` so it's visibly BC-Bench-owned
+# and won't collide with either agent's auto-discovery paths.
+_AL_LSP_PLUGIN_RELATIVE_PATH = Path(".bcbench") / "al-lsp-plugin"
 
 
 def _resolve_symbol_paths(entry: BaseDatasetEntry, category: EvaluationCategory, container_name: str) -> tuple[list[str], list[str]]:
@@ -48,48 +54,68 @@ def _build_lsp_args(project_paths: list[str], package_cache_paths: list[str], as
     return args
 
 
-def build_lsp_config(entry: BaseDatasetEntry, category: EvaluationCategory, repo_path: Path, al_lsp: bool, container_name: str = "") -> bool:
-    """Write Copilot's project-level LSP config to <repo_path>/.github/lsp.json.
+def _lsp_config_for(agent_type: AgentType, args: list[str]) -> dict:
+    """Build the agent-specific `.lsp.json` content.
 
-    When ``al_lsp=False``, removes any stale config left over from a previous run and returns False.
-    When True, writes the `lspServers.altool` entry pointing at `altool launchlspserver` and returns True.
+    Both agents launch the same `al launchlspserver` process — only the surrounding
+    LSP-routing schema differs:
+
+    - Copilot CLI expects `{ "lspServers": { name: { ..., "fileExtensions": {".ext": "lang"} } } }`
+    - Claude Code expects `{ name: { ..., "extensionToLanguage": {".ext": "lang"} } }` (no wrapper, different extension key)
+
+    `command: "al"` is unqualified by design: Copilot CLI silently rejects absolute paths in LSP `command`
+    ("Server <name> is configured but not available"), so the published `altool` wrapper (`al`) must resolve via PATH on both sides.
+    Raises ValueError for any other agent type rather than implicitly returning None (which would be written out as `null`).
+    """
+    server = {"command": "al", "args": args}
+    if agent_type == AgentType.COPILOT:
+        return {"lspServers": {"altool": {**server, "fileExtensions": {".al": "al"}}}}
+    if agent_type == AgentType.CLAUDE:
+        return {"altool": {**server, "extensionToLanguage": {".al": "al"}}}
+    raise ValueError(f"No AL LSP config schema defined for agent type: {agent_type!r}")
+
+
+def build_al_lsp_plugin(entry: BaseDatasetEntry, category: EvaluationCategory, repo_path: Path, agent_type: AgentType, al_lsp: bool, container_name: str = "") -> Path | None:
+    """Build a per-task plugin folder holding the AL LSP server configuration, return its path or None.
+
+    Both Copilot CLI and Claude Code load this via ``--plugin-dir <path>`` for a single session
+    — no marketplace registration, no global state, no cross-run plugin leakage. The plugin
+    folder layout is identical between agents; only the LSP-routing schema in ``.lsp.json``
+    differs (see :func:`_lsp_config_for`).
+
+    Layout written under ``<repo>/.bcbench/al-lsp-plugin/``::
+
+        .claude-plugin/plugin.json   — minimal manifest (only ``name`` is required;
+                                       both CLIs check this path)
+        .lsp.json                    — LSP server config in the agent's schema
+
+    Returns the plugin folder path (to be passed as ``--plugin-dir``), or None when ``al_lsp`` is False (a plugin folder left by a previous run is deleted first).
     """
-    lsp_config_path = repo_path / _AL_LSP_RELATIVE_PATH
+    plugin_dir = repo_path / _AL_LSP_PLUGIN_RELATIVE_PATH
 
     if not al_lsp:
-        if lsp_config_path.is_file():
-            lsp_config_path.unlink()
-            logger.info(f"Removed stale LSP config: {lsp_config_path}")
-        return False
+        if plugin_dir.exists():
+            shutil.rmtree(plugin_dir)
+            logger.info(f"Removed stale AL LSP plugin: {plugin_dir}")
+        return None
 
     project_paths = [str(repo_path / p) for p in entry.project_paths]
     set_runtime_version(project_paths)
-
     package_cache_paths, assembly_probing_paths = _resolve_symbol_paths(entry, category, container_name)
+    args = _build_lsp_args(project_paths, package_cache_paths, assembly_probing_paths)
 
-    args = _build_lsp_args(
-        project_paths=project_paths,
-        package_cache_paths=package_cache_paths,
-        assembly_probing_paths=assembly_probing_paths,
-    )
-
-    # Copilot CLI resolves `command` via PATH (absolute paths are silently rejected with
-    # "Server <name> is configured but not available"). `al` is the published altool
-    # wrapper installed via the .NET tool — it must be on PATH.
-    lsp_config = {
-        "lspServers": {
-            "altool": {
-                "command": "al",
-                "args": args,
-                "fileExtensions": {".al": "al"},
-            }
-        }
+    plugin_manifest = {
+        "name": "al-lsp",
+        "version": "1.0.0",
+        "description": "AL Language Server for Business Central agentic development",
     }
+    lsp_config = _lsp_config_for(agent_type, args)
 
-    lsp_config_path.parent.mkdir(parents=True, exist_ok=True)
-    lsp_config_path.write_text(json.dumps(lsp_config, indent=2), encoding="utf-8")
+    (plugin_dir / ".claude-plugin").mkdir(parents=True, exist_ok=True)  # parents=True also creates plugin_dir itself, which the .lsp.json write below relies on
+    (plugin_dir / ".claude-plugin" / "plugin.json").write_text(json.dumps(plugin_manifest, indent=2), encoding="utf-8")
+    (plugin_dir / ".lsp.json").write_text(json.dumps(lsp_config, indent=2), encoding="utf-8")
 
-    logger.info(f"Wrote AL LSP config: {lsp_config_path}")
+    logger.info(f"Wrote AL LSP plugin for {agent_type.value}: {plugin_dir}")
     logger.debug(f"LSP configuration: {json.dumps(lsp_config, indent=2)}")
 
-    return True
+    return plugin_dir
diff --git a/src/bcbench/commands/evaluate.py b/src/bcbench/commands/evaluate.py
@@ -106,6 +106,7 @@ def evaluate_claude_code(
     output_dir: OutputDir = _config.paths.evaluation_results_path,
     run_id: RunId = "claude_code_test_run",
     al_mcp: Annotated[bool, typer.Option("--al-mcp", help="Enable AL MCP server")] = False,
+    al_lsp: Annotated[bool, typer.Option("--al-lsp", help="Enable AL LSP server")] = False,
 ) -> None:
     """
     Evaluate Claude Code on single dataset entry.
@@ -139,6 +140,7 @@ def evaluate_claude_code(
             model=ctx.model,
             output_dir=ctx.result_dir,
             al_mcp=al_mcp if ctx.container else False,
+            al_lsp=al_lsp,
             container_name=ctx.get_container().name if ctx.container else "",
         ),
     )

diff --git a/src/bcbench/commands/run.py b/src/bcbench/commands/run.py
@@ -65,6 +65,7 @@ def run_claude(
     repo_path: RepoPath = _config.paths.testbed_path,
     output_dir: OutputDir = _config.paths.evaluation_results_path,
     al_mcp: Annotated[bool, typer.Option("--al-mcp", help="Enable AL MCP server")] = False,
+    al_lsp: Annotated[bool, typer.Option("--al-lsp", help="Enable AL LSP server")] = False,
 ) -> None:
     """
     Run Claude Code on a single entry to generate a patch (without building/testing).
@@ -77,4 +78,13 @@ def run_claude(
     entry = category.entry_class.load(category.dataset_path, entry_id=entry_id)[0]
     category.pipeline.setup_workspace(entry, repo_path)
 
-    run_claude_code(entry=entry, repo_path=repo_path, model=model, category=category, output_dir=output_dir, al_mcp=al_mcp, container_name=container_name)
+    run_claude_code(
+        entry=entry,
+        repo_path=repo_path,
+        model=model,
+        category=category,
+        output_dir=output_dir,
+        al_mcp=al_mcp if container_name else False,
+        al_lsp=al_lsp,
+        container_name=container_name or "",
+    )