Commit 9bdb70a

🤖 fix: handle GitHub Copilot context limits and retry behavior (#2431)
## Summary

Fix GitHub Copilot models getting stuck in infinite retry loops when the prompt exceeds the provider's context window. Three root causes addressed: model stats lookup failures, error misclassification, and incorrect tokenizer fallbacks.

## Background

A user reported Mux showing repeated **Stream Error (API)** messages with Copilot models (e.g., `prompt token count of 128067 exceeds the limit of 128000`) and auto-retrying endlessly (attempt 9+), never surfacing the existing **"Compact & retry"** recovery UI. Three independent issues combined to cause this:

1. **Model stats lookup failed for Copilot models.** `models.json` has Copilot entries under `github_copilot/` keys (with underscore) but without cost fields. Our `isValidModelData()` required cost fields, and `generateLookupKeys()` checked bare model names first (matching OpenAI entries) and didn't normalize `github-copilot` → `github_copilot`.
2. **Token-limit errors were misclassified.** `categorizeError()` only detected Anthropic-style context errors (`prompt is too long`). Copilot's `prompt token count ... exceeds the limit ...` message was classified as `"api"` (retryable), not `"context_exceeded"` (non-retryable).
3. **Wrong tokenizer fallback.** Copilot hosts models from multiple providers (Claude, Gemini, GPT), but all `github-copilot:*` models fell back to the OpenAI tokenizer regardless of the underlying model.

## Implementation

### 1. `modelStats.ts` — Fix Copilot token limit resolution

- Added provider alias mapping (`github-copilot` → `github_copilot`) for LiteLLM key generation
- Reversed lookup priority: provider-prefixed keys checked first, bare model name last
- Relaxed `isValidModelData()` to only require `max_input_tokens` (not cost fields)
- Default missing costs to `0` (Copilot is subscription-based)
- Added `parseNum()` helper for safe numeric string parsing

### 2. `streamManager.ts` — Classify Copilot context errors correctly

- Expanded `categorizeError()` to detect the `"token" + "exceeds" + "limit"` pattern
- This makes Copilot token-limit errors return `context_exceeded`, which:
  - Stops auto-retry (already in `NON_RETRYABLE_STREAM_ERRORS`)
  - Shows the existing **"Compact & retry"** UI (already in `StreamErrorMessage`)

### 3. `tokenizer.ts` — Smart fallback for Copilot model tokenizers

- When the provider is `github-copilot`, infer the tokenizer from the model name prefix:
  - `claude-*` → Anthropic tokenizer
  - `gemini-*` → Google tokenizer
  - `gpt-*` / others → OpenAI tokenizer

## Validation

- `make static-check` — all checks pass
- `bun test src/common/utils/tokens/modelStats.test.ts` — 27/27 pass (4 new Copilot-specific tests)

---

_Generated with `mux` • Model: `anthropic:claude-opus-4-6` • Thinking: `xhigh` • Cost: `$1.99`_

<!-- mux-attribution: model=anthropic:claude-opus-4-6 thinking=xhigh costs=1.99 -->
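The `parseNum()` helper called out under Implementation accepts values that arrive from `models.json` either as numbers or as formatted strings like `"128,000"`. A self-contained copy of its behavior, taken from the patch:

```typescript
// Safe numeric parsing for models.json fields, which may be numbers or
// string-formatted values (e.g. "128,000"). Mirrors the parseNum() added
// in modelStats.ts by this commit.
function parseNum(value: unknown): number | null {
  if (typeof value === "number" && Number.isFinite(value)) {
    return value;
  }

  if (typeof value === "string") {
    // Strip thousands separators and whitespace before converting.
    const parsed = Number(value.replace(/,/g, "").trim());
    return Number.isFinite(parsed) ? parsed : null;
  }

  return null;
}
```

Returning `null` (rather than `NaN` or `0`) for unparseable values lets `isValidModelData()` reject entries with no usable `max_input_tokens` instead of silently treating them as zero-limit models.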
1 parent 62deee1 commit 9bdb70a

4 files changed

Lines changed: 89 additions & 26 deletions

src/common/utils/tokens/modelStats.test.ts

Lines changed: 27 additions & 0 deletions
```diff
@@ -83,6 +83,33 @@ describe("getModelStats", () => {
     });
   });
 
+  describe("github copilot models", () => {
+    test("should prefer github copilot provider-specific limits", () => {
+      const stats = getModelStats("github-copilot:gpt-4-o-preview");
+      expect(stats).not.toBeNull();
+      expect(stats?.max_input_tokens).toBe(64000);
+    });
+
+    test("should default missing copilot costs to zero", () => {
+      const stats = getModelStats("github-copilot:gpt-4.1");
+      expect(stats).not.toBeNull();
+      expect(stats?.max_input_tokens).toBe(128000);
+      expect(stats?.input_cost_per_token).toBe(0);
+      expect(stats?.output_cost_per_token).toBe(0);
+    });
+
+    test("should resolve claude sonnet copilot entries", () => {
+      const stats = getModelStats("github-copilot:claude-sonnet-4.5");
+      expect(stats).not.toBeNull();
+      expect(stats?.max_input_tokens).toBeGreaterThan(0);
+    });
+
+    test("should resolve claude haiku copilot entries", () => {
+      const stats = getModelStats("github-copilot:claude-haiku-4.5");
+      expect(stats).not.toBeNull();
+    });
+  });
+
   describe("unknown models", () => {
     test("should return null for completely unknown model", () => {
       const stats = getModelStats("unknown:fake-model-9000");
```

src/common/utils/tokens/modelStats.ts

Lines changed: 34 additions & 22 deletions
```diff
@@ -21,29 +21,44 @@ interface RawModelData {
   [key: string]: unknown;
 }
 
+const PROVIDER_KEY_ALIASES: Record<string, string> = {
+  // GitHub Copilot keys in models.json use underscores for LiteLLM provider names.
+  "github-copilot": "github_copilot",
+};
+
+function parseNum(value: unknown): number | null {
+  if (typeof value === "number" && Number.isFinite(value)) {
+    return value;
+  }
+
+  if (typeof value === "string") {
+    const parsed = Number(value.replace(/,/g, "").trim());
+    return Number.isFinite(parsed) ? parsed : null;
+  }
+
+  return null;
+}
+
 /**
  * Validates raw model data has required fields
  */
 function isValidModelData(data: RawModelData): boolean {
-  return (
-    typeof data.max_input_tokens === "number" &&
-    typeof data.input_cost_per_token === "number" &&
-    typeof data.output_cost_per_token === "number"
-  );
+  const maxInputTokens = parseNum(data.max_input_tokens);
+  return maxInputTokens != null && maxInputTokens > 0;
 }
 
 /**
  * Extracts ModelStats from validated raw data
  */
 function extractModelStats(data: RawModelData): ModelStats {
-  // Type assertions are safe here because isValidModelData() already validated these fields
-  /* eslint-disable @typescript-eslint/non-nullable-type-assertion-style */
   return {
-    max_input_tokens: data.max_input_tokens as number,
-    max_output_tokens:
-      typeof data.max_output_tokens === "number" ? data.max_output_tokens : undefined,
-    input_cost_per_token: data.input_cost_per_token as number,
-    output_cost_per_token: data.output_cost_per_token as number,
+    max_input_tokens: parseNum(data.max_input_tokens) ?? 0,
+    max_output_tokens: parseNum(data.max_output_tokens) ?? undefined,
+    // Subscription providers like GitHub Copilot omit per-token costs.
+    input_cost_per_token:
+      typeof data.input_cost_per_token === "number" ? data.input_cost_per_token : 0,
+    output_cost_per_token:
+      typeof data.output_cost_per_token === "number" ? data.output_cost_per_token : 0,
     cache_creation_input_token_cost:
       typeof data.cache_creation_input_token_cost === "number"
         ? data.cache_creation_input_token_cost
@@ -53,7 +68,6 @@ function extractModelStats(data: RawModelData): ModelStats {
         ? data.cache_read_input_token_cost
         : undefined,
   };
-  /* eslint-enable @typescript-eslint/non-nullable-type-assertion-style */
 }
 
 /**
@@ -64,26 +78,24 @@ function generateLookupKeys(modelString: string): string[] {
   const colonIndex = modelString.indexOf(":");
   const provider = colonIndex !== -1 ? modelString.slice(0, colonIndex) : "";
   const modelName = colonIndex !== -1 ? modelString.slice(colonIndex + 1) : modelString;
+  const litellmProvider = PROVIDER_KEY_ALIASES[provider] ?? provider;
 
-  const keys: string[] = [
-    modelName, // Direct model name (e.g., "claude-opus-4-1")
-  ];
+  const keys: string[] = [];
 
-  // Add provider-prefixed variants for Ollama and other providers
+  // Prefer provider-scoped matches first so provider-specific limits win over generic entries.
   if (provider) {
-    keys.push(
-      `${provider}/${modelName}`, // "ollama/gpt-oss:20b"
-      `${provider}/${modelName}-cloud` // "ollama/gpt-oss:20b-cloud" (LiteLLM convention)
-    );
+    keys.push(`${litellmProvider}/${modelName}`, `${litellmProvider}/${modelName}-cloud`);
 
     // Fallback: strip size suffix for base model lookup
     // "ollama:gpt-oss:20b" → "ollama/gpt-oss"
     if (modelName.includes(":")) {
      const baseModel = modelName.split(":")[0];
-      keys.push(`${provider}/${baseModel}`);
+      keys.push(`${litellmProvider}/${baseModel}`);
    }
  }
 
+  keys.push(modelName);
+
   return keys;
 }
```
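The resulting lookup order can be exercised in isolation. The following standalone sketch mirrors the patched `generateLookupKeys()` logic (copied out of its module so it runs on its own):

```typescript
// Standalone mirror of the patched generateLookupKeys(): provider-scoped keys
// (with the LiteLLM alias applied) come first; the bare model name is last,
// so provider-specific limits win over generic entries.
const PROVIDER_KEY_ALIASES: Record<string, string> = {
  "github-copilot": "github_copilot",
};

function generateLookupKeys(modelString: string): string[] {
  const colonIndex = modelString.indexOf(":");
  const provider = colonIndex !== -1 ? modelString.slice(0, colonIndex) : "";
  const modelName = colonIndex !== -1 ? modelString.slice(colonIndex + 1) : modelString;
  const litellmProvider = PROVIDER_KEY_ALIASES[provider] ?? provider;

  const keys: string[] = [];
  if (provider) {
    keys.push(`${litellmProvider}/${modelName}`, `${litellmProvider}/${modelName}-cloud`);
    // Strip size suffix for base model lookup: "ollama:gpt-oss:20b" → "ollama/gpt-oss"
    if (modelName.includes(":")) {
      keys.push(`${litellmProvider}/${modelName.split(":")[0]}`);
    }
  }
  keys.push(modelName); // generic entry is now the last resort
  return keys;
}
```

For `"github-copilot:gpt-4.1"` this yields `github_copilot/gpt-4.1` before the bare `gpt-4.1`, so the Copilot-specific 128k limit is found before the generic OpenAI entry.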

src/node/services/streamManager.ts

Lines changed: 10 additions & 2 deletions
```diff
@@ -2353,9 +2353,17 @@ export class StreamManager extends EventEmitter {
       return "model_not_found";
     }
 
-    // Check for Anthropic context exceeded errors
+    // Check for context exceeded errors (Anthropic + OpenAI-compatible / Copilot)
     const msgLower = error.message.toLowerCase();
-    if (msgLower.includes("prompt is too long") || msgLower.includes("input is too long")) {
+
+    // Anthropic: "prompt is too long" / "input is too long"
+    // Copilot / OpenAI-compatible: "prompt token count of X exceeds the limit of Y"
+    const isContextExceeded =
+      msgLower.includes("prompt is too long") ||
+      msgLower.includes("input is too long") ||
+      (msgLower.includes("token") && msgLower.includes("exceeds") && msgLower.includes("limit"));
+
+    if (isContextExceeded) {
       return "context_exceeded";
     }
```
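The new classification can be sketched as a standalone predicate (the function name here is illustrative; in mux this logic lives inside `categorizeError()`):

```typescript
// Context-exceeded detection as a standalone predicate. The combined
// "token" + "exceeds" + "limit" check is deliberately loose so it matches
// Copilot/OpenAI-compatible wordings without pinning an exact message format.
function isContextExceededError(message: string): boolean {
  const msgLower = message.toLowerCase();
  return (
    msgLower.includes("prompt is too long") || // Anthropic
    msgLower.includes("input is too long") ||  // Anthropic
    (msgLower.includes("token") && msgLower.includes("exceeds") && msgLower.includes("limit"))
  );
}
```

Note that requiring all three substrings keeps rate-limit messages like `"rate limit exceeded"` out of this bucket: they contain `limit` but neither `token` nor `exceeds`, so they remain retryable `"api"` errors.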

src/node/utils/main/tokenizer.ts

Lines changed: 18 additions & 2 deletions
```diff
@@ -81,10 +81,26 @@ function resolveModelName(modelString: string): ModelName {
 
   if (!modelName) {
     const provider = normalized.split(":")[0] || "anthropic";
+
+    // GitHub Copilot hosts models from multiple providers.
+    // Infer the tokenizer family from the model name prefix.
+    let effectiveProvider = provider;
+    if (provider === "github-copilot") {
+      const modelId = normalized.split(":")[1] || "";
+      if (modelId.startsWith("claude-")) {
+        effectiveProvider = "anthropic";
+      } else if (modelId.startsWith("gemini-")) {
+        effectiveProvider = "google";
+      } else {
+        // gpt-*, grok-*, and unknown models use OpenAI tokenizer
+        effectiveProvider = "openai";
+      }
+    }
+
     const fallbackModel =
-      provider === "anthropic"
+      effectiveProvider === "anthropic"
         ? "anthropic/claude-sonnet-4.5"
-        : provider === "google"
+        : effectiveProvider === "google"
           ? "google/gemini-2.5-pro"
           : "openai/gpt-5";
```
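The prefix-based inference can be distilled into a small pure function. This is a simplified sketch (the real `resolveModelName()` also handles explicit model overrides and normalization not shown here):

```typescript
// Simplified sketch of the Copilot tokenizer-family inference. Copilot fronts
// Claude, Gemini, and GPT models behind a single provider id, so the tokenizer
// family is chosen from the model name prefix rather than the provider alone.
function inferTokenizerProvider(modelString: string): "anthropic" | "google" | "openai" {
  const [provider, modelId = ""] = modelString.split(":");
  if (provider === "anthropic") return "anthropic";
  if (provider === "google") return "google";
  if (provider === "github-copilot") {
    if (modelId.startsWith("claude-")) return "anthropic";
    if (modelId.startsWith("gemini-")) return "google";
  }
  // gpt-*, grok-*, and unknown models fall back to the OpenAI tokenizer.
  return "openai";
}
```

Token counts only feed limit estimation, so an occasional wrong guess for an exotic model degrades the estimate slightly rather than breaking the stream.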
