Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
a7bf04c
Add UI test
chengbiao-jin Apr 15, 2026
222d680
Update version number
chengbiao-jin Apr 22, 2026
4d6918f
feat: Add Trace Logs UI for agent execution observability
prins-agivant Apr 14, 2026
2b33762
fix: avoid intermediate variable in agent.py return
prins-agivant Apr 14, 2026
3e260c7
feat: capture node input/output in agent_steps and display in Trace Logs
prins-agivant Apr 14, 2026
200895d
fix: move _safe_serialize above loop, remove unused LuClock import
prins-agivant Apr 14, 2026
e6c98ab
feat: role-gated View Trace, remove truncation, fix routing labels
prins-agivant Apr 17, 2026
722d115
feat: add trace_logs volume mount in docker-compose
prins-agivant Apr 17, 2026
7250ddd
feat: add trace log save/fetch endpoints in ui.py and nginx /trace route
prins-agivant Apr 17, 2026
e359f98
fix: update trace route to /trace/:messageId in main.tsx
prins-agivant Apr 17, 2026
39c1263
fix: show all durations in seconds in TraceLogs
prins-agivant Apr 21, 2026
ef0dae0
feat: add LLM token usage tracking per node and Token Overview tab
prins-agivant Apr 27, 2026
2696382
fix: deduplicate LLM caller names in Token Overview table
prins-agivant Apr 27, 2026
ad307d9
fix: add auth to trace endpoint, fix browser auth dialog, async file …
prins-agivant Apr 29, 2026
02a326d
fix: remove chunk text from citations in trace logs UI and backend JSON
prins-agivant May 4, 2026
51bbb65
feat: move Citations tab first, add 30-day trace log cleanup
prins-agivant May 6, 2026
ade9289
feat(GML-2086): add Excel and CSV extraction support with UI warning
prins-agivant Apr 30, 2026
698724b
fix(GML-2086): preserve all rows for headerless Excel sheets
prins-agivant Apr 30, 2026
62ea52e
fix(GML-2086): align supported_extensions dict with get_supported_ext…
prins-agivant Apr 30, 2026
f8ccdd6
fix(GML-2086): handle non-UTF-8 encodings in CSV extraction
prins-agivant Apr 30, 2026
6a2bda4
feat(GML-2086): show unsupported file type warning in upload UI
prins-agivant May 5, 2026
934edd7
feat(GML-2076): add Auto retrieval method selection (Phase 1)
chengbiao-jin May 6, 2026
3051fc3
feat(GML-2076): observability + out-of-corpus short-circuit (Phase 1.5)
chengbiao-jin May 6, 2026
37d9af2
Merge branch 'release_1.4.0' into GML-2076-Router-Improvement
chengbiao-jin May 6, 2026
fa9b5b1
fix: trace log path traversal + superuser auth, usage collector reset…
prins-agivant May 7, 2026
2a570a8
fix(TraceLogs): show not-found state when trace data missing
prins-agivant May 7, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
# Changelog

## [1.4.0]

### Added
- **Auto retrieval method selection** — new "Auto" option in the chat dropdown picks among Similarity / Contextual / Hybrid / Community per question
- Two-stage selector: deterministic regex rules cover common cases; LLM fallback handles the rest with a subset-aware prompt
- Selection visible via a chip below each bot reply (method, reason, auto/manual)
- Manual method selection still works as override during the transition
- **Method selection telemetry** — Prometheus counter `llm_method_selection_total` with `selected_method` and `selection_source` labels
- **Out-of-corpus short-circuit** — when the chosen retriever returns no results, the system returns an honest "couldn't find relevant info" message instead of letting the LLM hallucinate from empty context

## [1.3.1]

### Changed
Expand Down
43 changes: 43 additions & 0 deletions common/llm_services/base_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,16 @@ def get_collected_usage():
return _usage_collector.get()


def reset_usage_collection() -> None:
    """Drop any accumulated usage and disable collection for this context.

    Must be called at the end of a request (success or failure) so stale
    usage data doesn't bleed into the next request that runs on the same
    thread (sync FastAPI handlers re-use worker threads from a pool).
    """
    # Setting the ContextVar to None both discards the accumulated bucket and
    # makes _record_usage a no-op until start_usage_collection runs again.
    _usage_collector.set(None)


def _record_usage(caller_name: str, usage_data: dict):
bucket = _usage_collector.get()
if bucket is not None:
Expand Down Expand Up @@ -286,6 +296,39 @@ def route_response_prompt(self):
Format: {format_instructions}\
"""

@property
def select_retriever_prompt(self):
"""Property to get the prompt for the auto-select retriever (RetrieverSelector Stage B).

Returns the user-facing prompt template; the parser injects format_instructions.
"""
result = self._read_prompt_file(self.prompt_path + "select_retriever.txt")
if result is not None:
return result
return """\
You are choosing the best retrieval strategy for a knowledge-graph question.
Pick exactly one of: similarity, contextual, hybrid, community.

Methods:
- similarity: a single fact / definition / quote; the answer lives in one passage. Cheapest. Pick this for short factoid questions about a single entity.
- contextual: needs surrounding narrative (a process, a sequence, cause-and-effect). Returns matching chunks plus their lookback/lookahead siblings.
- hybrid: needs relationships between named entities or multi-hop reasoning. Returns matching chunks plus graph-expansion to nearby entities.
- community: global, thematic, or aggregate questions over the whole corpus ("main themes", "what topics are covered", "summarize the documents"). Returns community summaries instead of chunks.

Important constraints:
- similarity returns a strict subset of contextual and hybrid (same vector hits, no expansion). Do NOT pick similarity if the question needs context or relationships — pick contextual or hybrid instead.
- community is the only method that operates on community summaries. Pick it ONLY for global/thematic questions; do not pick it for questions about specific named entities.

Schema context — the knowledge graph contains these entity types: {v_types}
And these relationship types: {e_types}

Question: {question}
Conversation history (last 2 turns, may be empty): {conversation}

Return JSON: {{"method": "<one of: similarity, contextual, hybrid, community>", "reason": "<≤20 words explaining the pick>"}}

Format: {format_instructions}"""

@property
def hyde_prompt(self):
"""Property to get the prompt for the HyDE tool."""
Expand Down
5 changes: 5 additions & 0 deletions common/metrics/prometheus_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,11 @@ def __init__(self):
"Number of LLM responses that yielded an error result",
["llm_model"],
)
self.llm_method_selection_total = Counter(
"llm_method_selection_total",
"Number of times each retrieval method was selected (auto + manual)",
["selected_method", "selection_source"],
)

# collect metrics for TigerGraph
self.tigergraph_active_connections = Gauge(
Expand Down
13 changes: 9 additions & 4 deletions common/utils/text_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -652,11 +652,16 @@ def extract_text_from_file(file_path, graphname=None):
if df.empty:
continue
df = df.fillna('')
# Detect header row: first row is all non-empty strings with
# no purely numeric values → treat as column names.
first_row = df.iloc[0]
if all(isinstance(v, str) and v.strip() for v in first_row):
df.columns = first_row.tolist()
first_row_values = [str(v).strip() for v in first_row]
looks_like_header = (
len(df) > 1
and all(first_row_values)
and len(set(first_row_values)) == len(first_row_values)
and not any(v.isdigit() for v in first_row_values)
)
if looks_like_header:
df.columns = first_row_values
df = df.iloc[1:].reset_index(drop=True)
else:
df.columns = [f"Column {i + 1}" for i in range(len(df.columns))]
Expand Down
9 changes: 5 additions & 4 deletions graphrag-ui/src/components/Bot.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,11 @@ const Bot = ({ layout, getConversationId }: { layout?: string | undefined, getCo
}
}

// Set default ragPattern if no value in sessionStorage
// Set default ragPattern if no value in sessionStorage. "Auto" lets the
// backend RetrieverSelector pick a method per question.
if (!sessionStorage.getItem("ragPattern")) {
setRagPattern("Hybrid Search");
sessionStorage.setItem("ragPattern", "Hybrid Search");
setRagPattern("Auto");
sessionStorage.setItem("ragPattern", "Auto");
}

const date = new Date();
Expand Down Expand Up @@ -119,7 +120,7 @@ const Bot = ({ layout, getConversationId }: { layout?: string | undefined, getCo
<DropdownMenuLabel>Select a GraphRAG Pattern</DropdownMenuLabel>
<DropdownMenuSeparator />
<DropdownMenuGroup>
{["Similarity Search", "Contextual Search", "Hybrid Search", "Community Search"].map((f, i) => (
{["Auto", "Similarity Search", "Contextual Search", "Hybrid Search", "Community Search"].map((f, i) => (
<DropdownMenuItem key={i} onSelect={() => handleSelectRag(f)}>
{/* <User className="mr-2 h-4 w-4" /> */}
<span>{f}</span>
Expand Down
45 changes: 45 additions & 0 deletions graphrag-ui/src/components/CustomChatMessage.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,50 @@ interface IChatbotMessageProps {
}

const urlRegex = /https?:\/\//

// Phase 1.5 — render a subtle chip showing which retrieval method ran.
// Reads the auto-selection metadata that supportai_search mirrors into
// query_sources (chosen_retriever / chosen_retriever_reason / chosen_retriever_source).
// Maps backend retriever identifiers (query_sources.chosen_retriever values)
// to the short human-readable labels shown in the chip below each reply.
const METHOD_LABELS: Record<string, string> = {
  similaritysearch: "Similarity",
  contextualsearch: "Contextual",
  hybridsearch: "Hybrid",
  communitysearch: "Community",
};

// Subtle chip showing which retrieval method ran for this reply, built from
// the auto-selection metadata mirrored into message.query_sources
// (chosen_retriever / chosen_retriever_reason / chosen_retriever_source).
const RetrieverBadge: FC<{ message: any }> = ({ message }) => {
  const sources = message?.query_sources;
  if (!sources || typeof sources !== "object") return null;

  const chosen = sources.chosen_retriever as string | undefined;
  if (!chosen) return null;

  // Greetings, errors, and progress events never run a retriever — no chip.
  const nonRetrievalTypes = ["progress", "greeting", "error"];
  if (nonRetrievalTypes.includes(message.response_type)) return null;

  // Unknown method ids fall back to the raw identifier.
  const label = METHOD_LABELS[chosen] || chosen;
  const reason = (sources.chosen_retriever_reason as string | undefined) || "";
  const rawSource = (sources.chosen_retriever_source as string | undefined) || "";
  // Anything other than an explicit manual pick (rules / llm / fallback) reads as "auto".
  const sourceLabel = rawSource === "manual" ? "manual" : "auto";
  const tooltip = reason ? `${reason} (${sourceLabel})` : sourceLabel;

  return (
    <div
      className="inline-flex items-center gap-1.5 text-[11px] text-gray-500 dark:text-gray-400 bg-gray-100 dark:bg-shadeA rounded-full px-2 py-0.5 mt-1 max-w-full"
      title={tooltip}
    >
      <span>🔎</span>
      <span className="font-medium">{label}</span>
      {reason ? (
        <span className="truncate italic">· {reason}</span>
      ) : null}
      <span className="opacity-60">· {sourceLabel}</span>
    </div>
  );
};

const getReasoning = (msg) => {

if(msg.query_sources.reasoning instanceof Array) {
Expand Down Expand Up @@ -185,6 +229,7 @@ export const CustomChatMessage: FC<IChatbotMessageProps> = ({
) : (
<ReactMarkdown remarkPlugins={[remarkGfm]} components={markdownComponents} className={message.response_type === "history" ? undefined : "typewriter"}>{message.content}</ReactMarkdown>
)}
<RetrieverBadge message={message} />
<Interactions
message={message}
showExplain={handleShowExplain}
Expand Down
11 changes: 10 additions & 1 deletion graphrag-ui/src/pages/TraceLogs.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -745,7 +745,7 @@ const TraceLogs: FC = () => {
const userQuery = stateUserQuery || sessionMessage?.userQuery || apiData?.user_query;

const trace = useMemo(
() => buildTraceFromMessage(message, userQuery),
() => (message ? buildTraceFromMessage(message, userQuery) : null),
[message, userQuery]
);

Expand All @@ -760,6 +760,7 @@ const TraceLogs: FC = () => {
};

const handleDownload = () => {
if (!trace) return;
const blob = new Blob([JSON.stringify(trace, null, 2)], {
type: "application/json",
});
Expand All @@ -779,6 +780,14 @@ const TraceLogs: FC = () => {
);
}

if (!trace) {
return (
<div className="min-h-screen bg-background flex items-center justify-center">
<p className="text-muted-foreground">Trace data not found.</p>
</div>
);
}

return (
<div className="min-h-screen bg-background">
{/* Header */}
Expand Down
10 changes: 7 additions & 3 deletions graphrag/app/agent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from common.config import embedding_service, embedding_store, llm_config, get_completion_config, get_chat_config, get_llm_service
from common.embeddings.base_embedding_store import EmbeddingStore
from common.embeddings.embedding_services import EmbeddingModel
from common.llm_services.base_llm import LLM_Model, start_usage_collection, get_collected_usage
from common.llm_services.base_llm import LLM_Model, start_usage_collection, get_collected_usage, reset_usage_collection
from common.logs.log import req_id_cv
from common.logs.logwriter import LogWriter
from common.metrics.prometheus_metrics import metrics
Expand Down Expand Up @@ -44,7 +44,7 @@ def __init__(
embedding_store: EmbeddingStore,
use_cypher: bool = False,
ws=None,
supportai_retriever="hybridsearch"
supportai_retriever="auto"
):
self.conn = db_connection

Expand Down Expand Up @@ -257,6 +257,10 @@ def _node_output(node, state):
traceback.print_exc()
raise e
finally:
# Clear the per-request LLM usage bucket so it can't leak into the
# next request that runs on the same worker thread (sync FastAPI
# handlers re-use threads from a pool, where ContextVars persist).
reset_usage_collection()
metrics.llm_request_total.labels(self.model_name).inc()
metrics.llm_inprogress_requests.labels(self.model_name).dec()
duration = time.time() - start_time
Expand All @@ -265,7 +269,7 @@ def _node_output(node, state):
)


def make_agent(graphname, conn, use_cypher, ws: WebSocket = None, supportai_retriever="hybridsearch") -> TigerGraphAgent:
def make_agent(graphname, conn, use_cypher, ws: WebSocket = None, supportai_retriever="auto") -> TigerGraphAgent:
llm_provider = get_llm_service(get_chat_config(graphname))
chat_config = llm_provider.config

Expand Down
Loading