-
Notifications
You must be signed in to change notification settings - Fork 4
Add class that treats Codex as a backup #11
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 7 commits
3acb048
74755d2
f7f8156
d20d31c
a223b04
f15424c
23eeb58
9e8690c
d0ad8df
b4bff54
038a475
5fbb48e
3892b52
22253e9
e4bdf2c
e5a6164
807d7fa
00def49
d8a6e86
2630a2c
3286674
0ebd4fe
4eca7d3
c59cec5
6026179
a94ffb5
2510255
b439113
38666de
a5d655b
e776dfe
7866f0c
26adbf1
36f80e9
febbfd0
dc1d003
739ffc6
3e4864a
81cc934
9e91e9b
c5843c9
49f9a9d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,6 @@ | ||
| # SPDX-License-Identifier: MIT | ||
| from cleanlab_codex.codex import Codex | ||
| from cleanlab_codex.codex_backup import CodexBackup | ||
| from cleanlab_codex.codex_tool import CodexTool | ||
|
|
||
| __all__ = ["Codex", "CodexTool"] | ||
| __all__ = ["Codex", "CodexTool", "CodexBackup"] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,111 @@ | ||
| from __future__ import annotations | ||
|
|
||
| from functools import wraps | ||
| from typing import Any, Callable, Optional | ||
|
|
||
| from cleanlab_codex.codex import Codex | ||
| from cleanlab_codex.validation import is_bad_response | ||
|
|
||
|
|
||
| def handle_backup_default(backup_response: str, decorated_instance: Any) -> None: # noqa: ARG001 | ||
| """Default implementation is a no-op.""" | ||
| return None | ||
|
|
||
|
|
||
| class CodexBackup: | ||
| """A backup decorator that connects to a Codex project to answer questions that | ||
| cannot be adequately answered by the existing agent. | ||
| """ | ||
|
|
||
| DEFAULT_FALLBACK_ANSWER = "Based on the available information, I cannot provide a complete answer to this question." | ||
|
|
||
| def __init__( | ||
| self, | ||
| codex_client: Codex, | ||
| *, | ||
| project_id: Optional[str] = None, | ||
| fallback_answer: Optional[str] = DEFAULT_FALLBACK_ANSWER, | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Related to Angela's comment below: rather than having the user supply a
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's a bit more complicated than that at the moment. But we can consider that option once the validation.py module is finalized (where we have an |
||
| backup_handler: Callable[[str, Any], None] = handle_backup_default, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's not very clear from the current documentation what the purpose of this
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've redone most of the code now. The How can we add a "callback" to Codex-as-Backup so that if Codex responds with an answer, the state of the RAG application changes automatically? The user can define their logic to modify the state of the RAG system in this callable
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. However, if the idea is that every RAG application should use Codex-as-Backup within the scope of its query/chat method, then this handler is unnecessary. class RAG:
def chat(self, query, ...) -> Response:
context = run_retrieval(query)
response = llm(query + context)
+
+ codex_response = codex_backup.run(response, query, context, ...)
+ # handle new response yourself
+ # response = ...
return response |
||
| ): | ||
| self._codex_client = codex_client | ||
| self._project_id = project_id | ||
| self._fallback_answer = fallback_answer | ||
| self._backup_handler = backup_handler | ||
|
|
||
| @classmethod | ||
| def from_access_key( | ||
| cls, | ||
| access_key: str, | ||
| *, | ||
| project_id: Optional[str] = None, | ||
| fallback_answer: Optional[str] = DEFAULT_FALLBACK_ANSWER, | ||
| backup_handler: Callable[[str, Any], None] = handle_backup_default, | ||
| ) -> CodexBackup: | ||
| """Creates a CodexBackup from an access key. The project ID that the CodexBackup will use is the one that is associated with the access key.""" | ||
| return cls( | ||
| codex_client=Codex(key=access_key), | ||
| project_id=project_id, | ||
| fallback_answer=fallback_answer, | ||
| backup_handler=backup_handler, | ||
| ) | ||
|
|
||
| @classmethod | ||
| def from_client( | ||
| cls, | ||
| codex_client: Codex, | ||
| *, | ||
| project_id: Optional[str] = None, | ||
| fallback_answer: Optional[str] = DEFAULT_FALLBACK_ANSWER, | ||
| backup_handler: Callable[[str, Any], None] = handle_backup_default, | ||
| ) -> CodexBackup: | ||
| """Creates a CodexBackup from a Codex client. | ||
| If the Codex client is initialized with a project access key, the CodexBackup will use the project ID that is associated with the access key. | ||
| If the Codex client is initialized with a user API key, a project ID must be provided. | ||
| """ | ||
| return cls( | ||
| codex_client=codex_client, | ||
| project_id=project_id, | ||
| fallback_answer=fallback_answer, | ||
| backup_handler=backup_handler, | ||
| ) | ||
|
|
||
| def to_decorator(self): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we want to support ways to integrate Codex as a backup other than using the decorator?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes absolutely. The way developer can always integrate Codex as backup themselves is (ignore function names I just made them up here): Please make suggestions to this code to make it as clear as possible for developers that they can implement the above pattern themselves, and also to ensure a pleasant experience for them as they implement it.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds good. Let me think about this a bit and then will make some suggestions
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
To ensure this is the case, and remains the case, I think we should have a tutorial that shows this style of integration. Could @elisno create that as well?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Elias is working on it here: https://github.com/cleanlab/sandbox/blob/main/ml_alpha/advanced_CodexAsBackup_integrations.ipynb We will ping once it is done, and the helper methods supporting it are done in this PR. At that point, everybody can first review that tutorial and just the relevant parts of this PR.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Missing type signature, will fail CI once #13 is merged.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll have to take a closer look at this once we've finalized the code in validation.py.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I removed this method in favor of |
||
| """Factory that creates a backup decorator using the provided Codex client""" | ||
|
|
||
| def decorator(chat_method): | ||
| """ | ||
| Decorator for RAG chat methods that adds backup response handling. | ||
|
|
||
| If the original chat method returns an inadequate response, attempts to get | ||
| a backup response from Codex. Returns the backup response if available, | ||
| otherwise returns the original response. | ||
|
|
||
| Args: | ||
| chat_method: Method with signature (self, user_message: str) -> str | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Wondering if we could/should make this extensible beyond chat methods matching this exact signature. For example, if using llamaindex, the developer's chat method might return a llamaindex ChatResponse instead of a str.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1, on both the arguments side and return value side. We could make the decorator generic over everything. And if we add such a capability in the library, I think we should have a tutorial that exercises it.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good points! Making the decorator more flexible is definitely worth exploring. I’m also thinking about return types and broader extensibility. I've opened a PR for a tutorial where we assume |
||
| where 'self' refers to the instance being decorated, not an instance of CodexBackup. | ||
| """ | ||
|
|
||
| @wraps(chat_method) | ||
| def wrapper(decorated_instance, user_message): | ||
| # Call the original chat method | ||
| assistant_response = chat_method(decorated_instance, user_message) | ||
|
|
||
| # Return original response if it's adequate | ||
| if not is_bad_response(assistant_response): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems to me like this should be something that's configurable when creating a
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That was a basic implementation to get started, as we were still deciding on what the "default" checks of the detection step should be. Added more options to configure, but we're still sticking with |
||
| return assistant_response | ||
|
|
||
| # Query Codex for a backup response | ||
| cache_result = self._codex_client.query(user_message)[0] | ||
| if not cache_result: | ||
| return assistant_response | ||
|
|
||
| # Handle backup response if handler exists | ||
| self._backup_handler( | ||
| backup_response=cache_result, | ||
| decorated_instance=decorated_instance, | ||
| ) | ||
| return cache_result | ||
|
|
||
| return wrapper | ||
|
|
||
| return decorator | ||
|
elisno marked this conversation as resolved.
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,107 @@ | ||
| """ | ||
|
elisno marked this conversation as resolved.
Outdated
|
||
| This module provides validation functions for checking if an LLM response is unhelpful. | ||
|
elisno marked this conversation as resolved.
Outdated
|
||
| """ | ||
| from __future__ import annotations | ||
|
|
||
| from typing import Optional, TYPE_CHECKING | ||
|
|
||
| if TYPE_CHECKING: | ||
| from cleanlab_studio.studio.trustworthy_language_model import TLM | ||
|
|
||
|
|
||
| def is_bad_response(response: str, fallback_answer: str, threshold: int = 70) -> bool: | ||
|
elisno marked this conversation as resolved.
Outdated
|
||
| """Check if a response is too similar to a known fallback/unhelpful answer. | ||
|
|
||
| Uses fuzzy string matching to compare the response against a known fallback answer. | ||
| Returns True if the response is similar enough to be considered unhelpful. | ||
|
|
||
| Args: | ||
| response: The response to check | ||
| fallback_answer: A known unhelpful/fallback response to compare against | ||
| threshold: Similarity threshold (0-100). Higher values require more similarity. | ||
| Default 70 means responses that are 70% or more similar are considered bad. | ||
|
|
||
| Returns: | ||
| bool: True if the response is too similar to the fallback answer, False otherwise | ||
| """ | ||
| try: | ||
| from thefuzz import fuzz | ||
| except ImportError: | ||
| raise ImportError("The 'thefuzz' library is required. Please install it with `pip install thefuzz`.") | ||
|
|
||
| partial_ratio = fuzz.partial_ratio(fallback_answer.lower(), response.lower()) | ||
| return partial_ratio >= threshold | ||
|
|
||
| def is_bad_response_untrustworthy( | ||
|
elisno marked this conversation as resolved.
Outdated
|
||
| response: str, | ||
| context: str, | ||
| query: str, | ||
| tlm: TLM, | ||
| threshold: float = 0.6, | ||
| # TODO: Optimize prompt template | ||
| prompt_template: str = "Using the following Context, provide a helpful answer to the Query.\n\n Context:\n{context}\n\n Query: {query}", | ||
|
elisno marked this conversation as resolved.
Outdated
|
||
| ) -> bool: | ||
| """Check if a response is untrustworthy based on TLM's evaluation. | ||
|
|
||
| Uses TLM to evaluate whether a response is trustworthy given the context and query. | ||
| Returns True if TLM's trustworthiness score falls below the threshold, indicating | ||
| the response may be incorrect or unreliable. | ||
|
|
||
| Args: | ||
| response: The response to check from the assistant | ||
| context: The context information available for answering the query | ||
| query: The user's question or request | ||
| tlm: The TLM model to use for evaluation | ||
| threshold: Score threshold (0.0-1.0). Lower values allow less trustworthy responses. | ||
| Default 0.6, meaning responses with scores less than 0.6 are considered untrustworthy. | ||
| prompt_template: Template for formatting the evaluation prompt. Must contain {context} | ||
| and {query} placeholders. | ||
|
|
||
| Returns: | ||
| bool: True if the response is deemed untrustworthy by TLM, False otherwise | ||
| """ | ||
| prompt = prompt_template.format(context=context, query=query) | ||
| resp = tlm.get_trustworthiness_score(prompt, response) | ||
| score: float = resp['trustworthiness_score'] | ||
| return score < threshold | ||
|
|
||
| # TLM Binary Classification | ||
| def is_bad_response_unhelpful(response: str, tlm: TLM, query: Optional[str] = None, trustworthiness_score_threshold: Optional[float] = None) -> bool: | ||
|
elisno marked this conversation as resolved.
Outdated
|
||
| """Check if a response is unhelpful by asking TLM to evaluate it. | ||
|
|
||
| Uses TLM to evaluate whether a response is helpful by asking it to make a Yes/No judgment. | ||
| The evaluation considers both the TLM's binary classification of helpfulness and its | ||
| confidence score. Returns True only if TLM classifies the response as unhelpful AND | ||
| is sufficiently confident in that assessment (if a threshold is provided). | ||
|
|
||
| Args: | ||
| response: The response to check from the assistant | ||
| tlm: The TLM model to use for evaluation | ||
| query: Optional user query to provide context for evaluating helpfulness. | ||
| If provided, TLM will assess if the response helpfully answers this query. | ||
| trustworthiness_score_threshold: Optional confidence threshold (0.0-1.0). | ||
| If provided, responses are only marked unhelpful if TLM's | ||
| confidence score exceeds this threshold. | ||
|
|
||
| Returns: | ||
| bool: True if TLM determines the response is unhelpful with sufficient confidence, | ||
| False otherwise | ||
| """ | ||
| if query is None: | ||
| prompt = ( | ||
| "Consider the following AI Assistant Response.\n\n" | ||
| f"AI Assistant Response: {response}\n\n" | ||
| "Is the AI Assistant Response helpful? Remember that abstaining from responding is not helpful. Answer Yes/No only." | ||
|
elisno marked this conversation as resolved.
Outdated
|
||
| ) | ||
| else: | ||
| prompt = ( | ||
|
elisno marked this conversation as resolved.
Outdated
|
||
| "Consider the following User Query and AI Assistant Response.\n\n" | ||
| f"User Query: {query}\n\n" | ||
| f"AI Assistant Response: {response}\n\n" | ||
| "Is the AI Assistant Response helpful? Remember that abstaining from responding is not helpful. Answer Yes/No only." | ||
| ) | ||
| output = tlm.prompt(prompt, constrain_outputs=["Yes", "No"]) | ||
| response_marked_unhelpful = output["response"].lower() == "no" | ||
| # TODO: Decide if we should keep the trustworthiness score threshold. | ||
| is_trustworthy = trustworthiness_score_threshold is None or (output["trustworthiness_score"] > trustworthiness_score_threshold) | ||
| return response_marked_unhelpful and is_trustworthy | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,64 @@ | ||
| from unittest.mock import MagicMock | ||
|
|
||
| from cleanlab_codex.codex_backup import CodexBackup | ||
|
|
||
| MOCK_BACKUP_RESPONSE = "This is a test response" | ||
| FALLBACK_MESSAGE = "Based on the available information, I cannot provide a complete answer to this question." | ||
| TEST_MESSAGE = "Hello, world!" | ||
|
|
||
|
|
||
| def test_codex_backup(mock_client: MagicMock): | ||
| mock_response = MagicMock() | ||
| mock_response.answer = MOCK_BACKUP_RESPONSE | ||
| mock_client.projects.entries.query.return_value = mock_response | ||
|
|
||
| codex_backup = CodexBackup.from_access_key("") | ||
|
|
||
| class MockApp: | ||
| @codex_backup.to_decorator() | ||
| def chat(self, user_message: str) -> str: | ||
| # Just echo the user message | ||
| return user_message | ||
|
|
||
| app = MockApp() | ||
|
|
||
| # Echo works well | ||
| response = app.chat(TEST_MESSAGE) | ||
| assert response == TEST_MESSAGE | ||
|
|
||
| # Backup works well for fallback responses | ||
| response = app.chat(FALLBACK_MESSAGE) | ||
| assert response == MOCK_BACKUP_RESPONSE | ||
|
|
||
|
|
||
| def test_backup_handler(mock_client: MagicMock): | ||
| mock_response = MagicMock() | ||
| mock_response.answer = MOCK_BACKUP_RESPONSE | ||
| mock_client.projects.entries.query.return_value = mock_response | ||
|
|
||
| mock_handler = MagicMock() | ||
| mock_handler.return_value = None | ||
| codex_backup = CodexBackup.from_access_key("", backup_handler=mock_handler) | ||
|
|
||
| class MockApp: | ||
| @codex_backup.to_decorator() | ||
| def chat(self, user_message: str) -> str: | ||
| # Just echo the user message | ||
| return user_message | ||
|
|
||
| app = MockApp() | ||
|
|
||
| response = app.chat(TEST_MESSAGE) | ||
| assert response == TEST_MESSAGE | ||
|
|
||
| # Handler should not be called for good responses | ||
| assert mock_handler.call_count == 0 | ||
|
|
||
| response = app.chat(FALLBACK_MESSAGE) | ||
| assert response == MOCK_BACKUP_RESPONSE | ||
|
|
||
| # Handler should be called for bad responses | ||
| assert mock_handler.call_count == 1 | ||
| # The MockApp is the second argument to the handler, i.e. it has the necessary context | ||
| # to handle the new response | ||
| assert mock_handler.call_args.kwargs["decorated_instance"] == app |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Delete this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'll hold off on removing this until we've finalized the code in "validation.py".
The intention was to pass the fallback answer from the backup object to the relevant
is_fallback_response helper function before deciding to call Codex as Backup.