Skip to content
Merged
1 change: 1 addition & 0 deletions docs/en_US/release_notes_9_14.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,5 +41,6 @@ Bug fixes
| `Issue #9721 <https://github.com/pgadmin-org/pgadmin4/issues/9721>`_ - Fixed an issue where permissions page is not completely accessible on full scroll.
| `Issue #9729 <https://github.com/pgadmin-org/pgadmin4/issues/9729>`_ - Fixed an issue where some LLM models would not use database tools in the AI assistant, instead returning text descriptions of tool calls.
| `Issue #9732 <https://github.com/pgadmin-org/pgadmin4/issues/9732>`_ - Improved the AI Assistant user prompt to be more descriptive of the actual functionality.
| `Issue #9734 <https://github.com/pgadmin-org/pgadmin4/issues/9734>`_ - Fixed an issue where LLM responses are not streamed or rendered properly in the AI Assistant.
| `Issue #9736 <https://github.com/pgadmin-org/pgadmin4/issues/9736>`_ - Fixed an issue where the AI Assistant was not retaining conversation context between messages, with chat history compaction to manage token budgets.
| `Issue #9740 <https://github.com/pgadmin-org/pgadmin4/issues/9740>`_ - Fixed an issue where the AI Assistant input textbox sometimes swallows the first character of input.
117 changes: 115 additions & 2 deletions web/pgadmin/llm/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,11 @@
"""

import json
from typing import Optional
from collections.abc import Generator
from typing import Optional, Union

from pgadmin.llm.client import get_llm_client, is_llm_available
from pgadmin.llm.models import Message, StopReason
from pgadmin.llm.models import Message, LLMResponse, StopReason
from pgadmin.llm.tools import DATABASE_TOOLS, execute_tool, DatabaseToolError
from pgadmin.llm.utils import get_max_tool_iterations

Expand Down Expand Up @@ -153,6 +154,118 @@ def chat_with_database(
)


def _run_tool_call(tool_call, sid, did):
    """Execute one tool call and wrap the outcome as a tool-result Message.

    Expected errors (DatabaseToolError, ValueError) and unexpected ones are
    both converted into error-flagged tool results rather than raised, so a
    failing tool never aborts the streaming conversation.
    """
    try:
        payload = execute_tool(
            tool_name=tool_call.name,
            arguments=tool_call.arguments,
            sid=sid,
            did=did
        )
        return Message.tool_result(
            tool_call_id=tool_call.id,
            content=json.dumps(payload, default=str),
            is_error=False
        )
    except (DatabaseToolError, ValueError) as e:
        return Message.tool_result(
            tool_call_id=tool_call.id,
            content=json.dumps({"error": str(e)}),
            is_error=True
        )
    except Exception as e:
        # Catch-all: report the failure back to the LLM instead of crashing.
        return Message.tool_result(
            tool_call_id=tool_call.id,
            content=json.dumps({"error": f"Unexpected error: {str(e)}"}),
            is_error=True
        )


def chat_with_database_stream(
    user_message: str,
    sid: int,
    did: int,
    conversation_history: Optional[list[Message]] = None,
    system_prompt: Optional[str] = None,
    max_tool_iterations: Optional[int] = None,
    provider: Optional[str] = None,
    model: Optional[str] = None
) -> Generator[Union[str, tuple], None, None]:
    """
    Stream an LLM chat conversation with database tool access.

    Streaming counterpart of chat_with_database: text chunks of the final
    answer are yielded as they arrive from the provider. Tool-use rounds
    execute silently except for a ('tool_use', [tool names]) tuple that
    lets the caller reset streaming state / show a progress indicator.

    Yields:
        str: Text content chunks from the final LLM response.
        tuple: Either ('tool_use', list_of_tool_names) when tools run, or
            the terminating 3-tuple
            ('complete', final_response_text, updated_conversation_history).

    Raises:
        LLMClientError: If the LLM request fails.
        RuntimeError: If LLM is not available or max iterations exceeded.
    """
    if not is_llm_available():
        raise RuntimeError("LLM is not configured. Please configure an LLM "
                           "provider in Preferences > AI.")

    client = get_llm_client(provider=provider, model=model)
    if not client:
        raise RuntimeError("Failed to create LLM client")

    # Work on a copy so the caller's history list is never mutated.
    history = list(conversation_history) if conversation_history else []
    history.append(Message.user(user_message))

    prompt = DEFAULT_SYSTEM_PROMPT if system_prompt is None else system_prompt
    budget = (get_max_tool_iterations() if max_tool_iterations is None
              else max_tool_iterations)

    for _ in range(budget):
        # Stream this round; relay text chunks, capture the final response.
        final_response = None
        for chunk in client.chat_stream(
            messages=history,
            tools=DATABASE_TOOLS,
            system_prompt=prompt
        ):
            if isinstance(chunk, LLMResponse):
                final_response = chunk
            elif isinstance(chunk, str):
                yield chunk

        if final_response is None:
            raise RuntimeError("No response received from LLM")

        history.append(final_response.to_message())

        if final_response.stop_reason != StopReason.TOOL_USE:
            # Final answer: 3-tuple distinguishes it from the 2-tuple
            # tool_use event below.
            yield ('complete', final_response.content, history)
            return

        # Tell the caller tools are running before executing them.
        yield ('tool_use', [tc.name for tc in final_response.tool_calls])
        history.extend(
            _run_tool_call(tc, sid, did)
            for tc in final_response.tool_calls
        )

    raise RuntimeError(
        f"Exceeded maximum tool iterations ({budget})"
    )


def single_query(
question: str,
sid: int,
Expand Down
45 changes: 44 additions & 1 deletion web/pgadmin/llm/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
"""Base LLM client interface and factory."""

from abc import ABC, abstractmethod
from typing import Optional
from collections.abc import Generator
from typing import Optional, Union

from pgadmin.llm.models import (
Message, Tool, LLMResponse, LLMError
Expand Down Expand Up @@ -74,6 +75,48 @@ def chat(
"""
pass

def chat_stream(
    self,
    messages: list[Message],
    tools: Optional[list[Tool]] = None,
    system_prompt: Optional[str] = None,
    max_tokens: int = 4096,
    temperature: float = 0.0,
    **kwargs
) -> Generator[Union[str, LLMResponse], None, None]:
    """
    Stream a chat response from the LLM.

    Text chunks (str) are yielded as they arrive; the very last item
    yielded is the complete LLMResponse carrying full metadata.

    This base implementation does not actually stream: it delegates to
    the blocking chat() call, emits the whole text (if any) as a single
    chunk, then emits the response object. Providers with native
    streaming support should override it.

    Args:
        messages: List of conversation messages.
        tools: Optional list of tools the LLM can use.
        system_prompt: Optional system prompt to set context.
        max_tokens: Maximum tokens in the response.
        temperature: Sampling temperature (0.0 = deterministic).
        **kwargs: Additional provider-specific parameters.

    Yields:
        str: Text content chunks as they arrive.
        LLMResponse: Final response with complete metadata (last item).
    """
    # Non-streaming fallback: one blocking request, replayed as a
    # single-chunk "stream".
    result = self.chat(
        messages=messages,
        tools=tools,
        system_prompt=system_prompt,
        max_tokens=max_tokens,
        temperature=temperature,
        **kwargs
    )
    if result.content:
        yield result.content
    yield result

def validate_connection(self) -> tuple[bool, Optional[str]]:
"""
Validate the connection to the LLM provider.
Expand Down
15 changes: 6 additions & 9 deletions web/pgadmin/llm/prompts/nlq.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,10 @@
- Use explicit column names instead of SELECT *
- For UPDATE/DELETE, always include WHERE clauses

Once you have explored the database structure using the tools above, \
provide your final answer as a JSON object in this exact format:
{"sql": "YOUR SQL QUERY HERE", "explanation": "Brief explanation"}

Rules for the final response:
- Return ONLY the JSON object, no other text
- No markdown code blocks
- If you need clarification, set "sql" to null and put \
your question in "explanation"
Response format:
- Always put SQL in fenced code blocks with the sql language tag
- You may include multiple SQL blocks if the request needs \
multiple statements
- Briefly explain what each query does
- If you need clarification, just ask — no code blocks needed
"""
Loading
Loading