Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/agents/tracing/span_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def export(self) -> dict[str, Any]:
"type": self.type,
"name": self.name,
"input": self.input,
"output": str(self.output) if self.output else None,
"output": self.output,
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Preserve JSON-safe fallback for function span outputs

Passing `self.output` through unmodified can break trace export when a tool returns a non-JSON-serializable object (for example a custom class or `datetime`) and the `BackendSpanExporter` is configured with a non-default endpoint. In that configuration `_should_sanitize_for_openai_tracing_api()` returns `False`, so `httpx.Client.post(..., json=payload)` receives raw span data and raises during JSON encoding, whereas the previous `str(...)` conversion kept function span payloads serializable. This makes tracing brittle for custom exporters even though the same tool output still works for run execution.

Useful? React with 👍 / 👎.

"mcp_data": self.mcp_data,
}
Comment on lines 88 to 93
Copy link

Copilot AI Apr 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`FunctionSpanData.export()` now returns `self.output` verbatim, but `Trace.export()`/`BackendSpanExporter` expect the exported payload to be JSON-serializable. Tool results can be arbitrary Python objects (including Pydantic models like `ToolOutputText`), which will make `httpx.post(..., json=payload)` raise `TypeError` for non-OpenAI endpoints (and even for the OpenAI endpoint the sanitizer will degrade such values to a generic `"<... truncated>"` preview). Consider normalizing output to a JSON-compatible value (e.g., pass through dict/list/str/number/bool/`None`; convert Pydantic models via `model_dump(mode="json")`, dataclasses via `asdict`, tuples/sets via `list`; otherwise fall back to `str(output)`), while still preserving falsy values like `0`/`False`/`""`/`[]`.

Copilot uses AI. Check for mistakes.

Expand Down
6 changes: 3 additions & 3 deletions tests/mcp/test_mcp_tracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ async def test_mcp_tracing():
"data": {
"name": "test_tool_1",
"input": "",
"output": "{'type': 'text', 'text': 'result_test_tool_1_{}'}", # noqa: E501
"output": {"type": "text", "text": "result_test_tool_1_{}"}, # noqa: E501
"mcp_data": {"server": "fake_mcp_server"},
},
},
Expand Down Expand Up @@ -133,7 +133,7 @@ async def test_mcp_tracing():
"data": {
"name": "test_tool_2",
"input": "",
"output": "{'type': 'text', 'text': 'result_test_tool_2_{}'}", # noqa: E501
"output": {"type": "text", "text": "result_test_tool_2_{}"}, # noqa: E501
"mcp_data": {"server": "fake_mcp_server"},
},
},
Expand Down Expand Up @@ -197,7 +197,7 @@ async def test_mcp_tracing():
"data": {
"name": "test_tool_3",
"input": "",
"output": "{'type': 'text', 'text': 'result_test_tool_3_{}'}", # noqa: E501
"output": {"type": "text", "text": "result_test_tool_3_{}"}, # noqa: E501
"mcp_data": {"server": "fake_mcp_server"},
},
},
Expand Down
67 changes: 67 additions & 0 deletions tests/tracing/test_span_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
"""Tests for span data export methods."""

from __future__ import annotations

import pytest

from agents.tracing.span_data import FunctionSpanData


class TestFunctionSpanDataExport:
    """FunctionSpanData.export() must preserve output values faithfully.

    Each test builds a ``FunctionSpanData`` with a particular ``output`` and
    checks that the value found under ``exported["output"]`` is the same value,
    rather than a ``str(...)`` rendering of it or ``None``.
    """

    def test_dict_output_preserved_as_dict(self) -> None:
        """Dict outputs should stay as dicts, not be converted to Python repr strings."""
        span = FunctionSpanData(name="my_tool", input="query", output={"key": "value", "n": 42})
        exported = span.export()
        assert exported["output"] == {"key": "value", "n": 42}
        assert isinstance(exported["output"], dict)

    def test_string_output_preserved(self) -> None:
        """String outputs should be exported unchanged."""
        span = FunctionSpanData(name="my_tool", input="query", output="hello world")
        exported = span.export()
        assert exported["output"] == "hello world"

    def test_none_output_preserved(self) -> None:
        """A ``None`` output should be exported as ``None``."""
        span = FunctionSpanData(name="my_tool", input="query", output=None)
        exported = span.export()
        assert exported["output"] is None

    @pytest.mark.parametrize(
        "output",
        [0, False, "", []],
        ids=["zero", "false", "empty_str", "empty_list"],
    )
    def test_falsy_output_not_converted_to_none(self, output: object) -> None:
        """Falsy but valid outputs (0, False, '', []) must not become None."""
        span = FunctionSpanData(name="my_tool", input="query", output=output)
        exported = span.export()
        assert exported["output"] is not None
        assert exported["output"] == output
        # Equality alone cannot tell 0 from False (0 == False in Python), so
        # also pin the exact type to catch bool <-> int coercion.
        assert type(exported["output"]) is type(output)

    def test_list_output_preserved(self) -> None:
        """List outputs should stay as lists with the same elements."""
        span = FunctionSpanData(name="my_tool", input="query", output=[1, 2, 3])
        exported = span.export()
        assert exported["output"] == [1, 2, 3]
        assert isinstance(exported["output"], list)

    def test_numeric_output_preserved(self) -> None:
        """Integer outputs should be exported as the same integer."""
        span = FunctionSpanData(name="my_tool", input="query", output=42)
        exported = span.export()
        assert exported["output"] == 42
        # 42 == 42.0 is also true, so check the exact type rather than just equality.
        assert type(exported["output"]) is int

    def test_export_includes_all_fields(self) -> None:
        """The exported dict should contain exactly type, name, input, output and mcp_data."""
        span = FunctionSpanData(
            name="my_tool",
            input="query",
            output="result",
            mcp_data={"server": "test"},
        )
        exported = span.export()
        assert exported == {
            "type": "function",
            "name": "my_tool",
            "input": "query",
            "output": "result",
            "mcp_data": {"server": "test"},
        }
Loading