Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1177,6 +1177,30 @@ def streaming_chat_completions_model_response():
]


@pytest.fixture
def nonstreaming_chat_completions_model_response():
    """Canned non-streaming OpenAI ``ChatCompletion`` for chat-completions tests.

    Mirrors the shape of a real API response: a single assistant choice with
    ``finish_reason="stop"`` plus fixed token-usage numbers, so tests can make
    deterministic assertions against the recorded span/event data.
    """
    # Build the nested pieces first so the return expression stays flat.
    token_usage = openai.types.CompletionUsage(
        prompt_tokens=10,
        completion_tokens=20,
        total_tokens=30,
    )
    assistant_message = openai.types.chat.ChatCompletionMessage(
        role="assistant",
        content="Test response",
    )
    only_choice = openai.types.chat.chat_completion.Choice(
        index=0,
        finish_reason="stop",
        message=assistant_message,
    )
    return openai.types.chat.ChatCompletion(
        id="chatcmpl-test",
        choices=[only_choice],
        created=1234567890,
        model="gpt-3.5-turbo",
        object="chat.completion",
        usage=token_usage,
    )


@pytest.fixture
def nonstreaming_responses_model_response():
return openai.types.responses.Response(
Expand Down
51 changes: 34 additions & 17 deletions tests/integrations/litellm/test_litellm.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,13 @@ def model_dump(self):
],
)
def test_nonstreaming_chat_completion(
sentry_init, capture_events, send_default_pii, include_prompts
reset_litellm_executor,
sentry_init,
capture_events,
send_default_pii,
include_prompts,
get_model_response,
nonstreaming_chat_completions_model_response,
):
sentry_init(
integrations=[LiteLLMIntegration(include_prompts=include_prompts)],
Expand All @@ -173,31 +179,42 @@ def test_nonstreaming_chat_completion(
events = capture_events()

messages = [{"role": "user", "content": "Hello!"}]
mock_response = MockCompletionResponse()

with start_transaction(name="litellm test"):
# Simulate what litellm does: call input callback, then success callback
kwargs = {
"model": "gpt-3.5-turbo",
"messages": messages,
}
client = OpenAI(api_key="z")

_input_callback(kwargs)
_success_callback(
kwargs,
mock_response,
datetime.now(),
datetime.now(),
)
model_response = get_model_response(
nonstreaming_chat_completions_model_response,
serialize_pydantic=True,
request_headers={"X-Stainless-Raw-Response": "True"},
)

with mock.patch.object(
client.completions._client._client,
"send",
return_value=model_response,
):
with start_transaction(name="litellm test"):
litellm.completion(
model="gpt-3.5-turbo",
messages=messages,
client=client,
)

litellm_utils.executor.shutdown(wait=True)

assert len(events) == 1
(event,) = events

assert event["type"] == "transaction"
assert event["transaction"] == "litellm test"

assert len(event["spans"]) == 1
(span,) = event["spans"]
chat_spans = list(
x
for x in event["spans"]
if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm"
)
assert len(chat_spans) == 1
span = chat_spans[0]

assert span["op"] == OP.GEN_AI_CHAT
assert span["description"] == "chat gpt-3.5-turbo"
Expand Down
Loading