diff --git a/tests/conftest.py b/tests/conftest.py
index d1cd95fb77..c6dc92d2b2 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1177,6 +1177,30 @@ def streaming_chat_completions_model_response():
     ]
 
 
+@pytest.fixture
+def nonstreaming_chat_completions_model_response():
+    return openai.types.chat.ChatCompletion(
+        id="chatcmpl-test",
+        choices=[
+            openai.types.chat.chat_completion.Choice(
+                index=0,
+                finish_reason="stop",
+                message=openai.types.chat.ChatCompletionMessage(
+                    role="assistant", content="Test response"
+                ),
+            )
+        ],
+        created=1234567890,
+        model="gpt-3.5-turbo",
+        object="chat.completion",
+        usage=openai.types.CompletionUsage(
+            prompt_tokens=10,
+            completion_tokens=20,
+            total_tokens=30,
+        ),
+    )
+
+
 @pytest.fixture
 def nonstreaming_responses_model_response():
     return openai.types.responses.Response(
diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py
index 3403c2e5a0..9282905fab 100644
--- a/tests/integrations/litellm/test_litellm.py
+++ b/tests/integrations/litellm/test_litellm.py
@@ -163,7 +163,13 @@ def model_dump(self):
     ],
 )
 def test_nonstreaming_chat_completion(
-    sentry_init, capture_events, send_default_pii, include_prompts
+    reset_litellm_executor,
+    sentry_init,
+    capture_events,
+    send_default_pii,
+    include_prompts,
+    get_model_response,
+    nonstreaming_chat_completions_model_response,
 ):
     sentry_init(
         integrations=[LiteLLMIntegration(include_prompts=include_prompts)],
@@ -173,22 +173,28 @@ def test_nonstreaming_chat_completion(
     events = capture_events()
 
     messages = [{"role": "user", "content": "Hello!"}]
-    mock_response = MockCompletionResponse()
 
-    with start_transaction(name="litellm test"):
-        # Simulate what litellm does: call input callback, then success callback
-        kwargs = {
-            "model": "gpt-3.5-turbo",
-            "messages": messages,
-        }
+    client = OpenAI(api_key="z")
 
-        _input_callback(kwargs)
-        _success_callback(
-            kwargs,
-            mock_response,
-            datetime.now(),
-            datetime.now(),
-        )
+    model_response = get_model_response(
+        nonstreaming_chat_completions_model_response,
+        serialize_pydantic=True,
+        request_headers={"X-Stainless-Raw-Response": "True"},
+    )
+
+    with mock.patch.object(
+        client.completions._client._client,
+        "send",
+        return_value=model_response,
+    ):
+        with start_transaction(name="litellm test"):
+            litellm.completion(
+                model="gpt-3.5-turbo",
+                messages=messages,
+                client=client,
+            )
+
+    litellm_utils.executor.shutdown(wait=True)
 
     assert len(events) == 1
     (event,) = events
@@ -196,8 +208,13 @@
     assert event["type"] == "transaction"
     assert event["transaction"] == "litellm test"
 
-    assert len(event["spans"]) == 1
-    (span,) = event["spans"]
+    chat_spans = list(
+        x
+        for x in event["spans"]
+        if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm"
+    )
+    assert len(chat_spans) == 1
+    span = chat_spans[0]
 
     assert span["op"] == OP.GEN_AI_CHAT
     assert span["description"] == "chat gpt-3.5-turbo"