Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1177,6 +1177,30 @@ def streaming_chat_completions_model_response():
]


@pytest.fixture
def nonstreaming_chat_completions_model_response():
    """Canned non-streaming OpenAI ``ChatCompletion`` for chat-completions tests.

    Mirrors the shape of a real API response: a single assistant choice with
    ``finish_reason="stop"`` plus fixed token-usage numbers, so tests can make
    deterministic assertions against the recorded span/event data.
    """
    # Build the nested pieces first so the return expression stays flat.
    token_usage = openai.types.CompletionUsage(
        prompt_tokens=10,
        completion_tokens=20,
        total_tokens=30,
    )
    assistant_message = openai.types.chat.ChatCompletionMessage(
        role="assistant",
        content="Test response",
    )
    only_choice = openai.types.chat.chat_completion.Choice(
        index=0,
        finish_reason="stop",
        message=assistant_message,
    )
    return openai.types.chat.ChatCompletion(
        id="chatcmpl-test",
        choices=[only_choice],
        created=1234567890,
        model="gpt-3.5-turbo",
        object="chat.completion",
        usage=token_usage,
    )


@pytest.fixture
def nonstreaming_responses_model_response():
return openai.types.responses.Response(
Expand Down
51 changes: 34 additions & 17 deletions tests/integrations/litellm/test_litellm.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,13 @@ def model_dump(self):
],
)
def test_nonstreaming_chat_completion(
sentry_init, capture_events, send_default_pii, include_prompts
reset_litellm_executor,
sentry_init,
capture_events,
send_default_pii,
include_prompts,
get_model_response,
nonstreaming_chat_completions_model_response,
):
sentry_init(
integrations=[LiteLLMIntegration(include_prompts=include_prompts)],
Expand All @@ -173,31 +179,42 @@ def test_nonstreaming_chat_completion(
events = capture_events()

messages = [{"role": "user", "content": "Hello!"}]
mock_response = MockCompletionResponse()

with start_transaction(name="litellm test"):
# Simulate what litellm does: call input callback, then success callback
kwargs = {
"model": "gpt-3.5-turbo",
"messages": messages,
}
client = OpenAI(api_key="z")

_input_callback(kwargs)
_success_callback(
kwargs,
mock_response,
datetime.now(),
datetime.now(),
)
model_response = get_model_response(
nonstreaming_chat_completions_model_response,
serialize_pydantic=True,
request_headers={"X-Stainless-Raw-Response": "True"},
)

with mock.patch.object(
client.completions._client._client,
"send",
return_value=model_response,
):
with start_transaction(name="litellm test"):
litellm.completion(
model="gpt-3.5-turbo",
messages=messages,
client=client,
)

litellm_utils.executor.shutdown(wait=True)

assert len(events) == 1
(event,) = events

assert event["type"] == "transaction"
assert event["transaction"] == "litellm test"

assert len(event["spans"]) == 1
(span,) = event["spans"]
chat_spans = list(
x
for x in event["spans"]
if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm"
)
assert len(chat_spans) == 1
span = chat_spans[0]

assert span["op"] == OP.GEN_AI_CHAT
assert span["description"] == "chat gpt-3.5-turbo"
Expand Down
Loading