From 5b32559105f07a981e0cb4656bdd2d18ca31101f Mon Sep 17 00:00:00 2001 From: Yufeng He <40085740+he-yufeng@users.noreply.github.com> Date: Sat, 23 May 2026 00:25:05 +0800 Subject: [PATCH] fix: let Document Intelligence use SDK default API version --- .../converters/_doc_intel_converter.py | 17 ++++--- .../markitdown/tests/test_docintel_html.py | 46 +++++++++++++++++++ 2 files changed, 56 insertions(+), 7 deletions(-) diff --git a/packages/markitdown/src/markitdown/converters/_doc_intel_converter.py b/packages/markitdown/src/markitdown/converters/_doc_intel_converter.py index fd843f231..d1bfbaa7c 100644 --- a/packages/markitdown/src/markitdown/converters/_doc_intel_converter.py +++ b/packages/markitdown/src/markitdown/converters/_doc_intel_converter.py @@ -134,7 +134,7 @@ def __init__( self, *, endpoint: str, - api_version: str = "2024-07-31-preview", + api_version: str | None = None, credential: AzureKeyCredential | TokenCredential | None = None, file_types: List[DocumentIntelligenceFileType] = [ DocumentIntelligenceFileType.DOCX, @@ -152,7 +152,7 @@ def __init__( Args: endpoint (str): The endpoint for the Document Intelligence service. - api_version (str): The API version to use. Defaults to "2024-07-31-preview". + api_version (str | None): The API version to use. Defaults to the Azure SDK default. credential (AzureKeyCredential | TokenCredential | None): The credential to use for authentication. file_types (List[DocumentIntelligenceFileType]): The file types to accept. Defaults to all supported file types. """ @@ -180,11 +180,14 @@ def __init__( self.endpoint = endpoint self.api_version = api_version - self.doc_intel_client = DocumentIntelligenceClient( - endpoint=self.endpoint, - api_version=self.api_version, - credential=credential, - ) + client_args: dict[str, Any] = { + "endpoint": self.endpoint, + "credential": credential, + } + if self.api_version is not None: + client_args["api_version"] = self.api_version + + self.doc_intel_client = DocumentIntelligenceClient(**client_args) def accepts( self, diff --git a/packages/markitdown/tests/test_docintel_html.py b/packages/markitdown/tests/test_docintel_html.py index d0b4caa3e..ed514f99d 100644 --- a/packages/markitdown/tests/test_docintel_html.py +++ b/packages/markitdown/tests/test_docintel_html.py @@ -1,4 +1,5 @@ import io +import markitdown.converters._doc_intel_converter as docintel from markitdown.converters._doc_intel_converter import ( DocumentIntelligenceConverter, DocumentIntelligenceFileType, @@ -24,3 +25,48 @@ def test_docintel_accepts_html_mimetype(): assert conv.accepts(io.BytesIO(b""), stream_info) stream_info = StreamInfo(mimetype="application/xhtml+xml", extension=None) assert conv.accepts(io.BytesIO(b""), stream_info) + + +def test_docintel_uses_sdk_default_api_version(monkeypatch): + captured_args = {} + + class FakeDocumentIntelligenceClient: + def __init__(self, **kwargs): + captured_args.update(kwargs) + + monkeypatch.setattr(docintel, "_dependency_exc_info", None) + monkeypatch.setattr( + docintel, "DocumentIntelligenceClient", FakeDocumentIntelligenceClient + ) + + credential = object() + conv = DocumentIntelligenceConverter( + endpoint="https://example.cognitiveservices.azure.com/", + credential=credential, + ) + + assert conv.api_version is None + assert "api_version" not in captured_args + assert captured_args["endpoint"] == "https://example.cognitiveservices.azure.com/" + assert captured_args["credential"] is credential + + +def test_docintel_passes_explicit_api_version(monkeypatch): + captured_args = {} + + class FakeDocumentIntelligenceClient: + def __init__(self, **kwargs): + captured_args.update(kwargs) + + monkeypatch.setattr(docintel, "_dependency_exc_info", None) + monkeypatch.setattr( + docintel, "DocumentIntelligenceClient", FakeDocumentIntelligenceClient + ) + + DocumentIntelligenceConverter( + endpoint="https://example.cognitiveservices.azure.com/", + credential=object(), + api_version="2024-07-31-preview", + ) + + assert captured_args["api_version"] == "2024-07-31-preview"