From c976abdb5b44b247b4a9c95d25468a65bddc8f86 Mon Sep 17 00:00:00 2001 From: Changjian Wang Date: Thu, 12 Mar 2026 18:36:03 +0800 Subject: [PATCH 1/5] Add ContentRange support for document and media analysis - Implemented ContentRange functionality in sample scripts for analyzing binary documents and URLs. - Added examples for analyzing specific pages and combined page ranges in `sample_analyze_binary.py`. - Enhanced `sample_analyze_url.py` with ContentRange examples for documents, videos, and audio, including time-based ranges. - Created unit tests for ContentRange functionality, covering various scenarios and edge cases. - Updated existing tests to validate ContentRange behavior in document and media analysis. --- .../azure/ai/contentunderstanding/_patch.py | 15 +- .../ai/contentunderstanding/aio/_patch.py | 15 +- .../models/_content_range.py | 197 ++++++++++++ .../ai/contentunderstanding/models/_patch.py | 2 + .../sample_analyze_binary_async.py | 41 +++ .../async_samples/sample_analyze_url_async.py | 192 +++++++++++- .../samples/sample_analyze_binary.py | 35 +++ .../samples/sample_analyze_url.py | 192 +++++++++++- .../samples/test_sample_analyze_binary.py | 134 ++++++++ .../tests/samples/test_sample_analyze_url.py | 292 +++++++++++++++++- .../tests/test_content_range.py | 183 +++++++++++ 11 files changed, 1285 insertions(+), 13 deletions(-) create mode 100644 sdk/contentunderstanding/azure-ai-contentunderstanding/azure/ai/contentunderstanding/models/_content_range.py create mode 100644 sdk/contentunderstanding/azure-ai-contentunderstanding/tests/test_content_range.py diff --git a/sdk/contentunderstanding/azure-ai-contentunderstanding/azure/ai/contentunderstanding/_patch.py b/sdk/contentunderstanding/azure-ai-contentunderstanding/azure/ai/contentunderstanding/_patch.py index 5a4862ed16e6..3fc555488ee5 100644 --- a/sdk/contentunderstanding/azure-ai-contentunderstanding/azure/ai/contentunderstanding/_patch.py +++ 
b/sdk/contentunderstanding/azure-ai-contentunderstanding/azure/ai/contentunderstanding/_patch.py @@ -236,7 +236,7 @@ def begin_analyze_binary( analyzer_id: str, binary_input: bytes, *, - content_range: Optional[str] = None, + content_range: Optional[Union[str, _models.ContentRange]] = None, content_type: str = "application/octet-stream", processing_location: Optional[Union[str, _models.ProcessingLocation]] = None, **kwargs: Any, @@ -247,9 +247,11 @@ def begin_analyze_binary( :type analyzer_id: str :param binary_input: The binary content of the document to analyze. Required. :type binary_input: bytes - :keyword content_range: Range of the input to analyze (ex. ``1-3,5,9-``). Document content uses - 1-based page numbers, while audio visual content uses integer milliseconds. Default value is None. - :paramtype content_range: str + :keyword content_range: Range of the input to analyze. Accepts a + :class:`~azure.ai.contentunderstanding.models.ContentRange` or a raw string + (ex. ``"1-3,5,9-"``). Document content uses 1-based page numbers, + while audio visual content uses integer milliseconds. Default value is None. + :paramtype content_range: str or ~azure.ai.contentunderstanding.models.ContentRange :keyword content_type: Body Parameter content-type. Content type parameter for binary body. Default value is "application/octet-stream". :paramtype content_type: str @@ -266,13 +268,16 @@ def begin_analyze_binary( matches Python's native string indexing behavior (len() and str[i] use code points). This ensures ContentSpan offsets work correctly with Python string slicing. 
""" + # Convert ContentRange to string if needed + content_range_str = str(content_range) if content_range is not None else None + # Call parent implementation with string_encoding set to "codePoint" # (matches Python's string indexing) poller = super().begin_analyze_binary( analyzer_id=analyzer_id, binary_input=binary_input, string_encoding="codePoint", - content_range=content_range, + content_range=content_range_str, content_type=content_type, processing_location=processing_location, **kwargs, diff --git a/sdk/contentunderstanding/azure-ai-contentunderstanding/azure/ai/contentunderstanding/aio/_patch.py b/sdk/contentunderstanding/azure-ai-contentunderstanding/azure/ai/contentunderstanding/aio/_patch.py index 316b8f6a008a..14620146bd8b 100644 --- a/sdk/contentunderstanding/azure-ai-contentunderstanding/azure/ai/contentunderstanding/aio/_patch.py +++ b/sdk/contentunderstanding/azure-ai-contentunderstanding/azure/ai/contentunderstanding/aio/_patch.py @@ -237,7 +237,7 @@ async def begin_analyze_binary( analyzer_id: str, binary_input: bytes, *, - content_range: Optional[str] = None, + content_range: Optional[Union[str, _models.ContentRange]] = None, content_type: str = "application/octet-stream", processing_location: Optional[Union[str, _models.ProcessingLocation]] = None, **kwargs: Any, @@ -248,9 +248,11 @@ async def begin_analyze_binary( :type analyzer_id: str :param binary_input: The binary content of the document to analyze. Required. :type binary_input: bytes - :keyword content_range: Range of the input to analyze (ex. ``1-3,5,9-``). Document content uses - 1-based page numbers, while audio visual content uses integer milliseconds. Default value is None. - :paramtype content_range: str + :keyword content_range: Range of the input to analyze. Accepts a + :class:`~azure.ai.contentunderstanding.models.ContentRange` or a raw string + (ex. ``"1-3,5,9-"``). Document content uses 1-based page numbers, + while audio visual content uses integer milliseconds. 
# coding=utf-8
# --------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for license information.
# --------------------------------------------------------------------------

"""ContentRange value type for specifying content ranges on AnalysisInput."""

from datetime import timedelta
from typing import Optional


class ContentRange:
    """Represents a range of content to analyze.

    For documents, ranges use 1-based page numbers (e.g., ``"1-3"``, ``"5"``, ``"9-"``).
    For audio/video, ranges use integer milliseconds (e.g., ``"0-5000"``, ``"5000-"``).
    Multiple ranges can be combined with commas (e.g., ``"1-3,5,9-"``).

    Instances wrap the range string; ``str(instance)`` returns it unchanged.
    Two instances compare equal (and hash equal) when their range strings
    are equal.

    Example usage::

        # Document pages
        content_range = ContentRange.page(5)          # "5"
        content_range = ContentRange.pages(1, 3)      # "1-3"
        content_range = ContentRange.pages_from(9)    # "9-"

        # Audio/video time ranges
        content_range = ContentRange.time_range(
            timedelta(0), timedelta(seconds=5))       # "0-5000"
        content_range = ContentRange.time_range_from(
            timedelta(seconds=5))                     # "5000-"

        # Combine multiple ranges
        content_range = ContentRange.combine(
            ContentRange.pages(1, 3),
            ContentRange.page(5),
            ContentRange.pages_from(9))               # "1-3,5,9-"

        # Or construct from a raw string
        content_range = ContentRange("1-3,5,9-")
    """

    def __init__(self, value: str) -> None:
        """Initialize a new ContentRange.

        :param value: The range string value.
        :type value: str
        :raises ValueError: If value is None or empty.
        """
        if value is None:
            raise ValueError("value cannot be None.")
        # The documented contract also rejects an empty range string.
        if value == "":
            raise ValueError("value cannot be empty.")
        self._value = value

    @staticmethod
    def _to_milliseconds(duration: timedelta) -> int:
        """Convert a timedelta to whole milliseconds, truncating sub-millisecond parts.

        :param duration: The duration to convert.
        :type duration: ~datetime.timedelta
        :return: The number of whole milliseconds in ``duration``.
        :rtype: int
        """
        # timedelta // timedelta is exact integer arithmetic. Going through
        # ``total_seconds() * 1000`` rounds in floating point first, so for
        # example 1001 ms became 1000.9999999999999 and was truncated to 1000.
        return duration // timedelta(milliseconds=1)

    @classmethod
    def page(cls, page_number: int) -> "ContentRange":
        """Create a ContentRange for a single document page (1-based).

        :param page_number: The 1-based page number.
        :type page_number: int
        :return: A ContentRange representing a single page, e.g. ``"5"``.
        :rtype: ~azure.ai.contentunderstanding.models.ContentRange
        :raises ValueError: If page_number is less than 1.
        """
        if page_number < 1:
            raise ValueError("Page number must be >= 1.")
        return cls(str(page_number))

    @classmethod
    def pages(cls, start: int, end: int) -> "ContentRange":
        """Create a ContentRange for a contiguous range of document pages (1-based, inclusive).

        :param start: The 1-based start page number (inclusive).
        :type start: int
        :param end: The 1-based end page number (inclusive).
        :type end: int
        :return: A ContentRange representing the page range, e.g. ``"1-3"``.
        :rtype: ~azure.ai.contentunderstanding.models.ContentRange
        :raises ValueError: If start is less than 1, or end is less than start.
        """
        if start < 1:
            raise ValueError("Start page must be >= 1.")
        if end < start:
            raise ValueError("End page must be >= start page.")
        return cls(f"{start}-{end}")

    @classmethod
    def pages_from(cls, start_page: int) -> "ContentRange":
        """Create a ContentRange for all pages from a starting page to the end (1-based).

        :param start_page: The 1-based start page number (inclusive).
        :type start_page: int
        :return: A ContentRange representing the open-ended range, e.g. ``"9-"``.
        :rtype: ~azure.ai.contentunderstanding.models.ContentRange
        :raises ValueError: If start_page is less than 1.
        """
        if start_page < 1:
            raise ValueError("Start page must be >= 1.")
        return cls(f"{start_page}-")

    @classmethod
    def _time_range_ms(cls, start_ms: int, end_ms: int) -> "ContentRange":
        """Create a ContentRange for a time range in milliseconds (for audio/video).

        :param start_ms: The start time in milliseconds (inclusive).
        :type start_ms: int
        :param end_ms: The end time in milliseconds (inclusive).
        :type end_ms: int
        :return: A ContentRange representing the time range.
        :rtype: ~azure.ai.contentunderstanding.models.ContentRange
        :raises ValueError: If start_ms is negative or end_ms is less than start_ms.
        """
        if start_ms < 0:
            raise ValueError("Start time must be >= 0.")
        if end_ms < start_ms:
            raise ValueError("End time must be >= start time.")
        return cls(f"{start_ms}-{end_ms}")

    @classmethod
    def _time_range_from_ms(cls, start_ms: int) -> "ContentRange":
        """Create a ContentRange from a starting time to the end in milliseconds.

        :param start_ms: The start time in milliseconds (inclusive).
        :type start_ms: int
        :return: A ContentRange representing the open-ended time range.
        :rtype: ~azure.ai.contentunderstanding.models.ContentRange
        :raises ValueError: If start_ms is negative.
        """
        if start_ms < 0:
            raise ValueError("Start time must be >= 0.")
        return cls(f"{start_ms}-")

    @classmethod
    def time_range(cls, start: timedelta, end: timedelta) -> "ContentRange":
        """Create a ContentRange for a time range (for audio/video content).

        Both bounds are converted to whole milliseconds; any sub-millisecond
        remainder is truncated.

        :param start: The start time (inclusive).
        :type start: ~datetime.timedelta
        :param end: The end time (inclusive).
        :type end: ~datetime.timedelta
        :return: A ContentRange representing the time range, e.g. ``"0-5000"``.
        :rtype: ~azure.ai.contentunderstanding.models.ContentRange
        :raises ValueError: If start is negative, or end is less than start.
        """
        # Validate on the timedelta values so that a tiny negative duration is
        # rejected here rather than being judged after conversion.
        if start < timedelta(0):
            raise ValueError("Start time must be non-negative.")
        if end < start:
            raise ValueError("End time must be >= start time.")
        return cls._time_range_ms(
            cls._to_milliseconds(start), cls._to_milliseconds(end)
        )

    @classmethod
    def time_range_from(cls, start: timedelta) -> "ContentRange":
        """Create a ContentRange from a starting time to the end (for audio/video content).

        The start is converted to whole milliseconds; any sub-millisecond
        remainder is truncated.

        :param start: The start time (inclusive).
        :type start: ~datetime.timedelta
        :return: A ContentRange representing the open-ended time range, e.g. ``"5000-"``.
        :rtype: ~azure.ai.contentunderstanding.models.ContentRange
        :raises ValueError: If start is negative.
        """
        if start < timedelta(0):
            raise ValueError("Start time must be non-negative.")
        return cls._time_range_from_ms(cls._to_milliseconds(start))

    @classmethod
    def combine(cls, *ranges: "ContentRange") -> "ContentRange":
        """Combine multiple ContentRange values into a single comma-separated range.

        The ranges are joined in the order given; they are not sorted,
        merged, or de-duplicated.

        :param ranges: The ranges to combine.
        :type ranges: ~azure.ai.contentunderstanding.models.ContentRange
        :return: A ContentRange representing the combined ranges, e.g. ``"1-3,5,9-"``.
        :rtype: ~azure.ai.contentunderstanding.models.ContentRange
        :raises ValueError: If no ranges are provided.
        """
        if not ranges:
            raise ValueError("At least one range must be provided.")
        return cls(",".join(r._value for r in ranges))

    def __str__(self) -> str:
        return self._value

    def __repr__(self) -> str:
        return f"ContentRange({self._value!r})"

    # ``__ne__`` is intentionally not defined: Python 3 derives it from
    # ``__eq__`` (including the ``NotImplemented`` fallback).
    def __eq__(self, other: object) -> bool:
        if isinstance(other, ContentRange):
            return self._value == other._value
        return NotImplemented

    def __hash__(self) -> int:
        return hash(self._value)
PollingReturnType_co = TypeVar("PollingReturnType_co", covariant=True) __all__ = [ + "ContentRange", "RecordMergePatchUpdate", "AnalyzeLROPoller", "ProcessingLocation", diff --git a/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/async_samples/sample_analyze_binary_async.py b/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/async_samples/sample_analyze_binary_async.py index 6d5f8856a470..0f741b9a18af 100644 --- a/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/async_samples/sample_analyze_binary_async.py +++ b/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/async_samples/sample_analyze_binary_async.py @@ -58,6 +58,7 @@ from azure.ai.contentunderstanding.aio import ContentUnderstandingClient from azure.ai.contentunderstanding.models import ( AnalysisResult, + ContentRange, DocumentContent, ) from azure.core.credentials import AzureKeyCredential @@ -89,6 +90,46 @@ async def main() -> None: result: AnalysisResult = await poller.result() # [END analyze_document_from_binary] + # [START analyze_binary_with_content_range] + # Analyze only pages 3 onward. + print("\nAnalyzing pages 3 onward with ContentRange...") + range_poller = await client.begin_analyze_binary( + analyzer_id="prebuilt-documentSearch", + binary_input=file_bytes, + content_range=ContentRange.pages_from(3), + ) + range_result: AnalysisResult = await range_poller.result() + + if isinstance(range_result.contents[0], DocumentContent): + range_doc = range_result.contents[0] + print( + f"ContentRange analysis returned pages" + f" {range_doc.start_page_number} - {range_doc.end_page_number}" + ) + # [END analyze_binary_with_content_range] + + # [START analyze_binary_with_combined_content_range] + # Analyze pages 1-3, page 5, and pages 9 onward. 
+ print("\nAnalyzing combined pages (1-3, 5, 9-) with ContentRange...") + combine_range_poller = await client.begin_analyze_binary( + analyzer_id="prebuilt-documentSearch", + binary_input=file_bytes, + content_range=ContentRange.combine( + ContentRange.pages(1, 3), + ContentRange.page(5), + ContentRange.pages_from(9), + ), + ) + combine_range_result: AnalysisResult = await combine_range_poller.result() + + if isinstance(combine_range_result.contents[0], DocumentContent): + combine_doc = combine_range_result.contents[0] + print( + f"Combined ContentRange analysis returned pages" + f" {combine_doc.start_page_number} - {combine_doc.end_page_number}" + ) + # [END analyze_binary_with_combined_content_range] + # [START extract_markdown] print("\nMarkdown Content:") print("=" * 50) diff --git a/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/async_samples/sample_analyze_url_async.py b/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/async_samples/sample_analyze_url_async.py index ee2ce930f11a..7ead6dc4f56b 100644 --- a/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/async_samples/sample_analyze_url_async.py +++ b/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/async_samples/sample_analyze_url_async.py @@ -42,6 +42,7 @@ import asyncio import os +from datetime import timedelta from typing import cast from dotenv import load_dotenv @@ -50,6 +51,7 @@ AnalysisInput, AnalysisResult, AudioVisualContent, + ContentRange, DocumentContent, AnalysisContent, ) @@ -72,7 +74,7 @@ async def main() -> None: print("DOCUMENT ANALYSIS FROM URL") print("=" * 60) # You can replace this URL with your own publicly accessible document URL. 
- document_url = "https://raw.githubusercontent.com/Azure-Samples/azure-ai-content-understanding-assets/main/document/invoice.pdf" + document_url = "https://raw.githubusercontent.com/Azure-Samples/azure-ai-content-understanding-assets/main/document/mixed_financial_docs.pdf" print(f"Analyzing document from URL with prebuilt-documentSearch...") print(f" URL: {document_url}") @@ -104,6 +106,23 @@ async def main() -> None: print(f" Page {page.page_number}: {page.width} x {page.height} {unit}") # [END analyze_document_from_url] + # [START analyze_document_url_with_content_range] + # Restrict to specific pages with ContentRange + # Extract only page 1 of the document. + print("\nAnalyzing page 1 only with ContentRange...") + range_poller = await client.begin_analyze( + analyzer_id="prebuilt-documentSearch", + inputs=[AnalysisInput(url=document_url, content_range=str(ContentRange.page(1)))], + ) + range_result: AnalysisResult = await range_poller.result() + + range_doc_content = cast(DocumentContent, range_result.contents[0]) + print( + f"ContentRange analysis returned pages" + f" {range_doc_content.start_page_number} - {range_doc_content.end_page_number}" + ) + # [END analyze_document_url_with_content_range] + # [START analyze_video_from_url] print("\n" + "=" * 60) print("VIDEO ANALYSIS FROM URL") @@ -145,6 +164,103 @@ async def main() -> None: segment_index += 1 # [END analyze_video_from_url] + # [START analyze_video_url_with_content_range] + # Restrict to a time window with ContentRange + # Analyze only the first 5 seconds of the video. 
+ print("\nAnalyzing first 5 seconds of video with ContentRange...") + video_range_poller = await client.begin_analyze( + analyzer_id="prebuilt-videoSearch", + inputs=[ + AnalysisInput( + url=video_url, + content_range=str( + ContentRange.time_range(timedelta(0), timedelta(seconds=5)) + ), + ) + ], + ) + video_range_result = await video_range_poller.result() + + for range_media in video_range_result.contents: + range_video_content = cast(AudioVisualContent, range_media) + print( + f"ContentRange segment:" + f" {range_video_content.start_time_ms} ms - {range_video_content.end_time_ms} ms" + ) + # [END analyze_video_url_with_content_range] + + # [START analyze_video_url_with_additional_content_ranges] + # Additional ContentRange examples for video: + + # TimeRangeFrom — analyze from 10 seconds onward (wire format: "10000-") + print("\nAnalyzing video from 10 seconds onward with ContentRange...") + video_from_poller = await client.begin_analyze( + analyzer_id="prebuilt-videoSearch", + inputs=[ + AnalysisInput( + url=video_url, + content_range=str( + ContentRange.time_range_from(timedelta(seconds=10)) + ), + ) + ], + ) + video_from_result = await video_from_poller.result() + for from_media in video_from_result.contents: + from_video = cast(AudioVisualContent, from_media) + print( + f"TimeRangeFrom(10s) segment:" + f" {from_video.start_time_ms} ms - {from_video.end_time_ms} ms" + ) + + # Sub-second precision — analyze from 1.2s to 3.651s (wire format: "1200-3651") + print("\nAnalyzing video with sub-second precision (1.2s to 3.651s)...") + video_subsec_poller = await client.begin_analyze( + analyzer_id="prebuilt-videoSearch", + inputs=[ + AnalysisInput( + url=video_url, + content_range=str( + ContentRange.time_range( + timedelta(milliseconds=1200), timedelta(milliseconds=3651) + ) + ), + ) + ], + ) + video_subsec_result = await video_subsec_poller.result() + for subsec_media in video_subsec_result.contents: + subsec_video = cast(AudioVisualContent, subsec_media) + 
print( + f"TimeRange(1.2s, 3.651s) segment:" + f" {subsec_video.start_time_ms} ms - {subsec_video.end_time_ms} ms" + ) + + # Combine — multiple disjoint time ranges (wire format: "0-3000,30000-") + print("\nAnalyzing video with combined time ranges (0-3s and 30s onward)...") + video_combine_poller = await client.begin_analyze( + analyzer_id="prebuilt-videoSearch", + inputs=[ + AnalysisInput( + url=video_url, + content_range=str( + ContentRange.combine( + ContentRange.time_range(timedelta(0), timedelta(seconds=3)), + ContentRange.time_range_from(timedelta(seconds=30)), + ) + ), + ) + ], + ) + video_combine_result = await video_combine_poller.result() + for combine_media in video_combine_result.contents: + combine_video = cast(AudioVisualContent, combine_media) + print( + f"Combine(0-3s, 30s-) segment:" + f" {combine_video.start_time_ms} ms - {combine_video.end_time_ms} ms" + ) + # [END analyze_video_url_with_additional_content_ranges] + # [START analyze_audio_from_url] print("\n" + "=" * 60) print("AUDIO ANALYSIS FROM URL") @@ -181,6 +297,80 @@ async def main() -> None: print(f" [{phrase.speaker}] {phrase.start_time_ms} ms: {phrase.text}") # [END analyze_audio_from_url] + # [START analyze_audio_url_with_content_range] + # Restrict to a time range with ContentRange + # Analyze audio from 5 seconds onward. 
+ print("\nAnalyzing audio from 5 seconds onward with ContentRange...") + audio_range_poller = await client.begin_analyze( + analyzer_id="prebuilt-audioSearch", + inputs=[ + AnalysisInput( + url=audio_url, + content_range=str( + ContentRange.time_range_from(timedelta(seconds=5)) + ), + ) + ], + ) + audio_range_result = await audio_range_poller.result() + + range_audio_content = cast(AudioVisualContent, audio_range_result.contents[0]) + print(f"ContentRange audio analysis: {range_audio_content.start_time_ms} ms onward") + range_summary = ( + range_audio_content.fields.get("Summary") if range_audio_content.fields else None + ) + if range_summary and hasattr(range_summary, "value"): + print(f"Summary: {range_summary.value}") + # [END analyze_audio_url_with_content_range] + + # [START analyze_audio_url_with_additional_content_ranges] + # Additional ContentRange examples for audio: + + # TimeRange — analyze a specific time window from 2s to 8s (wire format: "2000-8000") + print("\nAnalyzing audio from 2s to 8s with ContentRange...") + audio_window_poller = await client.begin_analyze( + analyzer_id="prebuilt-audioSearch", + inputs=[ + AnalysisInput( + url=audio_url, + content_range=str( + ContentRange.time_range( + timedelta(seconds=2), timedelta(seconds=8) + ) + ), + ) + ], + ) + audio_window_result = await audio_window_poller.result() + audio_window_content = cast(AudioVisualContent, audio_window_result.contents[0]) + print( + f"TimeRange(2s, 8s):" + f" {audio_window_content.start_time_ms} ms - {audio_window_content.end_time_ms} ms" + ) + + # Sub-second precision — analyze from 1.2s to 3.651s (wire format: "1200-3651") + print("\nAnalyzing audio with sub-second precision (1.2s to 3.651s)...") + audio_subsec_poller = await client.begin_analyze( + analyzer_id="prebuilt-audioSearch", + inputs=[ + AnalysisInput( + url=audio_url, + content_range=str( + ContentRange.time_range( + timedelta(milliseconds=1200), timedelta(milliseconds=3651) + ) + ), + ) + ], + ) + 
audio_subsec_result = await audio_subsec_poller.result() + audio_subsec_content = cast(AudioVisualContent, audio_subsec_result.contents[0]) + print( + f"TimeRange(1.2s, 3.651s):" + f" {audio_subsec_content.start_time_ms} ms - {audio_subsec_content.end_time_ms} ms" + ) + # [END analyze_audio_url_with_additional_content_ranges] + # [START analyze_image_from_url] print("\n" + "=" * 60) print("IMAGE ANALYSIS FROM URL") diff --git a/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/sample_analyze_binary.py b/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/sample_analyze_binary.py index 27d73490a99f..254140c88bb1 100644 --- a/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/sample_analyze_binary.py +++ b/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/sample_analyze_binary.py @@ -57,6 +57,7 @@ from azure.ai.contentunderstanding import ContentUnderstandingClient from azure.ai.contentunderstanding.models import ( AnalysisResult, + ContentRange, DocumentContent, ) from azure.core.credentials import AzureKeyCredential @@ -87,6 +88,40 @@ def main() -> None: result: AnalysisResult = poller.result() # [END analyze_document_from_binary] + # [START analyze_binary_with_content_range] + # Analyze only pages 3 onward. + print("\nAnalyzing pages 3 onward with ContentRange...") + range_poller = client.begin_analyze_binary( + analyzer_id="prebuilt-documentSearch", + binary_input=file_bytes, + content_range=ContentRange.pages_from(3), + ) + range_result: AnalysisResult = range_poller.result() + + if isinstance(range_result.contents[0], DocumentContent): + range_doc = range_result.contents[0] + print(f"ContentRange analysis returned pages {range_doc.start_page_number} - {range_doc.end_page_number}") + # [END analyze_binary_with_content_range] + + # [START analyze_binary_with_combined_content_range] + # Analyze pages 1-3, page 5, and pages 9 onward. 
+ print("\nAnalyzing combined pages (1-3, 5, 9-) with ContentRange...") + combine_range_poller = client.begin_analyze_binary( + analyzer_id="prebuilt-documentSearch", + binary_input=file_bytes, + content_range=ContentRange.combine( + ContentRange.pages(1, 3), + ContentRange.page(5), + ContentRange.pages_from(9), + ), + ) + combine_range_result: AnalysisResult = combine_range_poller.result() + + if isinstance(combine_range_result.contents[0], DocumentContent): + combine_doc = combine_range_result.contents[0] + print(f"Combined ContentRange analysis returned pages {combine_doc.start_page_number} - {combine_doc.end_page_number}") + # [END analyze_binary_with_combined_content_range] + # [START extract_markdown] print("\nMarkdown Content:") print("=" * 50) diff --git a/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/sample_analyze_url.py b/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/sample_analyze_url.py index 114a4bcb231a..1eae4e5323e9 100644 --- a/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/sample_analyze_url.py +++ b/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/sample_analyze_url.py @@ -41,6 +41,7 @@ """ import os +from datetime import timedelta from typing import cast from dotenv import load_dotenv @@ -49,6 +50,7 @@ AnalysisInput, AnalysisResult, AudioVisualContent, + ContentRange, DocumentContent, AnalysisContent, ) @@ -70,7 +72,7 @@ def main() -> None: print("DOCUMENT ANALYSIS FROM URL") print("=" * 60) # You can replace this URL with your own publicly accessible document URL. 
- document_url = "https://raw.githubusercontent.com/Azure-Samples/azure-ai-content-understanding-assets/main/document/invoice.pdf" + document_url = "https://raw.githubusercontent.com/Azure-Samples/azure-ai-content-understanding-assets/main/document/mixed_financial_docs.pdf" print(f"Analyzing document from URL with prebuilt-documentSearch...") print(f" URL: {document_url}") @@ -102,6 +104,23 @@ def main() -> None: print(f" Page {page.page_number}: {page.width} x {page.height} {unit}") # [END analyze_document_from_url] + # [START analyze_document_url_with_content_range] + # Restrict to specific pages with ContentRange + # Extract only page 1 of the document. + print("\nAnalyzing page 1 only with ContentRange...") + range_poller = client.begin_analyze( + analyzer_id="prebuilt-documentSearch", + inputs=[AnalysisInput(url=document_url, content_range=str(ContentRange.page(1)))], + ) + range_result: AnalysisResult = range_poller.result() + + range_doc_content = cast(DocumentContent, range_result.contents[0]) + print( + f"ContentRange analysis returned pages" + f" {range_doc_content.start_page_number} - {range_doc_content.end_page_number}" + ) + # [END analyze_document_url_with_content_range] + # [START analyze_video_from_url] print("\n" + "=" * 60) print("VIDEO ANALYSIS FROM URL") @@ -141,6 +160,103 @@ def main() -> None: segment_index += 1 # [END analyze_video_from_url] + # [START analyze_video_url_with_content_range] + # Restrict to a time window with ContentRange + # Analyze only the first 5 seconds of the video. 
+ print("\nAnalyzing first 5 seconds of video with ContentRange...") + video_range_poller = client.begin_analyze( + analyzer_id="prebuilt-videoSearch", + inputs=[ + AnalysisInput( + url=video_url, + content_range=str( + ContentRange.time_range(timedelta(0), timedelta(seconds=5)) + ), + ) + ], + ) + video_range_result = video_range_poller.result() + + for range_media in video_range_result.contents: + range_video_content = cast(AudioVisualContent, range_media) + print( + f"ContentRange segment:" + f" {range_video_content.start_time_ms} ms - {range_video_content.end_time_ms} ms" + ) + # [END analyze_video_url_with_content_range] + + # [START analyze_video_url_with_additional_content_ranges] + # Additional ContentRange examples for video: + + # TimeRangeFrom — analyze from 10 seconds onward (wire format: "10000-") + print("\nAnalyzing video from 10 seconds onward with ContentRange...") + video_from_poller = client.begin_analyze( + analyzer_id="prebuilt-videoSearch", + inputs=[ + AnalysisInput( + url=video_url, + content_range=str( + ContentRange.time_range_from(timedelta(seconds=10)) + ), + ) + ], + ) + video_from_result = video_from_poller.result() + for from_media in video_from_result.contents: + from_video = cast(AudioVisualContent, from_media) + print( + f"TimeRangeFrom(10s) segment:" + f" {from_video.start_time_ms} ms - {from_video.end_time_ms} ms" + ) + + # Sub-second precision — analyze from 1.2s to 3.651s (wire format: "1200-3651") + print("\nAnalyzing video with sub-second precision (1.2s to 3.651s)...") + video_subsec_poller = client.begin_analyze( + analyzer_id="prebuilt-videoSearch", + inputs=[ + AnalysisInput( + url=video_url, + content_range=str( + ContentRange.time_range( + timedelta(milliseconds=1200), timedelta(milliseconds=3651) + ) + ), + ) + ], + ) + video_subsec_result = video_subsec_poller.result() + for subsec_media in video_subsec_result.contents: + subsec_video = cast(AudioVisualContent, subsec_media) + print( + f"TimeRange(1.2s, 3.651s) 
segment:" + f" {subsec_video.start_time_ms} ms - {subsec_video.end_time_ms} ms" + ) + + # Combine — multiple disjoint time ranges (wire format: "0-3000,30000-") + print("\nAnalyzing video with combined time ranges (0-3s and 30s onward)...") + video_combine_poller = client.begin_analyze( + analyzer_id="prebuilt-videoSearch", + inputs=[ + AnalysisInput( + url=video_url, + content_range=str( + ContentRange.combine( + ContentRange.time_range(timedelta(0), timedelta(seconds=3)), + ContentRange.time_range_from(timedelta(seconds=30)), + ) + ), + ) + ], + ) + video_combine_result = video_combine_poller.result() + for combine_media in video_combine_result.contents: + combine_video = cast(AudioVisualContent, combine_media) + print( + f"Combine(0-3s, 30s-) segment:" + f" {combine_video.start_time_ms} ms - {combine_video.end_time_ms} ms" + ) + # [END analyze_video_url_with_additional_content_ranges] + # [START analyze_audio_from_url] print("\n" + "=" * 60) print("AUDIO ANALYSIS FROM URL") @@ -174,6 +290,80 @@ def main() -> None: print(f" [{phrase.speaker}] {phrase.start_time_ms} ms: {phrase.text}") # [END analyze_audio_from_url] + # [START analyze_audio_url_with_content_range] + # Restrict to a time range with ContentRange + # Analyze audio from 5 seconds onward. 
+ print("\nAnalyzing audio from 5 seconds onward with ContentRange...") + audio_range_poller = client.begin_analyze( + analyzer_id="prebuilt-audioSearch", + inputs=[ + AnalysisInput( + url=audio_url, + content_range=str( + ContentRange.time_range_from(timedelta(seconds=5)) + ), + ) + ], + ) + audio_range_result = audio_range_poller.result() + + range_audio_content = cast(AudioVisualContent, audio_range_result.contents[0]) + print(f"ContentRange audio analysis: {range_audio_content.start_time_ms} ms onward") + range_summary = ( + range_audio_content.fields.get("Summary") if range_audio_content.fields else None + ) + if range_summary and hasattr(range_summary, "value"): + print(f"Summary: {range_summary.value}") + # [END analyze_audio_url_with_content_range] + + # [START analyze_audio_url_with_additional_content_ranges] + # Additional ContentRange examples for audio: + + # TimeRange — analyze a specific time window from 2s to 8s (wire format: "2000-8000") + print("\nAnalyzing audio from 2s to 8s with ContentRange...") + audio_window_poller = client.begin_analyze( + analyzer_id="prebuilt-audioSearch", + inputs=[ + AnalysisInput( + url=audio_url, + content_range=str( + ContentRange.time_range( + timedelta(seconds=2), timedelta(seconds=8) + ) + ), + ) + ], + ) + audio_window_result = audio_window_poller.result() + audio_window_content = cast(AudioVisualContent, audio_window_result.contents[0]) + print( + f"TimeRange(2s, 8s):" + f" {audio_window_content.start_time_ms} ms - {audio_window_content.end_time_ms} ms" + ) + + # Sub-second precision — analyze from 1.2s to 3.651s (wire format: "1200-3651") + print("\nAnalyzing audio with sub-second precision (1.2s to 3.651s)...") + audio_subsec_poller = client.begin_analyze( + analyzer_id="prebuilt-audioSearch", + inputs=[ + AnalysisInput( + url=audio_url, + content_range=str( + ContentRange.time_range( + timedelta(milliseconds=1200), timedelta(milliseconds=3651) + ) + ), + ) + ], + ) + audio_subsec_result = 
audio_subsec_poller.result() + audio_subsec_content = cast(AudioVisualContent, audio_subsec_result.contents[0]) + print( + f"TimeRange(1.2s, 3.651s):" + f" {audio_subsec_content.start_time_ms} ms - {audio_subsec_content.end_time_ms} ms" + ) + # [END analyze_audio_url_with_additional_content_ranges] + # [START analyze_image_from_url] print("\n" + "=" * 60) print("IMAGE ANALYSIS FROM URL") diff --git a/sdk/contentunderstanding/azure-ai-contentunderstanding/tests/samples/test_sample_analyze_binary.py b/sdk/contentunderstanding/azure-ai-contentunderstanding/tests/samples/test_sample_analyze_binary.py index 18dd12bd0df3..2a0688429a17 100644 --- a/sdk/contentunderstanding/azure-ai-contentunderstanding/tests/samples/test_sample_analyze_binary.py +++ b/sdk/contentunderstanding/azure-ai-contentunderstanding/tests/samples/test_sample_analyze_binary.py @@ -34,6 +34,7 @@ import pytest from devtools_testutils import recorded_by_proxy from testpreparer import ContentUnderstandingPreparer, ContentUnderstandingClientTestBase +from azure.ai.contentunderstanding.models import ContentRange, DocumentContent class TestSampleAnalyzeBinary(ContentUnderstandingClientTestBase): @@ -243,3 +244,136 @@ def _validate_tables(self, tables): ) else: print(f"[PASS] Table {i} validated: {table.row_count} rows x {table.column_count} columns") + + @ContentUnderstandingPreparer() + @recorded_by_proxy + def test_sample_analyze_binary_with_content_range(self, contentunderstanding_endpoint: str) -> None: + """Test analyzing a document from binary data with ContentRange. + + This test validates: + 1. ContentRange.pages_from(3) — analyze pages 3 onward + 2. ContentRange.combine() — analyze disjoint page ranges + 3. ContentRange.page(2) — single page + 4. ContentRange.pages(1, 3) — page range + 5. 
ContentRange.combine(page(1), pages(3, 4)) — combined page ranges + + 01_AnalyzeBinary.AnalyzeBinaryWithPageContentRangesAsync() + """ + client = self.create_client(endpoint=contentunderstanding_endpoint) + + # Read the sample file (use multi-page document for ContentRange testing) + tests_dir = os.path.dirname(os.path.dirname(__file__)) + file_path = os.path.join(tests_dir, "test_data", "mixed_financial_docs.pdf") + if not os.path.exists(file_path): + file_path = os.path.join(tests_dir, "test_data", "sample_invoice.pdf") + + with open(file_path, "rb") as f: + file_bytes = f.read() + + # Full analysis for comparison + full_poller = client.begin_analyze_binary( + analyzer_id="prebuilt-documentSearch", binary_input=file_bytes + ) + full_result = full_poller.result() + assert full_result.contents is not None + full_doc = full_result.contents[0] + assert isinstance(full_doc, DocumentContent) + full_page_count = len(full_doc.pages) if full_doc.pages else 0 + print(f"[PASS] Full document: {full_page_count} pages, {len(full_doc.markdown or '')} chars") + + # ContentRange.pages_from(3) — pages 3 onward (wire format: "3-") + print("\nAnalyzing pages 3 onward with ContentRange.pages_from(3)...") + range_poller = client.begin_analyze_binary( + analyzer_id="prebuilt-documentSearch", + binary_input=file_bytes, + content_range=ContentRange.pages_from(3), + ) + range_result = range_poller.result() + assert range_result.contents is not None + range_doc = range_result.contents[0] + assert isinstance(range_doc, DocumentContent) + range_page_count = len(range_doc.pages) if range_doc.pages else 0 + assert range_page_count > 0, "PagesFrom(3) should return at least one page" + assert full_page_count >= range_page_count, ( + f"Full document ({full_page_count} pages) should have >= pages than range-limited ({range_page_count})" + ) + print(f"[PASS] PagesFrom(3): {range_page_count} pages (pages {range_doc.start_page_number}-{range_doc.end_page_number})") + + # ContentRange.combine(pages(1, 
3), page(5), pages_from(9)) — combined (wire format: "1-3,5,9-") + print("\nAnalyzing combined pages (1-3, 5, 9-) with ContentRange.combine()...") + combine_poller = client.begin_analyze_binary( + analyzer_id="prebuilt-documentSearch", + binary_input=file_bytes, + content_range=ContentRange.combine( + ContentRange.pages(1, 3), + ContentRange.page(5), + ContentRange.pages_from(9), + ), + ) + combine_result = combine_poller.result() + assert combine_result.contents is not None + combine_doc = combine_result.contents[0] + assert isinstance(combine_doc, DocumentContent) + combine_page_count = len(combine_doc.pages) if combine_doc.pages else 0 + assert combine_page_count > 0, "Combine should return at least one page" + assert len(full_doc.markdown or '') >= len(combine_doc.markdown or ''), ( + f"Full document ({len(full_doc.markdown or '')} chars) should be >= Combine ({len(combine_doc.markdown or '')} chars)" + ) + print(f"[PASS] Combine(Pages(1,3), Page(5), PagesFrom(9)): {combine_page_count} pages") + + # ContentRange.page(2) — single page (wire format: "2") + print("\nAnalyzing page 2 only with ContentRange.page(2)...") + page2_poller = client.begin_analyze_binary( + analyzer_id="prebuilt-documentSearch", + binary_input=file_bytes, + content_range=ContentRange.page(2), + ) + page2_result = page2_poller.result() + assert page2_result.contents is not None + page2_doc = page2_result.contents[0] + assert isinstance(page2_doc, DocumentContent) + page2_page_count = len(page2_doc.pages) if page2_doc.pages else 0 + assert page2_page_count == 1, f"Page(2) should return exactly 1 page, got {page2_page_count}" + assert page2_doc.start_page_number == 2, f"Page(2) should start at page 2, got {page2_doc.start_page_number}" + assert page2_doc.end_page_number == 2, f"Page(2) should end at page 2, got {page2_doc.end_page_number}" + print(f"[PASS] Page(2): {page2_page_count} page, {len(page2_doc.markdown or '')} chars") + + # ContentRange.pages(1, 3) — page range (wire format: "1-3") 
+ print("\nAnalyzing pages 1-3 with ContentRange.pages(1, 3)...") + pages13_poller = client.begin_analyze_binary( + analyzer_id="prebuilt-documentSearch", + binary_input=file_bytes, + content_range=ContentRange.pages(1, 3), + ) + pages13_result = pages13_poller.result() + assert pages13_result.contents is not None + pages13_doc = pages13_result.contents[0] + assert isinstance(pages13_doc, DocumentContent) + pages13_page_count = len(pages13_doc.pages) if pages13_doc.pages else 0 + assert pages13_page_count == 3, f"Pages(1,3) should return exactly 3 pages, got {pages13_page_count}" + assert pages13_doc.start_page_number == 1, f"Pages(1,3) should start at page 1, got {pages13_doc.start_page_number}" + assert pages13_doc.end_page_number == 3, f"Pages(1,3) should end at page 3, got {pages13_doc.end_page_number}" + print(f"[PASS] Pages(1,3): {pages13_page_count} pages, {len(pages13_doc.markdown or '')} chars") + + # ContentRange.combine(page(1), pages(3, 4)) — combined (wire format: "1,3-4") + print("\nAnalyzing combined pages (1, 3-4) with ContentRange.combine()...") + combine2_poller = client.begin_analyze_binary( + analyzer_id="prebuilt-documentSearch", + binary_input=file_bytes, + content_range=ContentRange.combine( + ContentRange.page(1), + ContentRange.pages(3, 4), + ), + ) + combine2_result = combine2_poller.result() + assert combine2_result.contents is not None + combine2_doc = combine2_result.contents[0] + assert isinstance(combine2_doc, DocumentContent) + combine2_page_count = len(combine2_doc.pages) if combine2_doc.pages else 0 + assert combine2_page_count >= 2, ( + f"Combine(Page(1), Pages(3,4)) should return at least 2 pages, got {combine2_page_count}" + ) + assert combine2_doc.start_page_number == 1, f"Combine should start at page 1, got {combine2_doc.start_page_number}" + print(f"[PASS] Combine(Page(1), Pages(3,4)): {combine2_page_count} pages, {len(combine2_doc.markdown or '')} chars") + + print("\n[SUCCESS] All ContentRange binary test assertions 
passed") diff --git a/sdk/contentunderstanding/azure-ai-contentunderstanding/tests/samples/test_sample_analyze_url.py b/sdk/contentunderstanding/azure-ai-contentunderstanding/tests/samples/test_sample_analyze_url.py index 94a094e2f4fe..58f9de0f55d7 100644 --- a/sdk/contentunderstanding/azure-ai-contentunderstanding/tests/samples/test_sample_analyze_url.py +++ b/sdk/contentunderstanding/azure-ai-contentunderstanding/tests/samples/test_sample_analyze_url.py @@ -24,7 +24,7 @@ import pytest from devtools_testutils import recorded_by_proxy from testpreparer import ContentUnderstandingPreparer, ContentUnderstandingClientTestBase -from azure.ai.contentunderstanding.models import AnalysisInput, AudioVisualContent, DocumentContent +from azure.ai.contentunderstanding.models import AnalysisInput, AudioVisualContent, ContentRange, DocumentContent class TestSampleAnalyzeUrl(ContentUnderstandingClientTestBase): @@ -441,3 +441,293 @@ def _validate_tables(self, tables): ) else: print(f"[PASS] Table {i} validated: {table.row_count} rows x {table.column_count} columns") + + @ContentUnderstandingPreparer() + @recorded_by_proxy + def test_sample_analyze_document_url_with_content_range(self, contentunderstanding_endpoint: str) -> None: + """Test analyzing a document URL with ContentRange. + + This test validates: + 1. ContentRange.page(1) — single page extraction + 2. 
Comparison between full document and range-limited result + + 02_AnalyzeUrl.AnalyzeUrlWithPageContentRangesAsync() + """ + from typing import cast + + client = self.create_client(endpoint=contentunderstanding_endpoint) + + url = "https://raw.githubusercontent.com/Azure-Samples/azure-ai-content-understanding-assets/main/document/mixed_financial_docs.pdf" + + # Full analysis for comparison + full_poller = client.begin_analyze( + analyzer_id="prebuilt-documentSearch", inputs=[AnalysisInput(url=url)] + ) + full_result = full_poller.result() + full_doc = cast(DocumentContent, full_result.contents[0]) + full_page_count = len(full_doc.pages) if full_doc.pages else 0 + assert full_page_count == 4, f"Full document should return all 4 pages, got {full_page_count}" + print(f"[PASS] Full document: {full_page_count} pages, {len(full_doc.markdown or '')} chars") + + # ContentRange.page(1) — single page (wire format: "1") + print("\nAnalyzing page 1 only with ContentRange.page(1)...") + range_poller = client.begin_analyze( + analyzer_id="prebuilt-documentSearch", + inputs=[AnalysisInput(url=url, content_range=str(ContentRange.page(1)))], + ) + range_result = range_poller.result() + range_doc = cast(DocumentContent, range_result.contents[0]) + range_page_count = len(range_doc.pages) if range_doc.pages else 0 + assert range_page_count == 1, f"Page(1) should return only 1 page, got {range_page_count}" + assert range_doc.start_page_number == 1, f"Page(1) should start at page 1, got {range_doc.start_page_number}" + assert range_doc.end_page_number == 1, f"Page(1) should end at page 1, got {range_doc.end_page_number}" + + # Compare full vs range-limited + assert full_page_count > range_page_count, ( + f"Full document ({full_page_count} pages) should have more pages than range-limited ({range_page_count})" + ) + assert len(full_doc.markdown or '') > len(range_doc.markdown or ''), ( + f"Full document markdown ({len(full_doc.markdown or '')} chars) should exceed range-limited 
({len(range_doc.markdown or '')} chars)" + ) + print(f"[PASS] Page(1): {range_page_count} page, {len(range_doc.markdown or '')} chars") + print("\n[SUCCESS] All document URL ContentRange assertions passed") + + @ContentUnderstandingPreparer() + @recorded_by_proxy + def test_sample_analyze_video_url_with_content_ranges(self, contentunderstanding_endpoint: str) -> None: + """Test analyzing a video URL with various ContentRange options. + + This test validates: + 1. ContentRange.time_range(0, 5s) — first 5 seconds + 2. ContentRange.time_range_from(10s) — from 10 seconds onward + 3. ContentRange.time_range(1200ms, 3651ms) — sub-second precision + 4. ContentRange.combine() — combined time ranges + + 02_AnalyzeUrl.AnalyzeVideoUrlWithTimeContentRangesAsync() + """ + from datetime import timedelta + from typing import cast + + client = self.create_client(endpoint=contentunderstanding_endpoint) + + url = "https://raw.githubusercontent.com/Azure-Samples/azure-ai-content-understanding-assets/main/videos/sdk_samples/FlightSimulator.mp4" + + # Full analysis for comparison + full_poller = client.begin_analyze( + analyzer_id="prebuilt-videoSearch", + inputs=[AnalysisInput(url=url)], + polling_interval=10, + ) + full_result = full_poller.result() + assert full_result.contents is not None + assert len(full_result.contents) > 0 + full_segments = [cast(AudioVisualContent, c) for c in full_result.contents] + full_total_duration = sum( + (s.end_time_ms or 0) - (s.start_time_ms or 0) for s in full_segments + ) + print(f"[PASS] Full video: {len(full_segments)} segment(s), {full_total_duration} ms") + + # ContentRange.time_range(0, 5s) — first 5 seconds (wire format: "0-5000") + print("\nAnalyzing first 5 seconds with ContentRange.time_range(0, 5s)...") + range_poller = client.begin_analyze( + analyzer_id="prebuilt-videoSearch", + inputs=[ + AnalysisInput( + url=url, + content_range=str(ContentRange.time_range(timedelta(0), timedelta(seconds=5))), + ) + ], + polling_interval=10, + ) + 
range_result = range_poller.result() + assert range_result.contents is not None + range_segments = [cast(AudioVisualContent, c) for c in range_result.contents] + assert len(range_segments) > 0, "TimeRange(0, 5s) should return segments" + for seg in range_segments: + assert (seg.end_time_ms or 0) > (seg.start_time_ms or 0), "Segment should have EndTime > StartTime" + print(f"[PASS] TimeRange(0, 5s): {len(range_segments)} segment(s)") + + # ContentRange.time_range_from(10s) — from 10 seconds onward (wire format: "10000-") + print("\nAnalyzing from 10 seconds onward with ContentRange.time_range_from(10s)...") + from_poller = client.begin_analyze( + analyzer_id="prebuilt-videoSearch", + inputs=[ + AnalysisInput( + url=url, + content_range=str(ContentRange.time_range_from(timedelta(seconds=10))), + ) + ], + polling_interval=10, + ) + from_result = from_poller.result() + assert from_result.contents is not None + from_segments = [cast(AudioVisualContent, c) for c in from_result.contents] + assert len(from_segments) > 0, "TimeRangeFrom(10s) should return segments" + for seg in from_segments: + assert (seg.end_time_ms or 0) > (seg.start_time_ms or 0), "Segment should have EndTime > StartTime" + assert seg.markdown, "Segment should have markdown" + print(f"[PASS] TimeRangeFrom(10s): {len(from_segments)} segment(s)") + + # ContentRange.time_range(1200ms, 3651ms) — sub-second precision (wire format: "1200-3651") + print("\nAnalyzing with sub-second precision (1.2s to 3.651s)...") + subsec_poller = client.begin_analyze( + analyzer_id="prebuilt-videoSearch", + inputs=[ + AnalysisInput( + url=url, + content_range=str( + ContentRange.time_range(timedelta(milliseconds=1200), timedelta(milliseconds=3651)) + ), + ) + ], + polling_interval=10, + ) + subsec_result = subsec_poller.result() + assert subsec_result.contents is not None + subsec_segments = [cast(AudioVisualContent, c) for c in subsec_result.contents] + assert len(subsec_segments) > 0, "Sub-second TimeRange should return 
segments" + for seg in subsec_segments: + assert (seg.end_time_ms or 0) > (seg.start_time_ms or 0), "Segment should have EndTime > StartTime" + print(f"[PASS] TimeRange(1.2s, 3.651s): {len(subsec_segments)} segment(s)") + + # ContentRange.combine() — combined time ranges (wire format: "0-3000,30000-") + print("\nAnalyzing with combined time ranges (0-3s and 30s onward)...") + combine_poller = client.begin_analyze( + analyzer_id="prebuilt-videoSearch", + inputs=[ + AnalysisInput( + url=url, + content_range=str( + ContentRange.combine( + ContentRange.time_range(timedelta(0), timedelta(seconds=3)), + ContentRange.time_range_from(timedelta(seconds=30)), + ) + ), + ) + ], + polling_interval=10, + ) + combine_result = combine_poller.result() + assert combine_result.contents is not None + combine_segments = [cast(AudioVisualContent, c) for c in combine_result.contents] + assert len(combine_segments) > 0, "Combine time range should return segments" + for seg in combine_segments: + assert (seg.end_time_ms or 0) > (seg.start_time_ms or 0), "Segment should have EndTime > StartTime" + assert seg.markdown, "Segment should have markdown" + print(f"[PASS] Combine(0-3s, 30s-): {len(combine_segments)} segment(s)") + + print("\n[SUCCESS] All video URL ContentRange assertions passed") + + @ContentUnderstandingPreparer() + @recorded_by_proxy + def test_sample_analyze_audio_url_with_content_ranges(self, contentunderstanding_endpoint: str) -> None: + """Test analyzing an audio URL with various ContentRange options. + + This test validates: + 1. ContentRange.time_range_from(5s) — from 5 seconds onward + 2. ContentRange.time_range(2s, 8s) — specific time window + 3. 
ContentRange.time_range(1200ms, 3651ms) — sub-second precision + + 02_AnalyzeUrl.AnalyzeAudioUrlWithTimeContentRangesAsync() + """ + from datetime import timedelta + from typing import cast + + client = self.create_client(endpoint=contentunderstanding_endpoint) + + url = "https://raw.githubusercontent.com/Azure-Samples/azure-ai-content-understanding-assets/main/audio/callCenterRecording.mp3" + + # Full analysis for comparison + full_poller = client.begin_analyze( + analyzer_id="prebuilt-audioSearch", + inputs=[AnalysisInput(url=url)], + polling_interval=10, + ) + full_result = full_poller.result() + assert full_result.contents is not None + full_audio = cast(AudioVisualContent, full_result.contents[0]) + full_duration = (full_audio.end_time_ms or 0) - (full_audio.start_time_ms or 0) + full_phrase_count = len(full_audio.transcript_phrases) if full_audio.transcript_phrases else 0 + print(f"[PASS] Full audio: {len(full_audio.markdown or '')} chars, {full_phrase_count} phrases, {full_duration} ms") + + # ContentRange.time_range_from(5s) — from 5 seconds onward (wire format: "5000-") + print("\nAnalyzing audio from 5 seconds onward with ContentRange.time_range_from(5s)...") + from_poller = client.begin_analyze( + analyzer_id="prebuilt-audioSearch", + inputs=[ + AnalysisInput( + url=url, + content_range=str(ContentRange.time_range_from(timedelta(seconds=5))), + ) + ], + polling_interval=10, + ) + from_result = from_poller.result() + assert from_result.contents is not None + from_audio = cast(AudioVisualContent, from_result.contents[0]) + assert len(full_audio.markdown or '') >= len(from_audio.markdown or ''), ( + f"Full audio markdown ({len(full_audio.markdown or '')} chars) should be >= range-limited ({len(from_audio.markdown or '')} chars)" + ) + from_phrase_count = len(from_audio.transcript_phrases) if from_audio.transcript_phrases else 0 + assert full_phrase_count >= from_phrase_count, ( + f"Full audio ({full_phrase_count} phrases) should have >= phrases than 
range-limited ({from_phrase_count})" + ) + print(f"[PASS] TimeRangeFrom(5s): {len(from_audio.markdown or '')} chars, {from_phrase_count} phrases") + + # ContentRange.time_range(2s, 8s) — specific time window (wire format: "2000-8000") + print("\nAnalyzing audio from 2s to 8s with ContentRange.time_range(2s, 8s)...") + window_poller = client.begin_analyze( + analyzer_id="prebuilt-audioSearch", + inputs=[ + AnalysisInput( + url=url, + content_range=str( + ContentRange.time_range(timedelta(seconds=2), timedelta(seconds=8)) + ), + ) + ], + polling_interval=10, + ) + window_result = window_poller.result() + assert window_result.contents is not None + window_audio = cast(AudioVisualContent, window_result.contents[0]) + assert (window_audio.end_time_ms or 0) > (window_audio.start_time_ms or 0), ( + "TimeRange(2s, 8s) should have EndTime > StartTime" + ) + assert window_audio.markdown, "TimeRange(2s, 8s) should have markdown" + assert len(window_audio.markdown) > 0, "TimeRange(2s, 8s) markdown should not be empty" + window_duration = (window_audio.end_time_ms or 0) - (window_audio.start_time_ms or 0) + assert full_duration >= window_duration, ( + f"Full audio duration ({full_duration} ms) should be >= time-windowed duration ({window_duration} ms)" + ) + print(f"[PASS] TimeRange(2s, 8s): {len(window_audio.markdown)} chars, {window_duration} ms") + + # ContentRange.time_range(1200ms, 3651ms) — sub-second precision (wire format: "1200-3651") + print("\nAnalyzing audio with sub-second precision (1.2s to 3.651s)...") + subsec_poller = client.begin_analyze( + analyzer_id="prebuilt-audioSearch", + inputs=[ + AnalysisInput( + url=url, + content_range=str( + ContentRange.time_range(timedelta(milliseconds=1200), timedelta(milliseconds=3651)) + ), + ) + ], + polling_interval=10, + ) + subsec_result = subsec_poller.result() + assert subsec_result.contents is not None + subsec_audio = cast(AudioVisualContent, subsec_result.contents[0]) + assert (subsec_audio.end_time_ms or 0) > 
(subsec_audio.start_time_ms or 0), ( + "TimeRange(1.2s, 3.651s) should have EndTime > StartTime" + ) + assert subsec_audio.markdown, "TimeRange(1.2s, 3.651s) should have markdown" + assert len(subsec_audio.markdown) > 0, "TimeRange(1.2s, 3.651s) markdown should not be empty" + subsec_duration = (subsec_audio.end_time_ms or 0) - (subsec_audio.start_time_ms or 0) + assert full_duration >= subsec_duration, ( + f"Full audio duration ({full_duration} ms) should be >= sub-second duration ({subsec_duration} ms)" + ) + print(f"[PASS] TimeRange(1.2s, 3.651s): {len(subsec_audio.markdown)} chars, {subsec_duration} ms") + + print("\n[SUCCESS] All audio URL ContentRange assertions passed") diff --git a/sdk/contentunderstanding/azure-ai-contentunderstanding/tests/test_content_range.py b/sdk/contentunderstanding/azure-ai-contentunderstanding/tests/test_content_range.py new file mode 100644 index 000000000000..1768d6275a42 --- /dev/null +++ b/sdk/contentunderstanding/azure-ai-contentunderstanding/tests/test_content_range.py @@ -0,0 +1,183 @@ +# coding=utf-8 +# -------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. 
+# -------------------------------------------------------------------------- + +"""Unit tests for ContentRange.""" + +import pytest +from datetime import timedelta +from azure.ai.contentunderstanding.models import ContentRange, AnalysisInput + + +class TestContentRangeConstructor: + """Tests for ContentRange constructor.""" + + def test_constructor_with_value_stores_value(self): + cr = ContentRange("1-3") + assert str(cr) == "1-3" + + def test_constructor_none_value_raises(self): + with pytest.raises(ValueError): + ContentRange(None) # type: ignore + + +class TestContentRangePageMethods: + """Tests for page-related factory methods.""" + + def test_page_valid_page_number(self): + cr = ContentRange.page(5) + assert str(cr) == "5" + + def test_page_page_one(self): + cr = ContentRange.page(1) + assert str(cr) == "1" + + def test_page_zero_raises(self): + with pytest.raises(ValueError): + ContentRange.page(0) + + def test_page_negative_raises(self): + with pytest.raises(ValueError): + ContentRange.page(-1) + + def test_pages_valid_range(self): + cr = ContentRange.pages(1, 3) + assert str(cr) == "1-3" + + def test_pages_same_start_and_end(self): + cr = ContentRange.pages(5, 5) + assert str(cr) == "5-5" + + def test_pages_zero_start_raises(self): + with pytest.raises(ValueError): + ContentRange.pages(0, 3) + + def test_pages_end_before_start_raises(self): + with pytest.raises(ValueError): + ContentRange.pages(5, 3) + + def test_pages_from_valid(self): + cr = ContentRange.pages_from(9) + assert str(cr) == "9-" + + def test_pages_from_zero_raises(self): + with pytest.raises(ValueError): + ContentRange.pages_from(0) + + +class TestContentRangeTimeMethods: + """Tests for time-related factory methods.""" + + def test_time_range_valid(self): + cr = ContentRange.time_range(timedelta(0), timedelta(milliseconds=5000)) + assert str(cr) == "0-5000" + + def test_time_range_same_start_and_end(self): + cr = ContentRange.time_range( + timedelta(milliseconds=1000), 
timedelta(milliseconds=1000) + ) + assert str(cr) == "1000-1000" + + def test_time_range_negative_start_raises(self): + with pytest.raises(ValueError): + ContentRange.time_range( + timedelta(milliseconds=-1), timedelta(milliseconds=5000) + ) + + def test_time_range_end_before_start_raises(self): + with pytest.raises(ValueError): + ContentRange.time_range( + timedelta(milliseconds=5000), timedelta(milliseconds=1000) + ) + + def test_time_range_from_valid(self): + cr = ContentRange.time_range_from(timedelta(milliseconds=5000)) + assert str(cr) == "5000-" + + def test_time_range_from_zero(self): + cr = ContentRange.time_range_from(timedelta(0)) + assert str(cr) == "0-" + + def test_time_range_from_negative_raises(self): + with pytest.raises(ValueError): + ContentRange.time_range_from(timedelta(milliseconds=-1)) + + def test_time_range_seconds(self): + cr = ContentRange.time_range(timedelta(0), timedelta(seconds=5)) + assert str(cr) == "0-5000" + + def test_time_range_minutes(self): + cr = ContentRange.time_range(timedelta(0), timedelta(minutes=1)) + assert str(cr) == "0-60000" + + +class TestContentRangeCombine: + """Tests for combine factory method.""" + + def test_combine_multiple_ranges(self): + combined = ContentRange.combine( + ContentRange.pages(1, 3), ContentRange.page(5), ContentRange.pages_from(9) + ) + assert str(combined) == "1-3,5,9-" + + def test_combine_single_range(self): + combined = ContentRange.combine(ContentRange.page(1)) + assert str(combined) == "1" + + def test_combine_empty_raises(self): + with pytest.raises(ValueError): + ContentRange.combine() + + +class TestContentRangeEquality: + """Tests for equality operations.""" + + def test_equals_same_value(self): + r1 = ContentRange.pages(1, 3) + r2 = ContentRange("1-3") + assert r1 == r2 + + def test_equals_different_value(self): + r1 = ContentRange.pages(1, 3) + r2 = ContentRange.pages(1, 5) + assert r1 != r2 + + def test_hash_equal_for_same_values(self): + r1 = ContentRange.pages(1, 3) + r2 = 
ContentRange("1-3") + assert hash(r1) == hash(r2) + + def test_hash_different_for_different_values(self): + r1 = ContentRange.pages(1, 3) + r2 = ContentRange.pages(1, 5) + assert hash(r1) != hash(r2) + + def test_equals_non_content_range_returns_not_implemented(self): + r1 = ContentRange.pages(1, 3) + assert r1 != "1-3" # Different type + + +class TestContentRangeStr: + """Tests for string conversion.""" + + def test_str(self): + cr = ContentRange.pages(1, 3) + assert str(cr) == "1-3" + + def test_repr(self): + cr = ContentRange.pages(1, 3) + assert repr(cr) == "ContentRange('1-3')" + + +class TestAnalysisInputContentRange: + """Tests for ContentRange integration with AnalysisInput.""" + + def test_analysis_input_accepts_content_range_string(self): + ai = AnalysisInput(content_range="1-3") + assert ai.content_range == "1-3" + + def test_analysis_input_content_range_none_by_default(self): + ai = AnalysisInput() + assert ai.content_range is None From 41153987ecbc5a36597f512513db1862693b3bee Mon Sep 17 00:00:00 2001 From: Changjian Wang Date: Fri, 13 Mar 2026 08:24:21 +0800 Subject: [PATCH 2/5] Update Tag in assets.json to reflect latest version --- .../azure-ai-contentunderstanding/assets.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/contentunderstanding/azure-ai-contentunderstanding/assets.json b/sdk/contentunderstanding/azure-ai-contentunderstanding/assets.json index b5b691fd736d..7c2c0922a9cb 100644 --- a/sdk/contentunderstanding/azure-ai-contentunderstanding/assets.json +++ b/sdk/contentunderstanding/azure-ai-contentunderstanding/assets.json @@ -2,5 +2,5 @@ "AssetsRepo": "Azure/azure-sdk-assets", "AssetsRepoPrefixPath": "python", "TagPrefix": "python/contentunderstanding/azure-ai-contentunderstanding", - "Tag": "python/contentunderstanding/azure-ai-contentunderstanding_8f5aa72c31" + "Tag": "python/contentunderstanding/azure-ai-contentunderstanding_df271d5db5" } From c6e8e98d6a68f73a134776f4de5151e14d89af70 Mon Sep 17 00:00:00 2001 
From: Changjian Wang Date: Mon, 16 Mar 2026 18:25:53 +0800 Subject: [PATCH 3/5] Add raw ContentRange examples and update test data for document and media analysis --- .../azure-ai-contentunderstanding/assets.json | 2 +- .../sample_analyze_binary_async.py | 22 +++++ .../async_samples/sample_analyze_url_async.py | 49 ++++++++++ .../samples/sample_analyze_binary.py | 19 ++++ .../samples/sample_analyze_url.py | 49 ++++++++++ .../samples/test_sample_analyze_binary.py | 86 +++++++++++++++++- .../tests/samples/test_sample_analyze_url.py | 45 +++++++++ .../test_data/mixed_financial_invoices.pdf | Bin 0 -> 15324 bytes 8 files changed, 270 insertions(+), 2 deletions(-) create mode 100644 sdk/contentunderstanding/azure-ai-contentunderstanding/tests/test_data/mixed_financial_invoices.pdf diff --git a/sdk/contentunderstanding/azure-ai-contentunderstanding/assets.json b/sdk/contentunderstanding/azure-ai-contentunderstanding/assets.json index 7c2c0922a9cb..6e9e12944245 100644 --- a/sdk/contentunderstanding/azure-ai-contentunderstanding/assets.json +++ b/sdk/contentunderstanding/azure-ai-contentunderstanding/assets.json @@ -2,5 +2,5 @@ "AssetsRepo": "Azure/azure-sdk-assets", "AssetsRepoPrefixPath": "python", "TagPrefix": "python/contentunderstanding/azure-ai-contentunderstanding", - "Tag": "python/contentunderstanding/azure-ai-contentunderstanding_df271d5db5" + "Tag": "python/contentunderstanding/azure-ai-contentunderstanding_4b81bc4b88" } diff --git a/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/async_samples/sample_analyze_binary_async.py b/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/async_samples/sample_analyze_binary_async.py index 0f741b9a18af..92104eabbb15 100644 --- a/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/async_samples/sample_analyze_binary_async.py +++ b/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/async_samples/sample_analyze_binary_async.py @@ -130,6 +130,28 @@ async def main() -> None: ) # 
[END analyze_binary_with_combined_content_range] + # [START analyze_binary_with_raw_content_range] + # You can also pass a range string directly to the ContentRange constructor. + # This is equivalent to using the factory methods and is useful for dynamically + # constructed or user-supplied ranges. + # Analyze pages 1-3, page 5, and pages 9 onward using a raw range string. + # This is equivalent to: ContentRange.combine(ContentRange.pages(1, 3), ContentRange.page(5), ContentRange.pages_from(9)) + print("\nAnalyzing with raw ContentRange string '1-3,5,9-'...") + raw_range_poller = await client.begin_analyze_binary( + analyzer_id="prebuilt-documentSearch", + binary_input=file_bytes, + content_range=ContentRange("1-3,5,9-"), + ) + raw_range_result: AnalysisResult = await raw_range_poller.result() + + if isinstance(raw_range_result.contents[0], DocumentContent): + raw_doc = raw_range_result.contents[0] + print( + f"Raw ContentRange analysis returned pages" + f" {raw_doc.start_page_number} - {raw_doc.end_page_number}" + ) + # [END analyze_binary_with_raw_content_range] + # [START extract_markdown] print("\nMarkdown Content:") print("=" * 50) diff --git a/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/async_samples/sample_analyze_url_async.py b/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/async_samples/sample_analyze_url_async.py index 7ead6dc4f56b..f902470487f2 100644 --- a/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/async_samples/sample_analyze_url_async.py +++ b/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/async_samples/sample_analyze_url_async.py @@ -261,6 +261,32 @@ async def main() -> None: ) # [END analyze_video_url_with_additional_content_ranges] + # [START analyze_video_url_with_raw_content_range] + # You can also pass a range string directly to the ContentRange constructor. + # Time ranges use milliseconds on the wire. This is useful for dynamically + # constructed or user-supplied ranges. 
+ # Analyze the first 5 seconds using a raw range string (milliseconds). + # This is equivalent to: ContentRange.time_range(timedelta(0), timedelta(seconds=5)) + print("\nAnalyzing first 5 seconds of video with raw ContentRange string '0-5000'...") + raw_video_range_poller = await client.begin_analyze( + analyzer_id="prebuilt-videoSearch", + inputs=[ + AnalysisInput( + url=video_url, + content_range=str(ContentRange("0-5000")), + ) + ], + ) + raw_video_range_result = await raw_video_range_poller.result() + + for raw_media in raw_video_range_result.contents: + raw_video_content = cast(AudioVisualContent, raw_media) + print( + f"Raw ContentRange segment:" + f" {raw_video_content.start_time_ms} ms - {raw_video_content.end_time_ms} ms" + ) + # [END analyze_video_url_with_raw_content_range] + # [START analyze_audio_from_url] print("\n" + "=" * 60) print("AUDIO ANALYSIS FROM URL") @@ -371,6 +397,29 @@ async def main() -> None: ) # [END analyze_audio_url_with_additional_content_ranges] + # [START analyze_audio_url_with_raw_content_range] + # You can also pass a range string directly for audio time ranges. + # Analyze audio from 5 seconds onward using a raw range string (milliseconds). 
+ # This is equivalent to: ContentRange.time_range_from(timedelta(seconds=5)) + print("\nAnalyzing audio from 5 seconds onward with raw ContentRange string '5000-'...") + raw_audio_range_poller = await client.begin_analyze( + analyzer_id="prebuilt-audioSearch", + inputs=[ + AnalysisInput( + url=audio_url, + content_range=str(ContentRange("5000-")), + ) + ], + ) + raw_audio_range_result = await raw_audio_range_poller.result() + + raw_audio_content = cast(AudioVisualContent, raw_audio_range_result.contents[0]) + print( + f"Raw ContentRange audio analysis:" + f" {raw_audio_content.start_time_ms} ms onward" + ) + # [END analyze_audio_url_with_raw_content_range] + # [START analyze_image_from_url] print("\n" + "=" * 60) print("IMAGE ANALYSIS FROM URL") diff --git a/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/sample_analyze_binary.py b/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/sample_analyze_binary.py index 254140c88bb1..8fd88ae6e73d 100644 --- a/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/sample_analyze_binary.py +++ b/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/sample_analyze_binary.py @@ -122,6 +122,25 @@ def main() -> None: print(f"Combined ContentRange analysis returned pages {combine_doc.start_page_number} - {combine_doc.end_page_number}") # [END analyze_binary_with_combined_content_range] + # [START analyze_binary_with_raw_content_range] + # You can also pass a range string directly to the ContentRange constructor. + # This is equivalent to using the factory methods and is useful for dynamically + # constructed or user-supplied ranges. + # Analyze pages 1-3, page 5, and pages 9 onward using a raw range string. 
+ # This is equivalent to: ContentRange.combine(ContentRange.pages(1, 3), ContentRange.page(5), ContentRange.pages_from(9)) + print("\nAnalyzing with raw ContentRange string '1-3,5,9-'...") + raw_range_poller = client.begin_analyze_binary( + analyzer_id="prebuilt-documentSearch", + binary_input=file_bytes, + content_range=ContentRange("1-3,5,9-"), + ) + raw_range_result: AnalysisResult = raw_range_poller.result() + + if isinstance(raw_range_result.contents[0], DocumentContent): + raw_doc = raw_range_result.contents[0] + print(f"Raw ContentRange analysis returned pages {raw_doc.start_page_number} - {raw_doc.end_page_number}") + # [END analyze_binary_with_raw_content_range] + # [START extract_markdown] print("\nMarkdown Content:") print("=" * 50) diff --git a/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/sample_analyze_url.py b/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/sample_analyze_url.py index 1eae4e5323e9..6dc25d9f3b7c 100644 --- a/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/sample_analyze_url.py +++ b/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/sample_analyze_url.py @@ -257,6 +257,32 @@ def main() -> None: ) # [END analyze_video_url_with_additional_content_ranges] + # [START analyze_video_url_with_raw_content_range] + # You can also pass a range string directly to the ContentRange constructor. + # Time ranges use milliseconds on the wire. This is useful for dynamically + # constructed or user-supplied ranges. + # Analyze the first 5 seconds using a raw range string (milliseconds). 
+ # This is equivalent to: ContentRange.time_range(timedelta(0), timedelta(seconds=5)) + print("\nAnalyzing first 5 seconds of video with raw ContentRange string '0-5000'...") + raw_video_range_poller = client.begin_analyze( + analyzer_id="prebuilt-videoSearch", + inputs=[ + AnalysisInput( + url=video_url, + content_range=str(ContentRange("0-5000")), + ) + ], + ) + raw_video_range_result = raw_video_range_poller.result() + + for raw_media in raw_video_range_result.contents: + raw_video_content = cast(AudioVisualContent, raw_media) + print( + f"Raw ContentRange segment:" + f" {raw_video_content.start_time_ms} ms - {raw_video_content.end_time_ms} ms" + ) + # [END analyze_video_url_with_raw_content_range] + # [START analyze_audio_from_url] print("\n" + "=" * 60) print("AUDIO ANALYSIS FROM URL") @@ -364,6 +390,29 @@ def main() -> None: ) # [END analyze_audio_url_with_additional_content_ranges] + # [START analyze_audio_url_with_raw_content_range] + # You can also pass a range string directly for audio time ranges. + # Analyze audio from 5 seconds onward using a raw range string (milliseconds). 
+ # This is equivalent to: ContentRange.time_range_from(timedelta(seconds=5)) + print("\nAnalyzing audio from 5 seconds onward with raw ContentRange string '5000-'...") + raw_audio_range_poller = client.begin_analyze( + analyzer_id="prebuilt-audioSearch", + inputs=[ + AnalysisInput( + url=audio_url, + content_range=str(ContentRange("5000-")), + ) + ], + ) + raw_audio_range_result = raw_audio_range_poller.result() + + raw_audio_content = cast(AudioVisualContent, raw_audio_range_result.contents[0]) + print( + f"Raw ContentRange audio analysis:" + f" {raw_audio_content.start_time_ms} ms onward" + ) + # [END analyze_audio_url_with_raw_content_range] + # [START analyze_image_from_url] print("\n" + "=" * 60) print("IMAGE ANALYSIS FROM URL") diff --git a/sdk/contentunderstanding/azure-ai-contentunderstanding/tests/samples/test_sample_analyze_binary.py b/sdk/contentunderstanding/azure-ai-contentunderstanding/tests/samples/test_sample_analyze_binary.py index 2a0688429a17..98ec72801889 100644 --- a/sdk/contentunderstanding/azure-ai-contentunderstanding/tests/samples/test_sample_analyze_binary.py +++ b/sdk/contentunderstanding/azure-ai-contentunderstanding/tests/samples/test_sample_analyze_binary.py @@ -263,7 +263,7 @@ def test_sample_analyze_binary_with_content_range(self, contentunderstanding_end # Read the sample file (use multi-page document for ContentRange testing) tests_dir = os.path.dirname(os.path.dirname(__file__)) - file_path = os.path.join(tests_dir, "test_data", "mixed_financial_docs.pdf") + file_path = os.path.join(tests_dir, "test_data", "mixed_financial_invoices.pdf") if not os.path.exists(file_path): file_path = os.path.join(tests_dir, "test_data", "sample_invoice.pdf") @@ -376,4 +376,88 @@ def test_sample_analyze_binary_with_content_range(self, contentunderstanding_end assert combine2_doc.start_page_number == 1, f"Combine should start at page 1, got {combine2_doc.start_page_number}" print(f"[PASS] Combine(Page(1), Pages(3,4)): {combine2_page_count} pages, 
{len(combine2_doc.markdown or '')} chars") + # --- Raw string ContentRange tests --- + # Verify raw string constructor produces identical results to factory methods. + + # Raw string "2" — single page, equivalent to ContentRange.page(2) + print("\nVerifying raw ContentRange('2') matches Page(2)...") + raw_page2_poller = client.begin_analyze_binary( + analyzer_id="prebuilt-documentSearch", + binary_input=file_bytes, + content_range=ContentRange("2"), + ) + raw_page2_result = raw_page2_poller.result() + raw_page2_doc = raw_page2_result.contents[0] + assert isinstance(raw_page2_doc, DocumentContent) + raw_page2_page_count = len(raw_page2_doc.pages) if raw_page2_doc.pages else 0 + assert raw_page2_page_count == 1, f"Raw ContentRange('2') should return exactly 1 page, got {raw_page2_page_count}" + assert raw_page2_doc.start_page_number == 2, f"Raw ContentRange('2') should start at page 2" + assert raw_page2_doc.end_page_number == 2, f"Raw ContentRange('2') should end at page 2" + assert len(page2_doc.markdown or '') == len(raw_page2_doc.markdown or ''), ( + f"Raw ContentRange('2') should return same markdown length as Page(2) " + f"({len(page2_doc.markdown or '')} vs {len(raw_page2_doc.markdown or '')})" + ) + print(f"[PASS] Raw ContentRange('2'): matches Page(2) result") + + # Raw string "1-3" — page range, equivalent to ContentRange.pages(1, 3) + print("\nVerifying raw ContentRange('1-3') matches Pages(1, 3)...") + raw_pages13_poller = client.begin_analyze_binary( + analyzer_id="prebuilt-documentSearch", + binary_input=file_bytes, + content_range=ContentRange("1-3"), + ) + raw_pages13_result = raw_pages13_poller.result() + raw_pages13_doc = raw_pages13_result.contents[0] + assert isinstance(raw_pages13_doc, DocumentContent) + raw_pages13_page_count = len(raw_pages13_doc.pages) if raw_pages13_doc.pages else 0 + assert raw_pages13_page_count == 3, f"Raw ContentRange('1-3') should return exactly 3 pages, got {raw_pages13_page_count}" + assert 
raw_pages13_doc.start_page_number == 1, f"Raw ContentRange('1-3') should start at page 1" + assert raw_pages13_doc.end_page_number == 3, f"Raw ContentRange('1-3') should end at page 3" + assert len(pages13_doc.markdown or '') == len(raw_pages13_doc.markdown or ''), ( + f"Raw ContentRange('1-3') should return same markdown length as Pages(1,3) " + f"({len(pages13_doc.markdown or '')} vs {len(raw_pages13_doc.markdown or '')})" + ) + print(f"[PASS] Raw ContentRange('1-3'): matches Pages(1, 3) result") + + # Raw string "3-" — pages from, equivalent to ContentRange.pages_from(3) + print("\nVerifying raw ContentRange('3-') matches PagesFrom(3)...") + raw_from3_poller = client.begin_analyze_binary( + analyzer_id="prebuilt-documentSearch", + binary_input=file_bytes, + content_range=ContentRange("3-"), + ) + raw_from3_result = raw_from3_poller.result() + raw_from3_doc = raw_from3_result.contents[0] + assert isinstance(raw_from3_doc, DocumentContent) + raw_from3_page_count = len(raw_from3_doc.pages) if raw_from3_doc.pages else 0 + assert raw_from3_page_count == range_page_count, ( + f"Raw ContentRange('3-') should return same page count as PagesFrom(3), " + f"got {raw_from3_page_count} vs {range_page_count}" + ) + assert len(range_doc.markdown or '') == len(raw_from3_doc.markdown or ''), ( + f"Raw ContentRange('3-') should return same markdown length as PagesFrom(3)" + ) + print(f"[PASS] Raw ContentRange('3-'): matches PagesFrom(3) result") + + # Raw string "1-3,5,9-" — combined ranges, equivalent to Combine(Pages(1,3), Page(5), PagesFrom(9)) + print("\nVerifying raw ContentRange('1-3,5,9-')...") + raw_combine_poller = client.begin_analyze_binary( + analyzer_id="prebuilt-documentSearch", + binary_input=file_bytes, + content_range=ContentRange("1-3,5,9-"), + ) + raw_combine_result = raw_combine_poller.result() + assert raw_combine_result.contents is not None + raw_combine_doc = raw_combine_result.contents[0] + assert isinstance(raw_combine_doc, DocumentContent) + 
raw_combine_page_count = len(raw_combine_doc.pages) if raw_combine_doc.pages else 0 + assert raw_combine_page_count == combine_page_count, ( + f"Raw ContentRange('1-3,5,9-') should return same page count as Combine equivalent, " + f"got {raw_combine_page_count} vs {combine_page_count}" + ) + assert len(combine_doc.markdown or '') == len(raw_combine_doc.markdown or ''), ( + f"Raw ContentRange('1-3,5,9-') should return same markdown length as Combine equivalent" + ) + print(f"[PASS] Raw ContentRange('1-3,5,9-'): matches Combine result") + print("\n[SUCCESS] All ContentRange binary test assertions passed") diff --git a/sdk/contentunderstanding/azure-ai-contentunderstanding/tests/samples/test_sample_analyze_url.py b/sdk/contentunderstanding/azure-ai-contentunderstanding/tests/samples/test_sample_analyze_url.py index 58f9de0f55d7..860c66c0ff32 100644 --- a/sdk/contentunderstanding/azure-ai-contentunderstanding/tests/samples/test_sample_analyze_url.py +++ b/sdk/contentunderstanding/azure-ai-contentunderstanding/tests/samples/test_sample_analyze_url.py @@ -616,6 +616,29 @@ def test_sample_analyze_video_url_with_content_ranges(self, contentunderstanding assert seg.markdown, "Segment should have markdown" print(f"[PASS] Combine(0-3s, 30s-): {len(combine_segments)} segment(s)") + # --- Raw string ContentRange test for video --- + # Raw string "0-5000" — equivalent to ContentRange.time_range(0, 5s) + print("\nVerifying raw ContentRange('0-5000') matches TimeRange(0, 5s)...") + raw_video_poller = client.begin_analyze( + analyzer_id="prebuilt-videoSearch", + inputs=[ + AnalysisInput( + url=url, + content_range=str(ContentRange("0-5000")), + ) + ], + polling_interval=10, + ) + raw_video_result = raw_video_poller.result() + assert raw_video_result.contents is not None + raw_video_segments = [cast(AudioVisualContent, c) for c in raw_video_result.contents] + assert len(raw_video_segments) > 0, "Raw ContentRange('0-5000') should return segments" + assert len(raw_video_segments) == 
len(range_segments), ( + f"Raw ContentRange('0-5000') should return same segment count as TimeRange equivalent " + f"({len(raw_video_segments)} vs {len(range_segments)})" + ) + print(f"[PASS] Raw ContentRange('0-5000'): {len(raw_video_segments)} segment(s), matches TimeRange result") + print("\n[SUCCESS] All video URL ContentRange assertions passed") @ContentUnderstandingPreparer() @@ -730,4 +753,26 @@ def test_sample_analyze_audio_url_with_content_ranges(self, contentunderstanding ) print(f"[PASS] TimeRange(1.2s, 3.651s): {len(subsec_audio.markdown)} chars, {subsec_duration} ms") + # --- Raw string ContentRange test for audio --- + # Raw string "5000-" — equivalent to ContentRange.time_range_from(5s) + print("\nVerifying raw ContentRange('5000-') matches TimeRangeFrom(5s)...") + raw_audio_poller = client.begin_analyze( + analyzer_id="prebuilt-audioSearch", + inputs=[ + AnalysisInput( + url=url, + content_range=str(ContentRange("5000-")), + ) + ], + polling_interval=10, + ) + raw_audio_result = raw_audio_poller.result() + assert raw_audio_result.contents is not None + raw_audio = cast(AudioVisualContent, raw_audio_result.contents[0]) + assert len(from_audio.markdown or '') == len(raw_audio.markdown or ''), ( + f"Raw ContentRange('5000-') should return same markdown length as TimeRangeFrom(5s) " + f"({len(from_audio.markdown or '')} vs {len(raw_audio.markdown or '')})" + ) + print(f"[PASS] Raw ContentRange('5000-'): {len(raw_audio.markdown or '')} chars, matches TimeRangeFrom(5s) result") + print("\n[SUCCESS] All audio URL ContentRange assertions passed") diff --git a/sdk/contentunderstanding/azure-ai-contentunderstanding/tests/test_data/mixed_financial_invoices.pdf b/sdk/contentunderstanding/azure-ai-contentunderstanding/tests/test_data/mixed_financial_invoices.pdf new file mode 100644 index 0000000000000000000000000000000000000000..793c3d4e24b224a54b3080866a997d7df1dee832 GIT binary patch literal 15324 
zcmch8byyYLvbS`1NVAbfVh6G5M!FjWiA_m&HzJLMbhjW#OGt;LARrRb-64(ofp1)W z^_+8`=ezgrKQ`ka6 zHrRGqD)%VQ*QYd)D=J9cgH_+Qh$IVM!JVg>66bKgP9TsI(Y!<7(M~-cb4g{udVGaX zqHgFvniT6Jj@Zm^LW%X5x2wnSVFYkmm6Am;`@F&kW|vDC=-!wb8i9Gh?<*k#y+WNF z<&0Pg5k~OC6RCOIA&HA@PsteDGdcA#D;PKU*!`JV*nDL0DvM5!WwKavvJc&Y9rr=? zERhCQlegZUp{N0&OT&Cti*4!h-iY@U(HAmB$l81Q_wc<&U6ojB<=BP;WZ~Db;s!|v z^xyO=_K(DMa9@&8B!AM%D?y?iM0%ht(T{v>OvX_KjiPzTmQ>9o&o4r5i&bA7vxdFc z998ZbYGv4-2IlakIKEW=a0Qn`K5zXvEy|%QBgppHumT`hjzf*|&8FNm+(dn5a?MX%<3oj14%6orheFYXIaHEZD)?_) z(v8Lqr>QKB1{b?bbZ3LrY?ZFj3gn@83u4OkH_=51mAXed0Uj{PGMGQzq5#ROt03S&U1Y;x7yTn)E7i+ z9r5w=(z^VD0s40Cx_nj^ppJt5z7GK#hqfm9>5R*#C7P>RzP*XWTG21>txb`)*NQK^ z1=!e&v6eXuXBJQw*(-q`Iv7(sh)Jln-0!&g*#Saq?|ilO3WD36AM$T`ola1nVZZgR z!l08or48u*}GoOGj8tW;l6Le>Z5y4sGzTLwmp9?*VbSsdyb@>iJ@S zqV*S4dj6uKF{4-?8Dsc}iE9>l-&R+y8Ddr~{Jw-8**f3g)K8tr5=>LeVatxES1zR~ zbsG#UyI7FYluNv==S?r~`mC|auU%3Kzb##xS`uYP)i`?Lafv5uuvjDe+2igiTv;z9 z41aPByO{dq&0@*d!(#5odj@GdN~ef0N2EvVPA<9y7OC_YGf1PSZvJel2_a6&J?$6K zg0-4drz$&x#!#nrXA**Rpk5J@5uZXXY-}0JLtnPzoYcZY0gaQY9u}{*{ANIiCB0gz zbzOVhau_G7Iu!>*tSROmQ`_NFoB1f;f}ItsTK>{(MFRh0;6QJP%nXpGoz#IcBjJLY z5ZhJzx^ii_?_NewxwiiL`LTQL+Zx*s;1nYd8>fksfFa?>AA@M$ISMQ_2rO~c6o!EYZVUl7EVi$)Mkw`vl(0Cizv$+R<`?ix$$v$8tKrH}H=$<^GPFhv)2IeRttN!%0qhAAb|qAyRX z*mS_Oi=+S|R*)REfzE9f9NzAg-!P~kTT`Y&&q>@8;x(Qx5kaOQCXTkFsIFOrSdSW? zC;KQK0vNi!a0Fu@1QD=VjiW^oA>$@op(L-)?7N4ach@g}jrE&LU;yb}7n88eafRLR z-*v-3sIe+4?1q061l!Xao;`DN?6>o6j{f+Ub@nCAwy^yY<-{#|{)s4m;^-|f|Aj|? 
z1Kghg_AdeM4;cRenSTJd+rEDh{I>uH&O}00m6x8>InWkiBr2M>Nw-wmCA004d3++xGYSN03&6~#f&ghzL!k2w4g_TmQ_)kURw50O^rzlNgPhqhQ6EN(Zuzw{UX0w0+v z{uKf3yv`gU=ma6x-fn|H%8M%-3?D9>k!Yv@CV3}SKSnnMXSi%uC2dQ!;0W$#tmbS9 zfKqnS4aGaBM=m>@!%=^3&Fc8H+{0|I78{TJxoq$xn-&8o>*l>deiCyayUQKU3FdZ)@l<#?p#|VsvtVxlc9#fDj4vLQ-^`g0h)rk4CQU4tLv(yp2Q6m zV%L{$lFNaZV@|SIm6&9ox>zJ+JGwbwhA$J+$^j5+aU_Zk-oIDtFN#ziBJ)J;`gW4- zeXmP2)qtb<3PTQCaAi<5X|I)IW~+of6~9pOSd)uf-%dDuz5bI=q;TfU*2a9dPUu(m z$eAsBkT@kDq_u7Wg!xV*bU0m-F5KGTa~uR)+&BUFm_l$hvNPXX*5Xl^_-USf7Y!la zS5dNuv+k4ev#Jj`c(;MYOO_f&B#*soJbL+Q^f~nOa0PcT#X=Kbq9lPoXWu`aFiuV? zMake@hQ}EtmQdmi0y25mpuJ{$n(2sar21OVUJf@9f0aLIVYJdY(ZhkS>hp;BFqJd& z%cQh^a-!{Zt6Ke5_&NE01(-r^eTK|N-7ES<)hBe$-I-lLkQP3l6R~T(jUK+>s(eTG zfzwbTTUe$egOO?uxQqs^S%!H}KF|f^BUH*-IH&Fhq)6SMHzz>W_}Uh_X^y1xkiJ3Yn&h5oF@5y~1CdM+0YE#~qQs`l~Xm zG%=mU*RShxJ8Y7lnR#8jBU#f)OyceEtd=2oxWx0sMQ8P~4e^(cUBxdjeA4a;%gI-N zWF-od^)h#i-&q!COR^_djMpN6OzV%ARJ%;LD4MvO5_P1Yq0Ld+%(o_d;#xstt!O=( zhqi?qaptgo9ms=m3bNxEUJS0Lb!c+XO3%ltDUjSknipo6VtHvoDb99m{^iRUa|v5h z;InQ^|-u3*yadt&H#GywBfn_Tsd|JUgJH);J}o@QCtqdTNQmx()7&eC7w;`G(Lf z+ji$yVZFWo|0P?03oGdM^8fFo^$)MX@2!sSo&O^F@5GfG`m@#HqORaH!;SmqN~>8M zdp9yFa3RTCo%V%F&ohd!l*kz1jXqLE#PWB%-mgv|A1ItzG;(|1HuLF>s^s88ES$5K zHdLn2u^G$O?a(|-cnz;SB!Q`(sXqe6o-N9l1|px2?+{qEMlLfPQs2INF!OQ_5C0?E zy$k;SR#JFFzxFlSkI=iMT8x!XS0GmGV~?eMk;{(8jDx1Omg_mXs}R>Ih_{#4u#dZY z9V>Me?SK!5Uui$@cGMgM8osR585-Cha|zHngw@U_Y;W4**Ux{bFIA{)xAJge1lo{M zl)QW?UW&8Ak+VJh?5ybXqbqJq8NDnws6yHMFlmz&54xirVW~PJCG1*kvT3}!%efR; zlo#DkF_8+StR+~?fu@#pE0DfNU6SN)Ya82NR%`5Uh}g6lz9}gIv0dO<4>Fm2M|WdV)kuAtV4~k{ zPA^3iPu&UpYUXggH8nE0*Z9HCH!W7NP5VLd7_T) zGwB9L@^iMG%k~9@2xmBoIP0S)<<&d9pe+inwFL%VOUx zUl4$o@^lG5Kdy(vD;sq;q=yL1Kmx1yZ-=B*KA;fI7s!PR;8_GlwFavt(`7A562}meNq=T2UA2#~ zpL5ufZ&}wPsld@_Xz?rga77sz{!k-fTCi?;n|c4}0MwHLnr+?+u|#eNQ1bpJV9~-+ zibZ@#W^W81qh?06igs`IxY#}fqV^`C+tJWhSfp9+%{cQq`SBxCT;g;Txk1R>?rFuv zA@a6-1r3|#j88@nIJm7S?u@aMhj?_A{jiIa>fK(PJK@O#wPTBL*YnGbm^WxUSmsoW 
zaF9r9`~FYu_97*pgA5hKmyTCGJjsCA)jTV?qQ&PTtD$^phQl2$N-^?SB=ar22t*7)5q6;swFW{5!JsmNoM4IWVjxO}YvlpckuUh>V##=0q{` zvGF}mdxPvrap;6F-*6I+HYf!n_K`jlvJNm(>c|}Roo`!mhI841I_cu=svTW@ZgxAS9F=uPz+pWtc!4WsszZwbs@`hsw`YVrWf(WBb z?6DhvU~mJU4F#K&r9eeJTao}fz0oxyA)Et3M(;z9$daS4Z(CT>rS6eGP*#`TN}t)J zmwH5>zMy|U{lRFbbdjwbnOP=_9Mdr09?R-QPyS=+x1tE%N{q0@EOZn%_-lO!!tOjl z>{Oc9!)Qf)yR6mwrMi%2lO*1hfGZu%YL4mbWzViwi*8G9d+#7(tc|h|F?4&Bjg5TC z5b}~{{q`lxFW1d;f*r+OQS{d*bBBJ=&lU6X<(w*tPGyLT?`Z?Z^Q5g*yPsG#Mm-4= zi)XPp)YTYYS}YY21hK4zJBgL_B_g{xN{aHTQx(aIPe>LpJR}>Jxl0s6pm4?HI^r65ta=9!)o1*%+Xs zAIs3$C-UO5-jR&ko7~15|RATl`+DmZ#D0u<>L9o7LB#x(co8&IZqa(c&w8@ zU=f+P=CNfAVG%inJQjJRnn$BEF(QqLg3Q20mY{(}Nwlzng3NV(IBadTK=pCoKqAzL z`*jM*poNGgMEyNhi;3L_93sr)7cBi*z;~@aJZPh_FU5*YZI9ZeEPa%j*AnwzCJmtw zIni%Tf!XP?2CRa69PQsn%E<&(k>%tF8jSvMYMG+8{u5}GGXu#o1a z=Nt!fL==4)fUD@E4&P#&B??8K>0Uosu=N4fM=(4c!e&EFVi2@Wmcqd@`H;t!DT#yS z)J@^zHZnb$ZSA_j6x~Xe6o(9}7Jq<7%|rF1K;luu*KX`Af-ILW#z*wzrde06HFTwv zEG%9u{cL+ITS6-PVxei?XK#uzRcowE30cxoKH%OjOcd)TDS%E4GK7RT#FaO|YM4q1 z$=c(#)D%@f+sP!|hFvEz# zaxrosSQA{7rRi!*EoFkj`?cY_je%C*@G_6HW{kBcef=zA&QZE!ysw&>-+G><+I_fx zPHlQVODz=W8+qjSHh&T6HxT{X;1TFQM2{dpMWk+HS3fw{zIXl$_5U42LwP_y-AY~5 z`yBJ6Z_M2LM;|E{%Z*Sj3O|7JZXB)l&d^*Adg(ASgpi01tD#9H;<_@JvOUwioVfw- z3ozxmgE>ezSdda>A=uud_|vB|lCLB)BPP$04vD1>KiyXztbb^RgsX$?B=}UVaWRDO zwZJ%b3+n|o&7B6MnM?FTVMMr?2Okg6KVa#IjJ}1{okWxA`r@#9VrpJbnqqZ!FIaM? 
z_KNH~%WLLsLLwQ1+^dq$ibk?q8oWAY z)u0{o;Z?Fy7U}0>in&+v1%YvQwdWC()IwZ7wGdY^*LpCIsbSglL{o$2Lx43pB+mJ) zE>F-^%X#fXUagYT1eKKeX4>7@djq&1v2oN0N=jKU&s~fu8oD+ zEU6*;IVPei8X{K}J0AMxL5cuRwN0}er&VOHbFQ@J&7PfH-1i?6s)6+Qe3oDQI!c`| zs`;V=C?1e{spA&ZI7eUHx!@JQr;PB$3 zxuKkV7$^#37hFjw&wklVY@reK9|b&@yLfIIn(5+p&(a{S3Q5l?SfY2 zB?i|`@Ts52m%GLE6JmL%*tft(l4K?dw5Syd{QP?K7$YfSoZ=kvdgl)0<7}OD+z;;( z4T0Znlgrn=6Wb5mTsIe&7H5mU3#rh9oe!+s$(1KOO2xN?FclX@HWt!bchw6n$@y*+ z9^ISf9`2ix+0GB^S$Z;Pu#jPBp#jCtv`y=GI^;;DtnzRx?vQ1{N<$d5SO8})H9iw` z%=ZcKrZ9k4%vU$PyZlaeMO8fGO%wU48GYmsxA!8F{Ho%3D)J_B*yFk_q&`amT!jqN zWNuz6MowNGca90UjJ_@8su>Jzd$MBQuOE85FAr2MzMakaJ;_3>_>Btqt$Q8xAN=c( zKdt!RTj~F82Rj7xvy~o_Ea#L5#r5C6Ao4k&QA@OVCW0CrP2DsnV_l(O#iG%G%j)$J z_W*uaer?juWHGY9f-_Nl@o4#Rqi{(`*TOzVj6K}^0e*j?M|hp7^G2?~Jl1pLmQsT6Nh!#B;H()#D&Ok%X(LnpNjY*o?3)2Yj-&n&RwD z;+V*>Zo0d=W{%y1k=1QG6Y9er>SLrTV(gbUzj-yxkw!jEgA; z?5!=X^Nhe3PL-a}&%1-h6_K0PcLo@t)dA>?)H z)rs64e1s*2-$ndDaC1eP7731a5F&$9HCm+xK4|f!4Z^q=%p9a+%9Nn@1iLA*sm34H z*e-Phh{(NkMXIl|f!Q2qpDW_<`KIi-yzdLVzd0AENVZ_H&W#biQj;COLky*_wAV## zR*Mj>UeIG@N|*`6Zt5GrVkgMEH_yYg#QoA?)Vnz!v+9EotvY5AnqU{Fjjm3AW95`R z{rt>`B?85yCixAiocJB?YyYmCG2~-5ZukhS5XYGjqr4a8un%h*XlT)fg!mm13V~w$ z^c$uR^c-Qc_eXRPq>LZysnQj}XK+@qA|Hf;+{g~ZC=d$aY(d$|wg`OItZD0Y)190g zbgWwsb=QPPy8yvY-O;H zqji|f-V3rNGkmm7=XKE;+tpHiovznUt3Qe_KOSplMIa6c9&|n zaK>%*jXbE$hN%8_HRQ`&s3N&DzeO63q~qm?OkH;X3Mec^EtDFsU7oU1GE}U=W#g5t zL))%hK+{Q$6!oAwa@A%<`eA<{V2Jz-PbkY&{qkw^L7Y2dMC2@uz<3N zT$mdsxjcU?r4HBbNm+>Hv%N;rcAC;~S?WecjPSjhM{RzU3{bT_`VBg7=~`P-&$zJw z#rG7SA22^VVWNNPq1BUB#0AYwT`oc6j@(1spl8FG;e3o!TUL?QY0DTxJyE4_41cW4T?01D+Vfn??b>T>uk~Q@3H!A$MkplIgwB z5`pR~JT5yG+PpL@WCg_GWNpFNwWu+|*tb@bK{z^}Rm6t$M#{W+pV^y}ShXkVKT;FM z{z69ko7w-?RSW(Pu3E^SHtuf$?0e_`jtvXqgmV2f|HNdu__RL(ELptHYD60T@FTfN zaySn+)E^?F!M>ABw{!li`!xS_-p=GzWw31DIK?&7=CzwjEf%7nqmI1+7 z!&E!FJ&f00k8p8ibI#En4%ZCZ_7SVmT-uA$KVlQ!Prvaog$o+_FtL>Oa4EKc}tjE^8dpf&BX zL_qz1F5!)fY2ptTQyQv+7y^7e=lwZBMTf+A=N4%KR?+h8ol(~lx;?yMl=tPl-rscW 
zv~Vj*i^QvEJ}7xT-AeK_+^_5B^Ac;?6=&y`B4tuhYgM1Nv|S*WZRB<(*O^}`k7C19&L&3UoK%T z7&^7nJ9X!4pc_9^U<^;!;E;EO z+MX^!1h}6oVx)-Xfb>{^3+rje5>W#auGKuqbJFa1M(6e3MWrR&=+Ok}+dC|K76*-( z=?i2)oi$0m7>IPo#pCy}%toK_s0U!g!i7^;6f-KpAFauTefjUF5hz7gr%F}4l`)d1 zz3-i_3s3z-g)DAkma*~39@t63|uE4kk<{D4#(V~ z@jhcw)j<>9))ed$c5O|k>VwxTM&CeiAOV&a{ZTaUn;Rd14*g2}+ymLUY@O}3Tu;X<;-y(jwfWi<}7!vsB>f>|D)2gJVW4k-X_Z ztpZ60p}xY1(<8^BWii~IcOv`%+w7y)_Z2~~xzMHRu1z|ygLODMT4Hs&p^0@_FoC{R z$dyG5u)~r(K2XQ>JxzF1w40oRPO?208n1=_eo6klYdVJm_BIZp2Py)5OWCB{TD*$_ z4;ex(Y$wtdGcE_GFqF&f*7B<%_}=Ej=|;FxeUj? z^?n+Ww(|8^9z3ZJcfLuMo2Ho4;xmzA>k<&=lVs?_conEaF(ki2$L^^<bu)MrJcFno=-UJ zrXczAJoPUr50)*zt>R=*J|xVebR;aV@9f&#jpdCv;B!^CgU86Ro}|o5S|iP3gmKKD_LFH|$NUQ;iZuN%__yRBZaCmIgu$S9H}Q&BXiL zIVCSnvqaLOh9lVnM=^N=-&1>m`oH(f2#ke;V zd9v{}%3y@B*L>*+zIjIlyd2A~NEO9EatiV)5LDoc>qAfMLPVp;AfoIy4GCWwXBZ~h z_Uok|_4pNOzP|?k3l{zkXn_B`3jIr*{o`Tr?{W5f=f6n)J8uaH0{%HF7?q@FH^WWn zKYw)}`_&_hfK*y$)g*zT%EWFx|A`ulOT=Lve@I+fO1*r?dgQqZkL1!FUBkpgDYic0QXL2i{Lh#;eaTCWZqMS7bz>y4_=#Nd zJo{QT>~-+e=SxVyq9>N&m;kfv+hv7D9~_6cgiTkyecSMd1|=B+yaM7k?~#O_Vc(+7 zW3-0i%rX49*WonmYVMKL{nkWUmST~n_K#)zb~oHy(1(q56DE%9S(P5CTN{K6boYk( zJ*t_}>}Keu#v~Zjti3-LBY6SCh!<$D=w~&^5_s-vxJU3Hu$SZ+p?*zo2pq;!jyv&A zDxvi1LZ8|gTm0NNn3@AdttqI?h1=AtpLgqvB0`Mh?K|}2xv`V-x=qVDp=%DzXv=IP zXiv$L1@>fJl!DG^F3TL?b5y5$9hkz|6^3_*Kt1j9PqPPz6rt;gw2!$k2fYJ)DdG_o z)usfKo!Q{fhM(@Imnq7dAh%E_ z8*|`vGzzsm#2#C;Y8KgDFEZfFAI88Uf=ZwhaIqW-Jk&Txel5FtJqv)!FcqSSm?T16^t+P!46gwYKKC#! 
z!@c>bm5=jzTG(Hx2DhZB3mrO9PdL)Z74|M?M?h8*L2V4ZR1^0@!D4S3}V zGhMs;`a6nYmN@~Qlh(@E`rF!G%^xC-UVD2``ZjpA83~nJ8bp=ykX#wyMllr2y*61I z{&;oA1VkLj2i-(@akLxwIJ$Phbm7JOV!ia`{SE!+6(5@D6acC9(Pi8ZM$^Y;f&O=UTVhB#>#_WA=jj z%%fRS{8XiR988FYVx(GmKP>m~FJrWo)#T}LHrGkc-kW*qLN_ec3rdlNOR_?*5yg1y z!_F+IX+ibvDzBr0^k1q|KS{EE|II?ByyAiDb1UtJkdpykq%>f)*mbRj{PL_WapmV6X7(0K@`MAA4fc{DVxux`jCmj4^BE|1U60@^5 z{T~?SM+e?-hWQ^D;U|CI?Q#hSa7cjw;6HDWQeeRMx!y$er62(Ccf7mF3*k_9v@=nI zIRo@KZjyBX9O^Lln}wFKxrrBu{rOk?&%ZJce_Xd8ue0BG=YMy&pRT(9`?LG;y86FA zyYJ5_{x&fF*Jt-VH1vNs+^y~Pe>mKay5pC<`~Nud-@Ot4hx7PRW&ARa8!7&u6==wh z`a{&k*}~2d!1MrS3v;|lDl-L`-TV~wba8|ML}dVfn$-XeTT_^$le3ZScMYs2AqQ|W zvaz>@IWgZP_}%22-KNq>+;j&pN$`PzU~VAfCLQg@NCt7~-n3A+bhf^!lCu1sa%W@> z(7eqha0JNMy4qRZCcv=))I=4O(Q!YQ-Av-wG7#`aIsM!Y0_M62 zzy6~PavP8Nxr`IU`)e7P=LVC0ZpZbTak)UZj}?Dz$IT1~_~4L~3W^q)L~K%Bqq2M7%1`sWxRFz>JJAi!Vt z4RmwH{>k&rk>mO28h{`W=s))y1mWcUWn2jNFZ&CE@Z9E}{dk$4QJ7>U6quZx?GPY)RfbT+l`$Gbt zC%_Apfbj5ef+WRwxgZi;T%uA?Fc%aoA;k^k;)IHE3;pL7-x2>tz#W~xOB;xj6U>WF LPcNw?h5r8lGu)Xy literal 0 HcmV?d00001 From 88dbaed6d6d8cac49e0299168bbe13a7512a8cc8 Mon Sep 17 00:00:00 2001 From: Changjian Wang Date: Mon, 16 Mar 2026 18:35:57 +0800 Subject: [PATCH 4/5] Enhance ContentRange tests with additional assertions for time range validations in audio and video segments --- .../tests/samples/test_sample_analyze_url.py | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/sdk/contentunderstanding/azure-ai-contentunderstanding/tests/samples/test_sample_analyze_url.py b/sdk/contentunderstanding/azure-ai-contentunderstanding/tests/samples/test_sample_analyze_url.py index 860c66c0ff32..bdee6b1d2a51 100644 --- a/sdk/contentunderstanding/azure-ai-contentunderstanding/tests/samples/test_sample_analyze_url.py +++ b/sdk/contentunderstanding/azure-ai-contentunderstanding/tests/samples/test_sample_analyze_url.py @@ -545,6 +545,12 @@ def test_sample_analyze_video_url_with_content_ranges(self, contentunderstanding assert len(range_segments) > 0, "TimeRange(0, 
5s) should return segments" for seg in range_segments: assert (seg.end_time_ms or 0) > (seg.start_time_ms or 0), "Segment should have EndTime > StartTime" + assert (seg.start_time_ms or 0) >= 0, ( + f"Range(0-5s) segment StartTime ({seg.start_time_ms} ms) should be >= 0 ms" + ) + assert (seg.end_time_ms or 0) <= 5000, ( + f"Range(0-5s) segment EndTime ({seg.end_time_ms} ms) should be <= 5000 ms" + ) print(f"[PASS] TimeRange(0, 5s): {len(range_segments)} segment(s)") # ContentRange.time_range_from(10s) — from 10 seconds onward (wire format: "10000-") @@ -566,6 +572,9 @@ def test_sample_analyze_video_url_with_content_ranges(self, contentunderstanding for seg in from_segments: assert (seg.end_time_ms or 0) > (seg.start_time_ms or 0), "Segment should have EndTime > StartTime" assert seg.markdown, "Segment should have markdown" + assert (seg.start_time_ms or 0) >= 10000, ( + f"TimeRangeFrom(10s) segment StartTime ({seg.start_time_ms} ms) should be >= 10000 ms" + ) print(f"[PASS] TimeRangeFrom(10s): {len(from_segments)} segment(s)") # ContentRange.time_range(1200ms, 3651ms) — sub-second precision (wire format: "1200-3651") @@ -588,6 +597,12 @@ def test_sample_analyze_video_url_with_content_ranges(self, contentunderstanding assert len(subsec_segments) > 0, "Sub-second TimeRange should return segments" for seg in subsec_segments: assert (seg.end_time_ms or 0) > (seg.start_time_ms or 0), "Segment should have EndTime > StartTime" + assert (seg.start_time_ms or 0) >= 1200, ( + f"Range(1200-3651ms) segment StartTime ({seg.start_time_ms} ms) should be >= 1200 ms" + ) + assert (seg.end_time_ms or 0) <= 3651, ( + f"Range(1200-3651ms) segment EndTime ({seg.end_time_ms} ms) should be <= 3651 ms" + ) print(f"[PASS] TimeRange(1.2s, 3.651s): {len(subsec_segments)} segment(s)") # ContentRange.combine() — combined time ranges (wire format: "0-3000,30000-") @@ -614,6 +629,14 @@ def test_sample_analyze_video_url_with_content_ranges(self, contentunderstanding for seg in combine_segments: 
assert (seg.end_time_ms or 0) > (seg.start_time_ms or 0), "Segment should have EndTime > StartTime" assert seg.markdown, "Segment should have markdown" + # Each segment should fall within one of the combined ranges: 0-3s or 30s- + seg_start = seg.start_time_ms or 0 + seg_end = seg.end_time_ms or 0 + in_first_range = seg_start >= 0 and seg_end <= 3000 + in_second_range = seg_start >= 30000 + assert in_first_range or in_second_range, ( + f"Combine(0-3s, 30s-) segment ({seg_start}-{seg_end} ms) should fall within 0-3000 ms or >= 30000 ms" + ) print(f"[PASS] Combine(0-3s, 30s-): {len(combine_segments)} segment(s)") # --- Raw string ContentRange test for video --- @@ -637,6 +660,13 @@ def test_sample_analyze_video_url_with_content_ranges(self, contentunderstanding f"Raw ContentRange('0-5000') should return same segment count as TimeRange equivalent " f"({len(raw_video_segments)} vs {len(range_segments)})" ) + for seg in raw_video_segments: + assert (seg.start_time_ms or 0) >= 0, ( + f"Raw Range(0-5000) segment StartTime ({seg.start_time_ms} ms) should be >= 0 ms" + ) + assert (seg.end_time_ms or 0) <= 5000, ( + f"Raw Range(0-5000) segment EndTime ({seg.end_time_ms} ms) should be <= 5000 ms" + ) print(f"[PASS] Raw ContentRange('0-5000'): {len(raw_video_segments)} segment(s), matches TimeRange result") print("\n[SUCCESS] All video URL ContentRange assertions passed") @@ -688,6 +718,9 @@ def test_sample_analyze_audio_url_with_content_ranges(self, contentunderstanding from_result = from_poller.result() assert from_result.contents is not None from_audio = cast(AudioVisualContent, from_result.contents[0]) + assert (from_audio.start_time_ms or 0) >= 5000, ( + f"TimeRangeFrom(5s) audio StartTime ({from_audio.start_time_ms} ms) should be >= 5000 ms" + ) assert len(full_audio.markdown or '') >= len(from_audio.markdown or ''), ( f"Full audio markdown ({len(full_audio.markdown or '')} chars) should be >= range-limited ({len(from_audio.markdown or '')} chars)" ) @@ -717,6 +750,12 @@ 
def test_sample_analyze_audio_url_with_content_ranges(self, contentunderstanding assert (window_audio.end_time_ms or 0) > (window_audio.start_time_ms or 0), ( "TimeRange(2s, 8s) should have EndTime > StartTime" ) + assert (window_audio.start_time_ms or 0) >= 2000, ( + f"TimeRange(2s, 8s) audio StartTime ({window_audio.start_time_ms} ms) should be >= 2000 ms" + ) + assert (window_audio.end_time_ms or 0) <= 8000, ( + f"TimeRange(2s, 8s) audio EndTime ({window_audio.end_time_ms} ms) should be <= 8000 ms" + ) assert window_audio.markdown, "TimeRange(2s, 8s) should have markdown" assert len(window_audio.markdown) > 0, "TimeRange(2s, 8s) markdown should not be empty" window_duration = (window_audio.end_time_ms or 0) - (window_audio.start_time_ms or 0) @@ -745,6 +784,12 @@ def test_sample_analyze_audio_url_with_content_ranges(self, contentunderstanding assert (subsec_audio.end_time_ms or 0) > (subsec_audio.start_time_ms or 0), ( "TimeRange(1.2s, 3.651s) should have EndTime > StartTime" ) + assert (subsec_audio.start_time_ms or 0) >= 1200, ( + f"TimeRange(1.2s, 3.651s) audio StartTime ({subsec_audio.start_time_ms} ms) should be >= 1200 ms" + ) + assert (subsec_audio.end_time_ms or 0) <= 3651, ( + f"TimeRange(1.2s, 3.651s) audio EndTime ({subsec_audio.end_time_ms} ms) should be <= 3651 ms" + ) assert subsec_audio.markdown, "TimeRange(1.2s, 3.651s) should have markdown" assert len(subsec_audio.markdown) > 0, "TimeRange(1.2s, 3.651s) markdown should not be empty" subsec_duration = (subsec_audio.end_time_ms or 0) - (subsec_audio.start_time_ms or 0) @@ -769,6 +814,9 @@ def test_sample_analyze_audio_url_with_content_ranges(self, contentunderstanding raw_audio_result = raw_audio_poller.result() assert raw_audio_result.contents is not None raw_audio = cast(AudioVisualContent, raw_audio_result.contents[0]) + assert (raw_audio.start_time_ms or 0) >= 5000, ( + f"Raw ContentRange('5000-') audio StartTime ({raw_audio.start_time_ms} ms) should be >= 5000 ms" + ) assert 
len(from_audio.markdown or '') == len(raw_audio.markdown or ''), ( f"Raw ContentRange('5000-') should return same markdown length as TimeRangeFrom(5s) " f"({len(from_audio.markdown or '')} vs {len(raw_audio.markdown or '')})" From 77675446c139112f9c253af3a21d488acbf7327e Mon Sep 17 00:00:00 2001 From: Changjian Wang Date: Mon, 16 Mar 2026 19:12:02 +0800 Subject: [PATCH 5/5] Add ContentRange sample demonstrations using a multi-page document --- .../sample_analyze_binary_async.py | 11 ++++++++--- .../samples/sample_analyze_binary.py | 11 ++++++++--- .../sample_files/mixed_financial_invoices.pdf | Bin 0 -> 15324 bytes 3 files changed, 16 insertions(+), 6 deletions(-) create mode 100644 sdk/contentunderstanding/azure-ai-contentunderstanding/samples/sample_files/mixed_financial_invoices.pdf diff --git a/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/async_samples/sample_analyze_binary_async.py b/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/async_samples/sample_analyze_binary_async.py index 92104eabbb15..3aa513559a1e 100644 --- a/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/async_samples/sample_analyze_binary_async.py +++ b/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/async_samples/sample_analyze_binary_async.py @@ -91,11 +91,16 @@ async def main() -> None: # [END analyze_document_from_binary] # [START analyze_binary_with_content_range] + # Use a multi-page document for ContentRange demonstrations. + multi_page_path = "sample_files/mixed_financial_invoices.pdf" + with open(multi_page_path, "rb") as f: + multi_page_bytes = f.read() + # Analyze only pages 3 onward. 
print("\nAnalyzing pages 3 onward with ContentRange...") range_poller = await client.begin_analyze_binary( analyzer_id="prebuilt-documentSearch", - binary_input=file_bytes, + binary_input=multi_page_bytes, content_range=ContentRange.pages_from(3), ) range_result: AnalysisResult = await range_poller.result() @@ -113,7 +118,7 @@ async def main() -> None: print("\nAnalyzing combined pages (1-3, 5, 9-) with ContentRange...") combine_range_poller = await client.begin_analyze_binary( analyzer_id="prebuilt-documentSearch", - binary_input=file_bytes, + binary_input=multi_page_bytes, content_range=ContentRange.combine( ContentRange.pages(1, 3), ContentRange.page(5), @@ -139,7 +144,7 @@ async def main() -> None: print("\nAnalyzing with raw ContentRange string '1-3,5,9-'...") raw_range_poller = await client.begin_analyze_binary( analyzer_id="prebuilt-documentSearch", - binary_input=file_bytes, + binary_input=multi_page_bytes, content_range=ContentRange("1-3,5,9-"), ) raw_range_result: AnalysisResult = await raw_range_poller.result() diff --git a/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/sample_analyze_binary.py b/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/sample_analyze_binary.py index 8fd88ae6e73d..0ced2a6b1840 100644 --- a/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/sample_analyze_binary.py +++ b/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/sample_analyze_binary.py @@ -89,11 +89,16 @@ def main() -> None: # [END analyze_document_from_binary] # [START analyze_binary_with_content_range] + # Use a multi-page document for ContentRange demonstrations. + multi_page_path = "sample_files/mixed_financial_invoices.pdf" + with open(multi_page_path, "rb") as f: + multi_page_bytes = f.read() + # Analyze only pages 3 onward. 
print("\nAnalyzing pages 3 onward with ContentRange...") range_poller = client.begin_analyze_binary( analyzer_id="prebuilt-documentSearch", - binary_input=file_bytes, + binary_input=multi_page_bytes, content_range=ContentRange.pages_from(3), ) range_result: AnalysisResult = range_poller.result() @@ -108,7 +113,7 @@ def main() -> None: print("\nAnalyzing combined pages (1-3, 5, 9-) with ContentRange...") combine_range_poller = client.begin_analyze_binary( analyzer_id="prebuilt-documentSearch", - binary_input=file_bytes, + binary_input=multi_page_bytes, content_range=ContentRange.combine( ContentRange.pages(1, 3), ContentRange.page(5), @@ -131,7 +136,7 @@ def main() -> None: print("\nAnalyzing with raw ContentRange string '1-3,5,9-'...") raw_range_poller = client.begin_analyze_binary( analyzer_id="prebuilt-documentSearch", - binary_input=file_bytes, + binary_input=multi_page_bytes, content_range=ContentRange("1-3,5,9-"), ) raw_range_result: AnalysisResult = raw_range_poller.result() diff --git a/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/sample_files/mixed_financial_invoices.pdf b/sdk/contentunderstanding/azure-ai-contentunderstanding/samples/sample_files/mixed_financial_invoices.pdf new file mode 100644 index 0000000000000000000000000000000000000000..793c3d4e24b224a54b3080866a997d7df1dee832 GIT binary patch literal 15324 zcmch8byyYLvbS`1NVAbfVh6G5M!FjWiA_m&HzJLMbhjW#OGt;LARrRb-64(ofp1)W z^_+8`=ezgrKQ`ka6 zHrRGqD)%VQ*QYd)D=J9cgH_+Qh$IVM!JVg>66bKgP9TsI(Y!<7(M~-cb4g{udVGaX zqHgFvniT6Jj@Zm^LW%X5x2wnSVFYkmm6Am;`@F&kW|vDC=-!wb8i9Gh?<*k#y+WNF z<&0Pg5k~OC6RCOIA&HA@PsteDGdcA#D;PKU*!`JV*nDL0DvM5!WwKavvJc&Y9rr=? 
zERhCQlegZUp{N0&OT&Cti*4!h-iY@U(HAmB$l81Q_wc<&U6ojB<=BP;WZ~Db;s!|v z^xyO=_K(DMa9@&8B!AM%D?y?iM0%ht(T{v>OvX_KjiPzTmQ>9o&o4r5i&bA7vxdFc z998ZbYGv4-2IlakIKEW=a0Qn`K5zXvEy|%QBgppHumT`hjzf*|&8FNm+(dn5a?MX%<3oj14%6orheFYXIaHEZD)?_) z(v8Lqr>QKB1{b?bbZ3LrY?ZFj3gn@83u4OkH_=51mAXed0Uj{PGMGQzq5#ROt03S&U1Y;x7yTn)E7i+ z9r5w=(z^VD0s40Cx_nj^ppJt5z7GK#hqfm9>5R*#C7P>RzP*XWTG21>txb`)*NQK^ z1=!e&v6eXuXBJQw*(-q`Iv7(sh)Jln-0!&g*#Saq?|ilO3WD36AM$T`ola1nVZZgR z!l08or48u*}GoOGj8tW;l6Le>Z5y4sGzTLwmp9?*VbSsdyb@>iJ@S zqV*S4dj6uKF{4-?8Dsc}iE9>l-&R+y8Ddr~{Jw-8**f3g)K8tr5=>LeVatxES1zR~ zbsG#UyI7FYluNv==S?r~`mC|auU%3Kzb##xS`uYP)i`?Lafv5uuvjDe+2igiTv;z9 z41aPByO{dq&0@*d!(#5odj@GdN~ef0N2EvVPA<9y7OC_YGf1PSZvJel2_a6&J?$6K zg0-4drz$&x#!#nrXA**Rpk5J@5uZXXY-}0JLtnPzoYcZY0gaQY9u}{*{ANIiCB0gz zbzOVhau_G7Iu!>*tSROmQ`_NFoB1f;f}ItsTK>{(MFRh0;6QJP%nXpGoz#IcBjJLY z5ZhJzx^ii_?_NewxwiiL`LTQL+Zx*s;1nYd8>fksfFa?>AA@M$ISMQ_2rO~c6o!EYZVUl7EVi$)Mkw`vl(0Cizv$+R<`?ix$$v$8tKrH}H=$<^GPFhv)2IeRttN!%0qhAAb|qAyRX z*mS_Oi=+S|R*)REfzE9f9NzAg-!P~kTT`Y&&q>@8;x(Qx5kaOQCXTkFsIFOrSdSW? zC;KQK0vNi!a0Fu@1QD=VjiW^oA>$@op(L-)?7N4ach@g}jrE&LU;yb}7n88eafRLR z-*v-3sIe+4?1q061l!Xao;`DN?6>o6j{f+Ub@nCAwy^yY<-{#|{)s4m;^-|f|Aj|? z1Kghg_AdeM4;cRenSTJd+rEDh{I>uH&O}00m6x8>InWkiBr2M>Nw-wmCA004d3++xGYSN03&6~#f&ghzL!k2w4g_TmQ_)kURw50O^rzlNgPhqhQ6EN(Zuzw{UX0w0+v z{uKf3yv`gU=ma6x-fn|H%8M%-3?D9>k!Yv@CV3}SKSnnMXSi%uC2dQ!;0W$#tmbS9 zfKqnS4aGaBM=m>@!%=^3&Fc8H+{0|I78{TJxoq$xn-&8o>*l>deiCyayUQKU3FdZ)@l<#?p#|VsvtVxlc9#fDj4vLQ-^`g0h)rk4CQU4tLv(yp2Q6m zV%L{$lFNaZV@|SIm6&9ox>zJ+JGwbwhA$J+$^j5+aU_Zk-oIDtFN#ziBJ)J;`gW4- zeXmP2)qtb<3PTQCaAi<5X|I)IW~+of6~9pOSd)uf-%dDuz5bI=q;TfU*2a9dPUu(m z$eAsBkT@kDq_u7Wg!xV*bU0m-F5KGTa~uR)+&BUFm_l$hvNPXX*5Xl^_-USf7Y!la zS5dNuv+k4ev#Jj`c(;MYOO_f&B#*soJbL+Q^f~nOa0PcT#X=Kbq9lPoXWu`aFiuV? 
zMake@hQ}EtmQdmi0y25mpuJ{$n(2sar21OVUJf@9f0aLIVYJdY(ZhkS>hp;BFqJd& z%cQh^a-!{Zt6Ke5_&NE01(-r^eTK|N-7ES<)hBe$-I-lLkQP3l6R~T(jUK+>s(eTG zfzwbTTUe$egOO?uxQqs^S%!H}KF|f^BUH*-IH&Fhq)6SMHzz>W_}Uh_X^y1xkiJ3Yn&h5oF@5y~1CdM+0YE#~qQs`l~Xm zG%=mU*RShxJ8Y7lnR#8jBU#f)OyceEtd=2oxWx0sMQ8P~4e^(cUBxdjeA4a;%gI-N zWF-od^)h#i-&q!COR^_djMpN6OzV%ARJ%;LD4MvO5_P1Yq0Ld+%(o_d;#xstt!O=( zhqi?qaptgo9ms=m3bNxEUJS0Lb!c+XO3%ltDUjSknipo6VtHvoDb99m{^iRUa|v5h z;InQ^|-u3*yadt&H#GywBfn_Tsd|JUgJH);J}o@QCtqdTNQmx()7&eC7w;`G(Lf z+ji$yVZFWo|0P?03oGdM^8fFo^$)MX@2!sSo&O^F@5GfG`m@#HqORaH!;SmqN~>8M zdp9yFa3RTCo%V%F&ohd!l*kz1jXqLE#PWB%-mgv|A1ItzG;(|1HuLF>s^s88ES$5K zHdLn2u^G$O?a(|-cnz;SB!Q`(sXqe6o-N9l1|px2?+{qEMlLfPQs2INF!OQ_5C0?E zy$k;SR#JFFzxFlSkI=iMT8x!XS0GmGV~?eMk;{(8jDx1Omg_mXs}R>Ih_{#4u#dZY z9V>Me?SK!5Uui$@cGMgM8osR585-Cha|zHngw@U_Y;W4**Ux{bFIA{)xAJge1lo{M zl)QW?UW&8Ak+VJh?5ybXqbqJq8NDnws6yHMFlmz&54xirVW~PJCG1*kvT3}!%efR; zlo#DkF_8+StR+~?fu@#pE0DfNU6SN)Ya82NR%`5Uh}g6lz9}gIv0dO<4>Fm2M|WdV)kuAtV4~k{ zPA^3iPu&UpYUXggH8nE0*Z9HCH!W7NP5VLd7_T) zGwB9L@^iMG%k~9@2xmBoIP0S)<<&d9pe+inwFL%VOUx zUl4$o@^lG5Kdy(vD;sq;q=yL1Kmx1yZ-=B*KA;fI7s!PR;8_GlwFavt(`7A562}meNq=T2UA2#~ zpL5ufZ&}wPsld@_Xz?rga77sz{!k-fTCi?;n|c4}0MwHLnr+?+u|#eNQ1bpJV9~-+ zibZ@#W^W81qh?06igs`IxY#}fqV^`C+tJWhSfp9+%{cQq`SBxCT;g;Txk1R>?rFuv zA@a6-1r3|#j88@nIJm7S?u@aMhj?_A{jiIa>fK(PJK@O#wPTBL*YnGbm^WxUSmsoW zaF9r9`~FYu_97*pgA5hKmyTCGJjsCA)jTV?qQ&PTtD$^phQl2$N-^?SB=ar22t*7)5q6;swFW{5!JsmNoM4IWVjxO}YvlpckuUh>V##=0q{` zvGF}mdxPvrap;6F-*6I+HYf!n_K`jlvJNm(>c|}Roo`!mhI841I_cu=svTW@ZgxAS9F=uPz+pWtc!4WsszZwbs@`hsw`YVrWf(WBb z?6DhvU~mJU4F#K&r9eeJTao}fz0oxyA)Et3M(;z9$daS4Z(CT>rS6eGP*#`TN}t)J zmwH5>zMy|U{lRFbbdjwbnOP=_9Mdr09?R-QPyS=+x1tE%N{q0@EOZn%_-lO!!tOjl z>{Oc9!)Qf)yR6mwrMi%2lO*1hfGZu%YL4mbWzViwi*8G9d+#7(tc|h|F?4&Bjg5TC z5b}~{{q`lxFW1d;f*r+OQS{d*bBBJ=&lU6X<(w*tPGyLT?`Z?Z^Q5g*yPsG#Mm-4= zi)XPp)YTYYS}YY21hK4zJBgL_B_g{xN{aHTQx(aIPe>LpJR}>Jxl0s6pm4?HI^r65ta=9!)o1*%+Xs zAIs3$C-UO5-jR&ko7~15|RATl`+DmZ#D0u<>L9o7LB#x(co8&IZqa(c&w8@ 
zU=f+P=CNfAVG%inJQjJRnn$BEF(QqLg3Q20mY{(}Nwlzng3NV(IBadTK=pCoKqAzL z`*jM*poNGgMEyNhi;3L_93sr)7cBi*z;~@aJZPh_FU5*YZI9ZeEPa%j*AnwzCJmtw zIni%Tf!XP?2CRa69PQsn%E<&(k>%tF8jSvMYMG+8{u5}GGXu#o1a z=Nt!fL==4)fUD@E4&P#&B??8K>0Uosu=N4fM=(4c!e&EFVi2@Wmcqd@`H;t!DT#yS z)J@^zHZnb$ZSA_j6x~Xe6o(9}7Jq<7%|rF1K;luu*KX`Af-ILW#z*wzrde06HFTwv zEG%9u{cL+ITS6-PVxei?XK#uzRcowE30cxoKH%OjOcd)TDS%E4GK7RT#FaO|YM4q1 z$=c(#)D%@f+sP!|hFvEz# zaxrosSQA{7rRi!*EoFkj`?cY_je%C*@G_6HW{kBcef=zA&QZE!ysw&>-+G><+I_fx zPHlQVODz=W8+qjSHh&T6HxT{X;1TFQM2{dpMWk+HS3fw{zIXl$_5U42LwP_y-AY~5 z`yBJ6Z_M2LM;|E{%Z*Sj3O|7JZXB)l&d^*Adg(ASgpi01tD#9H;<_@JvOUwioVfw- z3ozxmgE>ezSdda>A=uud_|vB|lCLB)BPP$04vD1>KiyXztbb^RgsX$?B=}UVaWRDO zwZJ%b3+n|o&7B6MnM?FTVMMr?2Okg6KVa#IjJ}1{okWxA`r@#9VrpJbnqqZ!FIaM? z_KNH~%WLLsLLwQ1+^dq$ibk?q8oWAY z)u0{o;Z?Fy7U}0>in&+v1%YvQwdWC()IwZ7wGdY^*LpCIsbSglL{o$2Lx43pB+mJ) zE>F-^%X#fXUagYT1eKKeX4>7@djq&1v2oN0N=jKU&s~fu8oD+ zEU6*;IVPei8X{K}J0AMxL5cuRwN0}er&VOHbFQ@J&7PfH-1i?6s)6+Qe3oDQI!c`| zs`;V=C?1e{spA&ZI7eUHx!@JQr;PB$3 zxuKkV7$^#37hFjw&wklVY@reK9|b&@yLfIIn(5+p&(a{S3Q5l?SfY2 zB?i|`@Ts52m%GLE6JmL%*tft(l4K?dw5Syd{QP?K7$YfSoZ=kvdgl)0<7}OD+z;;( z4T0Znlgrn=6Wb5mTsIe&7H5mU3#rh9oe!+s$(1KOO2xN?FclX@HWt!bchw6n$@y*+ z9^ISf9`2ix+0GB^S$Z;Pu#jPBp#jCtv`y=GI^;;DtnzRx?vQ1{N<$d5SO8})H9iw` z%=ZcKrZ9k4%vU$PyZlaeMO8fGO%wU48GYmsxA!8F{Ho%3D)J_B*yFk_q&`amT!jqN zWNuz6MowNGca90UjJ_@8su>Jzd$MBQuOE85FAr2MzMakaJ;_3>_>Btqt$Q8xAN=c( zKdt!RTj~F82Rj7xvy~o_Ea#L5#r5C6Ao4k&QA@OVCW0CrP2DsnV_l(O#iG%G%j)$J z_W*uaer?juWHGY9f-_Nl@o4#Rqi{(`*TOzVj6K}^0e*j?M|hp7^G2?~Jl1pLmQsT6Nh!#B;H()#D&Ok%X(LnpNjY*o?3)2Yj-&n&RwD z;+V*>Zo0d=W{%y1k=1QG6Y9er>SLrTV(gbUzj-yxkw!jEgA; z?5!=X^Nhe3PL-a}&%1-h6_K0PcLo@t)dA>?)H z)rs64e1s*2-$ndDaC1eP7731a5F&$9HCm+xK4|f!4Z^q=%p9a+%9Nn@1iLA*sm34H z*e-Phh{(NkMXIl|f!Q2qpDW_<`KIi-yzdLVzd0AENVZ_H&W#biQj;COLky*_wAV## zR*Mj>UeIG@N|*`6Zt5GrVkgMEH_yYg#QoA?)Vnz!v+9EotvY5AnqU{Fjjm3AW95`R z{rt>`B?85yCixAiocJB?YyYmCG2~-5ZukhS5XYGjqr4a8un%h*XlT)fg!mm13V~w$ z^c$uR^c-Qc_eXRPq>LZysnQj}XK+@qA|Hf;+{g~ZC=d$aY(d$|wg`OItZD0Y)190g 
zbgWwsb=QPPy8yvY-O;H zqji|f-V3rNGkmm7=XKE;+tpHiovznUt3Qe_KOSplMIa6c9&|n zaK>%*jXbE$hN%8_HRQ`&s3N&DzeO63q~qm?OkH;X3Mec^EtDFsU7oU1GE}U=W#g5t zL))%hK+{Q$6!oAwa@A%<`eA<{V2Jz-PbkY&{qkw^L7Y2dMC2@uz<3N zT$mdsxjcU?r4HBbNm+>Hv%N;rcAC;~S?WecjPSjhM{RzU3{bT_`VBg7=~`P-&$zJw z#rG7SA22^VVWNNPq1BUB#0AYwT`oc6j@(1spl8FG;e3o!TUL?QY0DTxJyE4_41cW4T?01D+Vfn??b>T>uk~Q@3H!A$MkplIgwB z5`pR~JT5yG+PpL@WCg_GWNpFNwWu+|*tb@bK{z^}Rm6t$M#{W+pV^y}ShXkVKT;FM z{z69ko7w-?RSW(Pu3E^SHtuf$?0e_`jtvXqgmV2f|HNdu__RL(ELptHYD60T@FTfN zaySn+)E^?F!M>ABw{!li`!xS_-p=GzWw31DIK?&7=CzwjEf%7nqmI1+7 z!&E!FJ&f00k8p8ibI#En4%ZCZ_7SVmT-uA$KVlQ!Prvaog$o+_FtL>Oa4EKc}tjE^8dpf&BX zL_qz1F5!)fY2ptTQyQv+7y^7e=lwZBMTf+A=N4%KR?+h8ol(~lx;?yMl=tPl-rscW zv~Vj*i^QvEJ}7xT-AeK_+^_5B^Ac;?6=&y`B4tuhYgM1Nv|S*WZRB<(*O^}`k7C19&L&3UoK%T z7&^7nJ9X!4pc_9^U<^;!;E;EO z+MX^!1h}6oVx)-Xfb>{^3+rje5>W#auGKuqbJFa1M(6e3MWrR&=+Ok}+dC|K76*-( z=?i2)oi$0m7>IPo#pCy}%toK_s0U!g!i7^;6f-KpAFauTefjUF5hz7gr%F}4l`)d1 zz3-i_3s3z-g)DAkma*~39@t63|uE4kk<{D4#(V~ z@jhcw)j<>9))ed$c5O|k>VwxTM&CeiAOV&a{ZTaUn;Rd14*g2}+ymLUY@O}3Tu;X<;-y(jwfWi<}7!vsB>f>|D)2gJVW4k-X_Z ztpZ60p}xY1(<8^BWii~IcOv`%+w7y)_Z2~~xzMHRu1z|ygLODMT4Hs&p^0@_FoC{R z$dyG5u)~r(K2XQ>JxzF1w40oRPO?208n1=_eo6klYdVJm_BIZp2Py)5OWCB{TD*$_ z4;ex(Y$wtdGcE_GFqF&f*7B<%_}=Ej=|;FxeUj? 
z^?n+Ww(|8^9z3ZJcfLuMo2Ho4;xmzA>k<&=lVs?_conEaF(ki2$L^^<bu)MrJcFno=-UJ zrXczAJoPUr50)*zt>R=*J|xVebR;aV@9f&#jpdCv;B!^CgU86Ro}|o5S|iP3gmKKD_LFH|$NUQ;iZuN%__yRBZaCmIgu$S9H}Q&BXiL zIVCSnvqaLOh9lVnM=^N=-&1>m`oH(f2#ke;V zd9v{}%3y@B*L>*+zIjIlyd2A~NEO9EatiV)5LDoc>qAfMLPVp;AfoIy4GCWwXBZ~h z_Uok|_4pNOzP|?k3l{zkXn_B`3jIr*{o`Tr?{W5f=f6n)J8uaH0{%HF7?q@FH^WWn zKYw)}`_&_hfK*y$)g*zT%EWFx|A`ulOT=Lve@I+fO1*r?dgQqZkL1!FUBkpgDYic0QXL2i{Lh#;eaTCWZqMS7bz>y4_=#Nd zJo{QT>~-+e=SxVyq9>N&m;kfv+hv7D9~_6cgiTkyecSMd1|=B+yaM7k?~#O_Vc(+7 zW3-0i%rX49*WonmYVMKL{nkWUmST~n_K#)zb~oHy(1(q56DE%9S(P5CTN{K6boYk( zJ*t_}>}Keu#v~Zjti3-LBY6SCh!<$D=w~&^5_s-vxJU3Hu$SZ+p?*zo2pq;!jyv&A zDxvi1LZ8|gTm0NNn3@AdttqI?h1=AtpLgqvB0`Mh?K|}2xv`V-x=qVDp=%DzXv=IP zXiv$L1@>fJl!DG^F3TL?b5y5$9hkz|6^3_*Kt1j9PqPPz6rt;gw2!$k2fYJ)DdG_o z)usfKo!Q{fhM(@Imnq7dAh%E_ z8*|`vGzzsm#2#C;Y8KgDFEZfFAI88Uf=ZwhaIqW-Jk&Txel5FtJqv)!FcqSSm?T16^t+P!46gwYKKC#! z!@c>bm5=jzTG(Hx2DhZB3mrO9PdL)Z74|M?M?h8*L2V4ZR1^0@!D4S3}V zGhMs;`a6nYmN@~Qlh(@E`rF!G%^xC-UVD2``ZjpA83~nJ8bp=ykX#wyMllr2y*61I z{&;oA1VkLj2i-(@akLxwIJ$Phbm7JOV!ia`{SE!+6(5@D6acC9(Pi8ZM$^Y;f&O=UTVhB#>#_WA=jj z%%fRS{8XiR988FYVx(GmKP>m~FJrWo)#T}LHrGkc-kW*qLN_ec3rdlNOR_?*5yg1y z!_F+IX+ibvDzBr0^k1q|KS{EE|II?ByyAiDb1UtJkdpykq%>f)*mbRj{PL_WapmV6X7(0K@`MAA4fc{DVxux`jCmj4^BE|1U60@^5 z{T~?SM+e?-hWQ^D;U|CI?Q#hSa7cjw;6HDWQeeRMx!y$er62(Ccf7mF3*k_9v@=nI zIRo@KZjyBX9O^Lln}wFKxrrBu{rOk?&%ZJce_Xd8ue0BG=YMy&pRT(9`?LG;y86FA zyYJ5_{x&fF*Jt-VH1vNs+^y~Pe>mKay5pC<`~Nud-@Ot4hx7PRW&ARa8!7&u6==wh z`a{&k*}~2d!1MrS3v;|lDl-L`-TV~wba8|ML}dVfn$-XeTT_^$le3ZScMYs2AqQ|W zvaz>@IWgZP_}%22-KNq>+;j&pN$`PzU~VAfCLQg@NCt7~-n3A+bhf^!lCu1sa%W@> z(7eqha0JNMy4qRZCcv=))I=4O(Q!YQ-Av-wG7#`aIsM!Y0_M62 zzy6~PavP8Nxr`IU`)e7P=LVC0ZpZbTak)UZj}?Dz$IT1~_~4L~3W^q)L~K%Bqq2M7%1`sWxRFz>JJAi!Vt z4RmwH{>k&rk>mO28h{`W=s))y1mWcUWn2jNFZ&CE@Z9E}{dk$4QJ7>U6quZx?GPY)RfbT+l`$Gbt zC%_Apfbj5ef+WRwxgZi;T%uA?Fc%aoA;k^k;)IHE3;pL7-x2>tz#W~xOB;xj6U>WF LPcNw?h5r8lGu)Xy literal 0 HcmV?d00001