# --- file: sdk/ai/azure-ai-projects/azure/ai/projects/aio/operations/_patch_evaluators_async.py (new) ---
# pylint: disable=line-too-long,useless-suppression
# ------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# ------------------------------------
"""Customize generated code here.

Follow our quickstart for examples: https://aka.ms/azsdk/python/dpcodegen/python/customize
"""
import os
import logging
from typing import Any, IO, Tuple, Optional, Union
from pathlib import Path
from urllib.parse import urlsplit
from azure.storage.blob.aio import ContainerClient
from azure.core.tracing.decorator_async import distributed_trace_async
from azure.core.exceptions import HttpResponseError, ResourceNotFoundError
from ._operations import BetaEvaluatorsOperations as EvaluatorsOperationsGenerated, JSON
from ...models._models import (
    EvaluatorVersion,
)

logger = logging.getLogger(__name__)


class EvaluatorsOperations(EvaluatorsOperationsGenerated):
    """
    .. warning::
        **DO NOT** instantiate this class directly.

    Instead, you should access the following operations through
    :class:`~azure.ai.projects.aio.AIProjectClient`'s
    :attr:`beta.evaluators` attribute.
    """

    async def _start_pending_upload_and_get_container_client(
        self,
        name: str,
        version: str,
        connection_name: Optional[str] = None,
    ) -> Tuple[ContainerClient, str]:
        """Call startPendingUpload to get a SAS URI and return a ContainerClient and blob URI.

        :param name: The evaluator name. Required.
        :param version: The evaluator version being uploaded. Required.
        :param connection_name: Optional storage-account connection name to upload through.
        :return: A ``(container_client, blob_uri)`` pair. (The version passthrough in the
            previous 3-tuple return was unused by the caller and has been removed.)
        :raises ValueError: If the service response is missing the blob reference,
            SAS credential, SAS URI, or blob URI.
        """
        request_body: dict = {}
        if connection_name:
            request_body["connectionName"] = connection_name

        pending_upload_response = await self.pending_upload(
            name=name,
            version=version,
            pending_upload_request=request_body,
        )

        # The service returns blobReferenceForConsumption; validate each nested
        # field explicitly so a malformed response fails with a precise message.
        blob_ref = pending_upload_response.get("blobReferenceForConsumption")
        if not blob_ref:
            raise ValueError("Blob reference is not present in the pending upload response")

        credential = blob_ref.get("credential") if isinstance(blob_ref, dict) else None
        if not credential:
            raise ValueError("SAS credential is not present in the pending upload response")

        sas_uri = credential.get("sasUri") if isinstance(credential, dict) else None
        if not sas_uri:
            raise ValueError("SAS URI is missing or empty in the pending upload response")

        blob_uri = blob_ref.get("blobUri") if isinstance(blob_ref, dict) else None
        if not blob_uri:
            raise ValueError("Blob URI is missing or empty in the pending upload response")

        return ContainerClient.from_container_url(container_url=sas_uri), blob_uri

    async def _get_next_version(self, name: str) -> str:
        """Get the next version number for an evaluator by fetching existing versions.

        Returns the max numeric version plus one, or ``"1"`` when there are no
        versions, no numeric versions, or the evaluator does not exist yet.
        """
        try:
            # Keep the try body minimal: only the service call can raise.
            versions = [v async for v in self.list_versions(name=name)]
        except ResourceNotFoundError:
            return "1"
        numeric_versions = []
        for v in versions:
            ver = v.get("version") if isinstance(v, dict) else getattr(v, "version", None)
            if ver and ver.isdigit():
                numeric_versions.append(int(ver))
        return str(max(numeric_versions) + 1) if numeric_versions else "1"

    @distributed_trace_async
    async def upload(
        self,
        name: str,
        evaluator_version: Union[EvaluatorVersion, JSON, IO[bytes]],
        *,
        folder: str,
        connection_name: Optional[str] = None,
        **kwargs: Any,
    ) -> EvaluatorVersion:
        """Upload all files in a folder to blob storage and create a code-based evaluator version
        that references the uploaded code.

        This method calls startPendingUpload to get a SAS URI, uploads files from the folder
        to blob storage, then creates an evaluator version referencing the uploaded blob.

        The version is automatically determined by incrementing the latest existing version.

        :param name: The name of the evaluator. Required.
        :type name: str
        :param evaluator_version: The evaluator version definition. This is the same object accepted
            by ``create_version``. Is one of the following types: EvaluatorVersion, JSON,
            IO[bytes]. Required.
        :type evaluator_version: ~azure.ai.projects.models.EvaluatorVersion or JSON or IO[bytes]
        :keyword folder: Path to the folder containing the evaluator Python code. Required.
        :paramtype folder: str
        :keyword connection_name: The name of an Azure Storage Account connection where the files
            should be uploaded. If not specified, the default Azure Storage Account connection will be
            used. Optional.
        :paramtype connection_name: str
        :return: The created evaluator version.
        :rtype: ~azure.ai.projects.models.EvaluatorVersion
        :raises ~azure.core.exceptions.HttpResponseError: If an error occurs during the HTTP request.
        :raises ValueError: If *folder* does not exist, is a file, or contains no uploadable files.
        """
        path_folder = Path(folder)
        if not path_folder.exists():
            raise ValueError(f"The provided folder `{folder}` does not exist.")
        if path_folder.is_file():
            raise ValueError("The provided path is a file, not a folder.")

        version = await self._get_next_version(name)
        logger.info("[upload] Auto-resolved version to '%s'.", version)

        # Get SAS URI via startPendingUpload
        container_client, blob_uri = await self._start_pending_upload_and_get_container_client(
            name=name,
            version=version,
            connection_name=connection_name,
        )

        async with container_client:
            # Upload all files from the folder (including nested subdirectories)
            skip_dirs = {"__pycache__", ".git", ".venv", "venv", "node_modules"}
            skip_extensions = {".pyc", ".pyo"}
            files_uploaded: bool = False
            for root, dirs, files in os.walk(folder):
                # Prune directories we don't want to traverse
                dirs[:] = [d for d in dirs if d not in skip_dirs]
                for file in files:
                    if any(file.endswith(ext) for ext in skip_extensions):
                        continue
                    file_path = os.path.join(root, file)
                    # Blob names use forward slashes so the nested layout is preserved.
                    blob_name = os.path.relpath(file_path, folder).replace("\\", "/")
                    logger.debug(
                        "[upload] Start uploading file `%s` as blob `%s`.",
                        file_path,
                        blob_name,
                    )
                    with open(file=file_path, mode="rb") as data:
                        try:
                            await container_client.upload_blob(name=blob_name, data=data, **kwargs)
                        except HttpResponseError as e:
                            if e.error_code == "AuthorizationPermissionMismatch":
                                storage_account = urlsplit(container_client.url).hostname
                                raise HttpResponseError(
                                    message=(
                                        f"Failed to upload file '{blob_name}' to blob storage: "
                                        f"permission denied. Ensure the identity that signed the SAS token "
                                        f"has the 'Storage Blob Data Contributor' role on the storage account "
                                        f"'{storage_account}'. "
                                        f"Original error: {e.message}"
                                    ),
                                    response=e.response,
                                ) from e
                            raise
                    logger.debug("[upload] Done uploading file")
                    files_uploaded = True
            logger.debug("[upload] Done uploading all files.")

        if not files_uploaded:
            raise ValueError("The provided folder is empty.")

        # Set the blob_uri in the evaluator version definition.
        # BUGFIX: previously `evaluator_version.get("definition", {})` returned a
        # fresh dict that was never attached back, so blob_uri was silently lost
        # for dict payloads without a "definition" key; setdefault keeps the
        # definition attached to the payload.
        if isinstance(evaluator_version, dict):
            definition = evaluator_version.setdefault("definition", {})
            if isinstance(definition, dict):
                definition["blob_uri"] = blob_uri
            else:
                definition.blob_uri = blob_uri
        elif hasattr(evaluator_version, "definition") and evaluator_version.definition:
            evaluator_version.definition.blob_uri = blob_uri

        result = await self.create_version(
            name=name,
            evaluator_version=evaluator_version,
        )

        return result


# NOTE(review): the accompanying change to azure/ai/projects/operations/_patch.py
# (fused into this span of the mangled diff) imports the patched sync class as
# `BetaEvaluatorsOperations` and, inside `BetaOperations.__init__`, replaces the
# generated client with:
#   self.evaluators = BetaEvaluatorsOperations(self._client, self._config, self._serialize, self._deserialize)
# alongside the existing memory_stores replacement.
# --- file: sdk/ai/azure-ai-projects/azure/ai/projects/operations/_patch_evaluators.py (new) ---
# pylint: disable=line-too-long,useless-suppression
# ------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# ------------------------------------
"""Customize generated code here.

Follow our quickstart for examples: https://aka.ms/azsdk/python/dpcodegen/python/customize
"""
import os
import logging
from typing import Any, IO, Tuple, Optional, Union
from pathlib import Path
from urllib.parse import urlsplit
from azure.storage.blob import ContainerClient
from azure.core.tracing.decorator import distributed_trace
from azure.core.exceptions import HttpResponseError, ResourceNotFoundError
from ._operations import BetaEvaluatorsOperations as EvaluatorsOperationsGenerated, JSON
from ..models._models import (
    EvaluatorVersion,
)

logger = logging.getLogger(__name__)


class EvaluatorsOperations(EvaluatorsOperationsGenerated):
    """
    .. warning::
        **DO NOT** instantiate this class directly.

    Instead, you should access the following operations through
    :class:`~azure.ai.projects.AIProjectClient`'s
    :attr:`beta.evaluators` attribute.
    """

    def _start_pending_upload_and_get_container_client(
        self,
        name: str,
        version: str,
        connection_name: Optional[str] = None,
    ) -> Tuple[ContainerClient, str]:
        """Call startPendingUpload to get a SAS URI and return a ContainerClient and blob URI.

        :param name: The evaluator name. Required.
        :param version: The evaluator version being uploaded. Required.
        :param connection_name: Optional storage-account connection name to upload through.
        :return: A ``(container_client, blob_uri)`` pair. (The version passthrough in the
            previous 3-tuple return was unused by the caller and has been removed.)
        :raises ValueError: If the service response is missing the blob reference,
            SAS credential, SAS URI, or blob URI.
        """
        request_body: dict = {}
        if connection_name:
            request_body["connectionName"] = connection_name

        pending_upload_response = self.pending_upload(
            name=name,
            version=version,
            pending_upload_request=request_body,
        )

        # The service returns blobReferenceForConsumption; validate each nested
        # field explicitly so a malformed response fails with a precise message.
        blob_ref = pending_upload_response.get("blobReferenceForConsumption")
        if not blob_ref:
            raise ValueError("Blob reference is not present in the pending upload response")

        credential = blob_ref.get("credential") if isinstance(blob_ref, dict) else None
        if not credential:
            raise ValueError("SAS credential is not present in the pending upload response")

        sas_uri = credential.get("sasUri") if isinstance(credential, dict) else None
        if not sas_uri:
            raise ValueError("SAS URI is missing or empty in the pending upload response")

        blob_uri = blob_ref.get("blobUri") if isinstance(blob_ref, dict) else None
        if not blob_uri:
            raise ValueError("Blob URI is missing or empty in the pending upload response")

        return ContainerClient.from_container_url(container_url=sas_uri), blob_uri

    def _get_next_version(self, name: str) -> str:
        """Get the next version number for an evaluator by fetching existing versions.

        Returns the max numeric version plus one, or ``"1"`` when there are no
        versions, no numeric versions, or the evaluator does not exist yet.
        """
        try:
            # Keep the try body minimal: only the service call can raise.
            versions = list(self.list_versions(name=name))
        except ResourceNotFoundError:
            return "1"
        numeric_versions = []
        for v in versions:
            ver = v.get("version") if isinstance(v, dict) else getattr(v, "version", None)
            if ver and ver.isdigit():
                numeric_versions.append(int(ver))
        return str(max(numeric_versions) + 1) if numeric_versions else "1"

    @distributed_trace
    def upload(
        self,
        name: str,
        evaluator_version: Union[EvaluatorVersion, JSON, IO[bytes]],
        *,
        folder: str,
        connection_name: Optional[str] = None,
        **kwargs: Any,
    ) -> EvaluatorVersion:
        """Upload all files in a folder to blob storage and create a code-based evaluator version
        that references the uploaded code.

        This method calls startPendingUpload to get a SAS URI, uploads files from the folder
        to blob storage, then creates an evaluator version referencing the uploaded blob.

        The version is automatically determined by incrementing the latest existing version.

        :param name: The name of the evaluator. Required.
        :type name: str
        :param evaluator_version: The evaluator version definition. This is the same object accepted
            by ``create_version``. Is one of the following types: EvaluatorVersion, JSON,
            IO[bytes]. Required.
        :type evaluator_version: ~azure.ai.projects.models.EvaluatorVersion or JSON or IO[bytes]
        :keyword folder: Path to the folder containing the evaluator Python code. Required.
        :paramtype folder: str
        :keyword connection_name: The name of an Azure Storage Account connection where the files
            should be uploaded. If not specified, the default Azure Storage Account connection will be
            used. Optional.
        :paramtype connection_name: str
        :return: The created evaluator version.
        :rtype: ~azure.ai.projects.models.EvaluatorVersion
        :raises ~azure.core.exceptions.HttpResponseError: If an error occurs during the HTTP request.
        :raises ValueError: If *folder* does not exist, is a file, or contains no uploadable files.
        """
        path_folder = Path(folder)
        if not path_folder.exists():
            raise ValueError(f"The provided folder `{folder}` does not exist.")
        if path_folder.is_file():
            raise ValueError("The provided path is a file, not a folder.")

        version = self._get_next_version(name)
        logger.info("[upload] Auto-resolved version to '%s'.", version)

        # Get SAS URI via startPendingUpload
        container_client, blob_uri = self._start_pending_upload_and_get_container_client(
            name=name,
            version=version,
            connection_name=connection_name,
        )

        with container_client:
            # Upload all files from the folder (including nested subdirectories)
            skip_dirs = {"__pycache__", ".git", ".venv", "venv", "node_modules"}
            skip_extensions = {".pyc", ".pyo"}
            files_uploaded: bool = False
            for root, dirs, files in os.walk(folder):
                # Prune directories we don't want to traverse
                dirs[:] = [d for d in dirs if d not in skip_dirs]
                for file in files:
                    if any(file.endswith(ext) for ext in skip_extensions):
                        continue
                    file_path = os.path.join(root, file)
                    # Blob names use forward slashes so the nested layout is preserved.
                    blob_name = os.path.relpath(file_path, folder).replace("\\", "/")
                    logger.debug(
                        "[upload] Start uploading file `%s` as blob `%s`.",
                        file_path,
                        blob_name,
                    )
                    with open(file=file_path, mode="rb") as data:
                        try:
                            container_client.upload_blob(name=blob_name, data=data, **kwargs)
                        except HttpResponseError as e:
                            if e.error_code == "AuthorizationPermissionMismatch":
                                storage_account = urlsplit(container_client.url).hostname
                                raise HttpResponseError(
                                    message=(
                                        f"Failed to upload file '{blob_name}' to blob storage: "
                                        f"permission denied. Ensure the identity that signed the SAS token "
                                        f"has the 'Storage Blob Data Contributor' role on the storage account "
                                        f"'{storage_account}'. "
                                        f"Original error: {e.message}"
                                    ),
                                    response=e.response,
                                ) from e
                            raise
                    logger.debug("[upload] Done uploading file")
                    files_uploaded = True
            logger.debug("[upload] Done uploading all files.")

        if not files_uploaded:
            raise ValueError("The provided folder is empty.")

        # Set the blob_uri in the evaluator version definition.
        # BUGFIX: previously `evaluator_version.get("definition", {})` returned a
        # fresh dict that was never attached back, so blob_uri was silently lost
        # for dict payloads without a "definition" key; setdefault keeps the
        # definition attached to the payload.
        if isinstance(evaluator_version, dict):
            definition = evaluator_version.setdefault("definition", {})
            if isinstance(definition, dict):
                definition["blob_uri"] = blob_uri
            else:
                definition.blob_uri = blob_uri
        elif hasattr(evaluator_version, "definition") and evaluator_version.definition:
            evaluator_version.definition.blob_uri = blob_uri

        result = self.create_version(
            name=name,
            evaluator_version=evaluator_version,
        )

        return result


# --- file: samples/evaluations/custom_evaluators/answer_length_evaluator/answer_length_evaluator.py (new) ---
"""Custom evaluator that measures the length of a response."""


class AnswerLengthEvaluator:
    """Sample code-based evaluator returning the character count of ``response``."""

    def __init__(self, *, model_config):
        # model_config is accepted for interface parity with other evaluators;
        # this evaluator does not call a model.
        self.model_config = model_config

    def __call__(self, *args, **kwargs):
        return {"result": evaluate_answer_length(kwargs.get("response"))}


def evaluate_answer_length(answer: str) -> int:
    """Return the length of *answer*, or 0 when it is missing/empty.

    BUGFIX: previously a missing "response" field raised ``TypeError`` on
    ``len(None)``; report 0 instead.
    """
    return len(answer) if answer else 0


# --- file: samples/evaluations/custom_evaluators/friendly_evaluator/common_util/__init__.py (new, empty) ---
# --- file: samples/evaluations/custom_evaluators/friendly_evaluator/common_util/util.py (new) ---
"""Utility functions for custom evaluators."""

import json

FRIENDLINESS_SYSTEM_PROMPT = """You are an expert evaluator that assesses how friendly, warm, and approachable
a response is. You evaluate responses on a scale of 1 to 5 based on the following criteria:

Score 1 (Very Unfriendly): The response is cold, dismissive, rude, or hostile.
Score 2 (Unfriendly): The response is curt, impersonal, or lacks warmth.
Score 3 (Neutral): The response is acceptable but neither particularly friendly nor unfriendly.
Score 4 (Friendly): The response is warm, polite, and shows genuine interest in helping.
Score 5 (Very Friendly): The response is exceptionally warm, encouraging, empathetic, and makes the user feel valued.

You MUST respond in the following JSON format only:
{
    "score": <score>,
    "label": "<label>",
    "reason": "<reason>",
    "explanation": "<explanation>"
}

A score of 3 or above is considered "Pass", below 3 is "Fail".
"""


def build_evaluation_messages(query: str, response: str) -> list:
    """Build the messages list for the LLM evaluation call.

    :param query: The original user query.
    :param response: The response to evaluate for friendliness.
    :return: A list of message dicts for the chat completion API.
    """
    return [
        {"role": "system", "content": FRIENDLINESS_SYSTEM_PROMPT},
        {
            "role": "user",
            "content": (
                f"Please evaluate the friendliness of the following response.\n\n"
                f"Original query: {query}\n\n"
                f"Response to evaluate: {response}"
            ),
        },
    ]


def parse_evaluation_result(raw_result: str) -> dict:
    """Parse the LLM's JSON response into a structured evaluation result.

    :param raw_result: The raw string output from the LLM.
    :return: A dict with score, label, reason, and explanation.
    """
    # json import moved to module level (was a function-body import).
    try:
        # Try to extract JSON from the response (handle markdown code blocks)
        text = raw_result.strip()
        if text.startswith("```"):
            text = text.split("\n", 1)[1] if "\n" in text else text[3:]
            text = text.rsplit("```", 1)[0]
        result = json.loads(text.strip())
        score = int(result.get("score", 3))
        return {
            "score": max(1, min(5, score)),  # clamp to the documented 1-5 range
            "label": result.get("label", "Pass" if score >= 3 else "Fail"),
            "reason": result.get("reason", "No reason provided"),
            "explanation": result.get("explanation", "No explanation provided"),
        }
    except (json.JSONDecodeError, ValueError, KeyError):
        # Best-effort fallback: keep the raw output visible for debugging.
        return {
            "score": 3,
            "label": "Pass",
            "reason": "Could not parse LLM response",
            "explanation": f"Raw LLM output: {raw_result}",
        }


# --- file: samples/evaluations/custom_evaluators/friendly_evaluator/friendly_evaluator.py (new) ---
"""Custom evaluator that uses an LLM to assess the friendliness of a response."""

from openai import AzureOpenAI
from common_util.util import build_evaluation_messages, parse_evaluation_result


class FriendlyEvaluator:
    """Evaluates how friendly and approachable a response is using an LLM judge.

    This evaluator sends the query and response to an LLM, which returns a
    friendliness score (1-5), a pass/fail label, a reason, and a detailed explanation.

    :param model_config: A dict containing Azure OpenAI connection info. Expected keys:
        - azure_endpoint: The Azure OpenAI endpoint URL.
        - azure_deployment: The deployment/model name.
        - api_version: The API version (default: "2024-06-01").
        - api_key: (Optional) The API key. If not provided, DefaultAzureCredential is used.
    """

    def __init__(self, *, model_config: dict):
        self.model_config = model_config
        api_key = model_config.get("api_key")

        if api_key:
            self.client = AzureOpenAI(
                azure_endpoint=model_config["azure_endpoint"],
                api_key=api_key,
                api_version=model_config.get("api_version", "2024-06-01"),
            )
        else:
            # Keyless path: authenticate with Entra ID via a bearer-token provider.
            from azure.identity import DefaultAzureCredential, get_bearer_token_provider

            token_provider = get_bearer_token_provider(
                DefaultAzureCredential(),
                "https://cognitiveservices.azure.com/.default",
            )
            self.client = AzureOpenAI(
                azure_endpoint=model_config["azure_endpoint"],
                azure_ad_token_provider=token_provider,
                api_version=model_config.get("api_version", "2024-06-01"),
            )

        self.deployment = model_config["azure_deployment"]

    def __call__(self, *, query: str, response: str, **kwargs) -> dict:
        """Evaluate the friendliness of a response.

        :param query: The original user query.
        :param response: The response to evaluate.
        :return: A dict with score, label, reason, and explanation.
        """
        messages = build_evaluation_messages(query, response)

        completion = self.client.chat.completions.create(
            model=self.deployment,
            messages=messages,
            temperature=0.0,  # deterministic judging
            max_tokens=500,
        )

        raw_result = completion.choices[0].message.content
        return parse_evaluation_result(raw_result)


# --- file: samples/evaluations/sample_eval_upload_custom_evaluator.py (new) ---
# pylint: disable=line-too-long,useless-suppression
# ------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# ------------------------------------

"""
DESCRIPTION:
    Given an AIProjectClient, this sample demonstrates how to:
    1. Upload a local folder containing custom evaluator Python code and
       register it as a code-based evaluator version using `evaluators.upload()`.
    2. Create an evaluation (eval) that references the uploaded evaluator.
    3. Run the evaluation with inline data and poll for results.

USAGE:
    python sample_eval_upload_custom_evaluator.py

    Before running the sample:

    pip install "azure-ai-projects>=2.0.0b4" azure-storage-blob python-dotenv azure-identity openai

    Set these environment variables with your own values:
    1) FOUNDRY_PROJECT_ENDPOINT - Required. The Azure AI Project endpoint, as found in the overview page of your
       Microsoft Foundry project. It has the form: https://<account>.services.ai.azure.com/api/projects/<project>.
    2) FOUNDRY_MODEL_NAME - Optional. The name of the model deployment to use for evaluation.
"""

import os
import time
import random
import string
from pathlib import Path
from pprint import pprint

from dotenv import load_dotenv
from openai.types.evals.create_eval_jsonl_run_data_source_param import (
    CreateEvalJSONLRunDataSourceParam,
    SourceFileContent,
    SourceFileContentContent,
)
from openai.types.eval_create_params import DataSourceConfigCustom
from azure.identity import DefaultAzureCredential
from azure.ai.projects import AIProjectClient
from azure.ai.projects.models import (
    CodeBasedEvaluatorDefinition,
    EvaluatorCategory,
    EvaluatorMetric,
    EvaluatorMetricType,
    EvaluatorMetricDirection,
    EvaluatorType,
    EvaluatorVersion,
)

load_dotenv()

endpoint = os.environ["FOUNDRY_PROJECT_ENDPOINT"]
model_deployment_name = os.environ.get("FOUNDRY_MODEL_NAME")

# The folder containing the AnswerLength evaluator code, relative to this sample file.
local_upload_folder = str(Path(__file__).parent / "custom_evaluators" / "answer_length_evaluator")

with (
    DefaultAzureCredential() as credential,
    AIProjectClient(endpoint=endpoint, credential=credential) as project_client,
    project_client.get_openai_client() as client,
):
    # ---------------------------------------------------------------
    # 1. Upload evaluator code and create evaluator version
    #    upload() internally calls startPendingUpload to get a SAS URI,
    #    uploads the folder contents to blob storage, then creates the
    #    evaluator version with the blob URI.
    # ---------------------------------------------------------------
    suffix = "".join(random.choices(string.ascii_lowercase, k=5))
    evaluator_name = f"answer_length_evaluator_{suffix}"
    evaluator_version = EvaluatorVersion(
        evaluator_type=EvaluatorType.CUSTOM,
        categories=[EvaluatorCategory.QUALITY],
        display_name="Answer Length Evaluator",
        description="Custom evaluator to calculate length of content",
        definition=CodeBasedEvaluatorDefinition(
            entry_point="answer_length_evaluator:AnswerLengthEvaluator",
            init_parameters={
                "type": "object",
                "properties": {"model_config": {"type": "string"}},
                "required": ["model_config"],
            },
            data_schema={
                "type": "object",
                "properties": {
                    "query": {"type": "string"},
                    "response": {"type": "string"},
                },
                "required": ["query", "response"],
            },
            metrics={
                "score": EvaluatorMetric(
                    type=EvaluatorMetricType.ORDINAL,
                    desirable_direction=EvaluatorMetricDirection.INCREASE,
                    min_value=1,
                    max_value=5,
                )
            },
        ),
    )

    print("Uploading custom evaluator code and creating evaluator version...")
    # NOTE: overwrite=True is forwarded through upload()'s **kwargs to
    # ContainerClient.upload_blob, replacing any existing blobs.
    code_evaluator = project_client.beta.evaluators.upload(
        name=evaluator_name,
        evaluator_version=evaluator_version,
        folder=local_upload_folder,
        overwrite=True,
    )

    print(f"Evaluator created: name={code_evaluator.name}, version={code_evaluator.version}")
    print(f"Evaluator ID: {code_evaluator.id}")
    pprint(code_evaluator)

    # ---------------------------------------------------------------
    # 2. Create an evaluation referencing the uploaded evaluator
    # ---------------------------------------------------------------
    data_source_config = DataSourceConfigCustom(
        {
            "type": "custom",
            "item_schema": {
                "type": "object",
                "properties": {
                    "query": {"type": "string"},
                    "response": {"type": "string"},
                },
                "required": ["query", "response"],
            },
            "include_sample_schema": True,
        }
    )

    testing_criteria = [
        {
            "type": "azure_ai_evaluator",
            "name": evaluator_name,
            "evaluator_name": evaluator_name,
            "initialization_parameters": {
                # NOTE(review): FOUNDRY_MODEL_NAME is optional, so this becomes the
                # literal string "None" when unset — confirm the service tolerates that.
                "model_config": f"{model_deployment_name}",
            },
        }
    ]

    print("\nCreating evaluation...")
    eval_object = client.evals.create(
        name=f"Answer Length Evaluation - {suffix}",
        data_source_config=data_source_config,
        testing_criteria=testing_criteria,  # type: ignore
    )
    print(f"Evaluation created (id: {eval_object.id}, name: {eval_object.name})")

    # ---------------------------------------------------------------
    # 3. Run the evaluation with inline data
    # ---------------------------------------------------------------
    print("\nCreating evaluation run with inline data...")
    eval_run_object = client.evals.runs.create(
        eval_id=eval_object.id,
        name=f"Answer Length Eval Run - {suffix}",
        metadata={"team": "eval-exp", "scenario": "answer-length-v1"},
        data_source=CreateEvalJSONLRunDataSourceParam(
            type="jsonl",
            source=SourceFileContent(
                type="file_content",
                content=[
                    SourceFileContentContent(
                        item={
                            "query": "What is the capital of France?",
                            "response": "Paris",
                        }
                    ),
                    SourceFileContentContent(
                        item={
                            "query": "Explain quantum computing",
                            "response": "Quantum computing leverages quantum mechanical phenomena like superposition and entanglement to process information in fundamentally different ways than classical computers.",
                        }
                    ),
                    SourceFileContentContent(
                        item={
                            "query": "What is AI?",
                            "response": "AI stands for Artificial Intelligence. It is a branch of computer science that aims to create intelligent machines that can perform tasks that typically require human intelligence, such as visual perception, speech recognition, decision-making, and language translation.",
                        }
                    ),
                    SourceFileContentContent(
                        item={
                            "query": "Say hello",
                            "response": "Hi!",
                        }
                    ),
                ],
            ),
        ),
    )

    print(f"Evaluation run created (id: {eval_run_object.id})")
    pprint(eval_run_object)

    # ---------------------------------------------------------------
    # 4. Poll for evaluation run completion
    # ---------------------------------------------------------------
    while True:
        run = client.evals.runs.retrieve(run_id=eval_run_object.id, eval_id=eval_object.id)
        # BUGFIX: also treat "canceled" as terminal so the loop cannot poll forever
        # if the run is canceled out-of-band.
        if run.status in ("completed", "failed", "canceled"):
            print(f"\nEvaluation run finished with status: {run.status}")
            output_items = list(client.evals.runs.output_items.list(run_id=run.id, eval_id=eval_object.id))
            pprint(output_items)
            print(f"\nEvaluation run Report URL: {run.report_url}")
            break
        time.sleep(5)
        print("Waiting for evaluation run to complete...")

    # ---------------------------------------------------------------
    # 5. Cleanup (uncomment to delete)
    # ---------------------------------------------------------------
    # print("\nCleaning up...")
    # project_client.beta.evaluators.delete_version(
    #     name=code_evaluator.name,
    #     version=code_evaluator.version,
    # )
    # client.evals.delete(eval_id=eval_object.id)
    # print("Cleanup done.")
    print("\nDone - upload, eval creation, and eval run verified successfully.")
+ + The FriendlyEvaluator calls Azure OpenAI to judge the friendliness of a + response and returns score, label, reason, and explanation. + +USAGE: + python sample_eval_upload_friendly_evaluator.py + + Before running the sample: + + pip install "azure-ai-projects>=2.0.0b4" azure-storage-blob python-dotenv azure-identity openai + + Set these environment variables with your own values: + 1) FOUNDRY_PROJECT_ENDPOINT - Required. The Azure AI Project endpoint. + 2) FOUNDRY_MODEL_NAME - Optional. The name of the model deployment to use for evaluation. +""" + +import os +import time +import random +import string +from pathlib import Path +from pprint import pprint + +from dotenv import load_dotenv +from openai.types.evals.create_eval_jsonl_run_data_source_param import ( + CreateEvalJSONLRunDataSourceParam, + SourceFileContent, + SourceFileContentContent, +) +from openai.types.eval_create_params import DataSourceConfigCustom +from azure.identity import DefaultAzureCredential +from azure.ai.projects import AIProjectClient +from azure.ai.projects.models import ( + CodeBasedEvaluatorDefinition, + EvaluatorCategory, + EvaluatorMetric, + EvaluatorMetricType, + EvaluatorMetricDirection, + EvaluatorType, + EvaluatorVersion, +) + +load_dotenv() + +endpoint = os.environ["FOUNDRY_PROJECT_ENDPOINT"] +model_deployment_name = os.environ.get("FOUNDRY_MODEL_NAME") +azure_openai_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"] +azure_openai_api_key = os.environ["AZURE_OPENAI_API_KEY"] + +# The folder containing the FriendlyEvaluator code, including common_util/ subfolder +local_upload_folder = str(Path(__file__).parent / "custom_evaluators" / "friendly_evaluator") + +with ( + DefaultAzureCredential() as credential, + AIProjectClient(endpoint=endpoint, credential=credential) as project_client, + project_client.get_openai_client() as client, +): + # --------------------------------------------------------------- + # 1. 
Upload evaluator code and create evaluator version + # The folder structure uploaded is: + # friendly_evaluator/ + # friendly_evaluator.py <- entry point + # common_util/ + # __init__.py + # util.py <- helper functions + # --------------------------------------------------------------- + suffix = "".join(random.choices(string.ascii_lowercase, k=5)) + evaluator_name = f"friendly_evaluator_{suffix}" + + evaluator_version = EvaluatorVersion( + evaluator_type=EvaluatorType.CUSTOM, + categories=[EvaluatorCategory.QUALITY], + display_name="Friendliness Evaluator", + description="LLM-based evaluator that scores how friendly a response is (1-5)", + definition=CodeBasedEvaluatorDefinition( + entry_point="friendly_evaluator:FriendlyEvaluator", + init_parameters={ + "type": "object", + "properties": { + "model_config": { + "type": "object", + "description": "Azure OpenAI configuration for the LLM judge", + "properties": { + "azure_endpoint": {"type": "string"}, + "api_version": {"type": "string"}, + "api_key": {"type": "string"}, + }, + "required": ["azure_endpoint", "api_key"], + } + }, + "required": ["model_config"], + }, + data_schema={ + "type": "object", + "properties": { + "query": {"type": "string", "description": "The original user query"}, + "response": {"type": "string", "description": "The response to evaluate for friendliness"}, + }, + "required": ["query", "response"], + }, + metrics={ + "score": EvaluatorMetric( + type=EvaluatorMetricType.ORDINAL, + desirable_direction=EvaluatorMetricDirection.INCREASE, + min_value=1, + max_value=5, + ) + }, + ), + ) + + print("Uploading FriendlyEvaluator (with nested common_util folder)...") + friendly_evaluator = project_client.beta.evaluators.upload( + name=evaluator_name, + evaluator_version=evaluator_version, + folder=local_upload_folder, + overwrite=True, + ) + + print(f"\nEvaluator created: name={friendly_evaluator.name}, version={friendly_evaluator.version}") + print(f"Evaluator ID: {friendly_evaluator.id}") + 
pprint(friendly_evaluator) + + # --------------------------------------------------------------- + # 2. Create an evaluation referencing the uploaded evaluator + # --------------------------------------------------------------- + data_source_config = DataSourceConfigCustom( + { + "type": "custom", + "item_schema": { + "type": "object", + "properties": { + "query": {"type": "string"}, + "response": {"type": "string"}, + }, + "required": ["query", "response"], + }, + "include_sample_schema": True, + } + ) + + testing_criteria = [ + { + "type": "azure_ai_evaluator", + "name": evaluator_name, + "evaluator_name": evaluator_name, + "initialization_parameters": { + "model_config": { + "azure_endpoint": azure_openai_endpoint, + "api_key": f"{azure_openai_api_key}", + "api_version": "2024-06-01", + }, + }, + } + ] + + print("\nCreating evaluation...") + eval_object = client.evals.create( + name=f"Friendliness Evaluation - {suffix}", + data_source_config=data_source_config, + testing_criteria=testing_criteria, # type: ignore + ) + print(f"Evaluation created (id: {eval_object.id}, name: {eval_object.name})") + + # --------------------------------------------------------------- + # 3. Run the evaluation with inline data + # --------------------------------------------------------------- + print("\nCreating evaluation run with inline data...") + eval_run_object = client.evals.runs.create( + eval_id=eval_object.id, + name=f"Friendliness Eval Run - {suffix}", + metadata={"team": "eval-exp", "scenario": "friendliness-v1"}, + data_source=CreateEvalJSONLRunDataSourceParam( + type="jsonl", + source=SourceFileContent( + type="file_content", + content=[ + SourceFileContentContent( + item={ + "query": "How do I reset my password?", + "response": "Go to settings and click reset. That's it.", + } + ), + SourceFileContentContent( + item={ + "query": "I'm having trouble with my account", + "response": "I'm really sorry to hear you're having trouble! I'd love to help you get this sorted out. 
Could you tell me a bit more about what's happening so I can assist you better?", + } + ), + SourceFileContentContent( + item={ + "query": "Can you help me?", + "response": "Read the docs.", + } + ), + SourceFileContentContent( + item={ + "query": "What's the weather like today?", + "response": "Great question! While I'm not a weather service, I'd be happy to suggest some wonderful weather apps that can give you accurate forecasts. Would you like some recommendations? 😊", + } + ), + ], + ), + ), + ) + + print(f"Evaluation run created (id: {eval_run_object.id})") + pprint(eval_run_object) + + # --------------------------------------------------------------- + # 4. Poll for evaluation run completion + # --------------------------------------------------------------- + while True: + run = client.evals.runs.retrieve(run_id=eval_run_object.id, eval_id=eval_object.id) + if run.status in ("completed", "failed"): + print(f"\nEvaluation run finished with status: {run.status}") + output_items = list(client.evals.runs.output_items.list(run_id=run.id, eval_id=eval_object.id)) + pprint(output_items) + print(f"\nEvaluation run Report URL: {run.report_url}") + break + time.sleep(5) + print("Waiting for evaluation run to complete...") + + # --------------------------------------------------------------- + # 5. 
Cleanup (uncomment to delete) + # --------------------------------------------------------------- + # print("\nCleaning up...") + # project_client.beta.evaluators.delete_version( + # name=friendly_evaluator.name, + # version=friendly_evaluator.version, + # ) + # client.evals.delete(eval_id=eval_object.id) + # print("Cleanup done.") + print("\nDone - FriendlyEvaluator upload, eval creation, and eval run verified successfully.") diff --git a/sdk/ai/azure-ai-projects/tests/evaluators/test_evaluators_upload.py b/sdk/ai/azure-ai-projects/tests/evaluators/test_evaluators_upload.py new file mode 100644 index 000000000000..a222b1d1db1d --- /dev/null +++ b/sdk/ai/azure-ai-projects/tests/evaluators/test_evaluators_upload.py @@ -0,0 +1,450 @@ +# pylint: disable=line-too-long,useless-suppression +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ +import os +import tempfile +import pytest +from unittest.mock import MagicMock, patch, call +from azure.core.exceptions import HttpResponseError, ResourceNotFoundError +from azure.ai.projects.operations._patch_evaluators import EvaluatorsOperations +from azure.ai.projects.models import EvaluatorVersion + + +class TestEvaluatorsUpload: + """Unit tests for EvaluatorsOperations.upload() method.""" + + def _create_operations(self): + """Create a mock EvaluatorsOperations instance with mocked service calls.""" + ops = object.__new__(EvaluatorsOperations) + ops.pending_upload = MagicMock() + ops.list_versions = MagicMock() + ops.create_version = MagicMock() + return ops + + def _create_temp_folder(self, files=None): + """Create a temporary folder with files for testing. 
+ + :param files: dict of {relative_path: content_bytes} + :return: path to temp folder + """ + tmp_dir = tempfile.mkdtemp() + if files is None: + files = {"evaluator.py": b"class MyEvaluator:\n pass\n"} + for rel_path, content in files.items(): + full_path = os.path.join(tmp_dir, rel_path) + os.makedirs(os.path.dirname(full_path), exist_ok=True) + with open(full_path, "wb") as f: + f.write(content) + return tmp_dir + + def _mock_pending_upload_response(self, blob_uri="https://storage.blob.core.windows.net/container-1"): + """Return a mock pending upload response dict.""" + return { + "blobReferenceForConsumption": { + "blobUri": blob_uri, + "credential": { + "sasUri": f"{blob_uri}?sv=2025-01-05&sig=fakesig", + }, + } + } + + # --------------------------------------------------------------- + # upload() - input validation tests + # --------------------------------------------------------------- + + def test_upload_raises_if_folder_does_not_exist(self): + ops = self._create_operations() + ops.list_versions.side_effect = ResourceNotFoundError("Not found") + + with pytest.raises(ValueError, match="does not exist"): + ops.upload( + name="test_evaluator", + evaluator_version={"definition": {}}, + folder="/nonexistent/path/abc123", + ) + + def test_upload_raises_if_path_is_file(self): + ops = self._create_operations() + ops.list_versions.side_effect = ResourceNotFoundError("Not found") + + with tempfile.NamedTemporaryFile(suffix=".py") as tmp: + with pytest.raises(ValueError, match="file, not a folder"): + ops.upload( + name="test_evaluator", + evaluator_version={"definition": {}}, + folder=tmp.name, + ) + + def test_upload_raises_if_folder_is_empty(self): + ops = self._create_operations() + ops.list_versions.side_effect = ResourceNotFoundError("Not found") + ops.pending_upload.return_value = self._mock_pending_upload_response() + + empty_dir = tempfile.mkdtemp() + + with patch("azure.ai.projects.operations._patch_evaluators.ContainerClient") as MockContainerClient: + 
mock_container = MagicMock() + MockContainerClient.from_container_url.return_value = mock_container + mock_container.__enter__ = MagicMock(return_value=mock_container) + mock_container.__exit__ = MagicMock(return_value=False) + + with pytest.raises(ValueError, match="folder is empty"): + ops.upload( + name="test_evaluator", + evaluator_version={"definition": {}}, + folder=empty_dir, + ) + + # --------------------------------------------------------------- + # upload() - version auto-increment tests + # --------------------------------------------------------------- + + def test_get_next_version_returns_1_for_new_evaluator(self): + ops = self._create_operations() + ops.list_versions.side_effect = ResourceNotFoundError("Not found") + assert ops._get_next_version("new_evaluator") == "1" + + def test_get_next_version_returns_1_for_empty_list(self): + ops = self._create_operations() + ops.list_versions.return_value = [] + assert ops._get_next_version("empty_evaluator") == "1" + + def test_get_next_version_increments_highest_version(self): + ops = self._create_operations() + ops.list_versions.return_value = [ + {"version": "1"}, + {"version": "3"}, + {"version": "2"}, + ] + assert ops._get_next_version("existing_evaluator") == "4" + + def test_get_next_version_ignores_non_numeric_versions(self): + ops = self._create_operations() + ops.list_versions.return_value = [ + {"version": "1"}, + {"version": "latest"}, + {"version": "beta"}, + ] + assert ops._get_next_version("mixed_evaluator") == "2" + + # --------------------------------------------------------------- + # upload() - pending upload / SAS URI validation tests + # --------------------------------------------------------------- + + def test_start_pending_upload_raises_if_no_blob_ref(self): + ops = self._create_operations() + ops.pending_upload.return_value = {} + + with pytest.raises(ValueError, match="Blob reference is not present"): + ops._start_pending_upload_and_get_container_client("test", "1") + + def 
test_start_pending_upload_raises_if_no_credential(self): + ops = self._create_operations() + ops.pending_upload.return_value = { + "blobReferenceForConsumption": { + "blobUri": "https://storage.blob.core.windows.net/container", + } + } + + with pytest.raises(ValueError, match="SAS credential is not present"): + ops._start_pending_upload_and_get_container_client("test", "1") + + def test_start_pending_upload_raises_if_no_sas_uri(self): + ops = self._create_operations() + ops.pending_upload.return_value = { + "blobReferenceForConsumption": { + "blobUri": "https://storage.blob.core.windows.net/container", + "credential": {"type": "SAS"}, + } + } + + with pytest.raises(ValueError, match="SAS URI is missing"): + ops._start_pending_upload_and_get_container_client("test", "1") + + def test_start_pending_upload_raises_if_no_blob_uri(self): + ops = self._create_operations() + ops.pending_upload.return_value = { + "blobReferenceForConsumption": { + "credential": { + "sasUri": "https://storage.blob.core.windows.net/container?sig=fake", + }, + } + } + + with pytest.raises(ValueError, match="Blob URI is missing"): + ops._start_pending_upload_and_get_container_client("test", "1") + + def test_start_pending_upload_passes_connection_name(self): + ops = self._create_operations() + ops.pending_upload.return_value = self._mock_pending_upload_response() + + with patch("azure.ai.projects.operations._patch_evaluators.ContainerClient"): + ops._start_pending_upload_and_get_container_client("test", "1", connection_name="my-connection") + + ops.pending_upload.assert_called_once_with( + name="test", + version="1", + pending_upload_request={"connectionName": "my-connection"}, + ) + + # --------------------------------------------------------------- + # upload() - file upload behavior tests + # --------------------------------------------------------------- + + def test_upload_uploads_single_file(self): + ops = self._create_operations() + ops.list_versions.side_effect = 
ResourceNotFoundError("Not found") + ops.pending_upload.return_value = self._mock_pending_upload_response() + ops.create_version.return_value = {"name": "test", "version": "1"} + + folder = self._create_temp_folder({"evaluator.py": b"class Eval: pass"}) + + with patch("azure.ai.projects.operations._patch_evaluators.ContainerClient") as MockContainerClient: + mock_container = MagicMock() + MockContainerClient.from_container_url.return_value = mock_container + mock_container.__enter__ = MagicMock(return_value=mock_container) + mock_container.__exit__ = MagicMock(return_value=False) + + ops.upload( + name="test", + evaluator_version={"definition": {}}, + folder=folder, + ) + + mock_container.upload_blob.assert_called_once() + blob_name = mock_container.upload_blob.call_args.kwargs.get("name") or mock_container.upload_blob.call_args[1].get("name") + assert blob_name == "evaluator.py" + + def test_upload_handles_nested_folders(self): + ops = self._create_operations() + ops.list_versions.side_effect = ResourceNotFoundError("Not found") + ops.pending_upload.return_value = self._mock_pending_upload_response() + ops.create_version.return_value = {"name": "test", "version": "1"} + + folder = self._create_temp_folder({ + "evaluator.py": b"class Eval: pass", + "utils/__init__.py": b"", + "utils/helper.py": b"def helper(): pass", + }) + + with patch("azure.ai.projects.operations._patch_evaluators.ContainerClient") as MockContainerClient: + mock_container = MagicMock() + MockContainerClient.from_container_url.return_value = mock_container + mock_container.__enter__ = MagicMock(return_value=mock_container) + mock_container.__exit__ = MagicMock(return_value=False) + + ops.upload( + name="test", + evaluator_version={"definition": {}}, + folder=folder, + ) + + assert mock_container.upload_blob.call_count == 3 + uploaded_names = sorted( + c.kwargs.get("name") or c[1].get("name") + for c in mock_container.upload_blob.call_args_list + ) + assert uploaded_names == 
sorted(["evaluator.py", "utils/__init__.py", "utils/helper.py"]) + + def test_upload_skips_pycache_and_pyc_files(self): + ops = self._create_operations() + ops.list_versions.side_effect = ResourceNotFoundError("Not found") + ops.pending_upload.return_value = self._mock_pending_upload_response() + ops.create_version.return_value = {"name": "test", "version": "1"} + + folder = self._create_temp_folder({ + "evaluator.py": b"class Eval: pass", + "__pycache__/evaluator.cpython-312.pyc": b"compiled", + "other.pyc": b"compiled", + "other.pyo": b"optimized", + }) + + with patch("azure.ai.projects.operations._patch_evaluators.ContainerClient") as MockContainerClient: + mock_container = MagicMock() + MockContainerClient.from_container_url.return_value = mock_container + mock_container.__enter__ = MagicMock(return_value=mock_container) + mock_container.__exit__ = MagicMock(return_value=False) + + ops.upload( + name="test", + evaluator_version={"definition": {}}, + folder=folder, + ) + + # Only evaluator.py should be uploaded + assert mock_container.upload_blob.call_count == 1 + blob_name = mock_container.upload_blob.call_args.kwargs.get("name") or mock_container.upload_blob.call_args[1].get("name") + assert blob_name == "evaluator.py" + + # --------------------------------------------------------------- + # upload() - blob_uri set on evaluator version tests + # --------------------------------------------------------------- + + def test_upload_sets_blob_uri_on_dict_evaluator_version(self): + ops = self._create_operations() + ops.list_versions.side_effect = ResourceNotFoundError("Not found") + blob_uri = "https://storage.blob.core.windows.net/container-1" + ops.pending_upload.return_value = self._mock_pending_upload_response(blob_uri=blob_uri) + ops.create_version.return_value = {"name": "test", "version": "1"} + + folder = self._create_temp_folder() + + evaluator_version = {"definition": {"entry_point": "eval:Eval"}} + + with 
patch("azure.ai.projects.operations._patch_evaluators.ContainerClient") as MockContainerClient: + mock_container = MagicMock() + MockContainerClient.from_container_url.return_value = mock_container + mock_container.__enter__ = MagicMock(return_value=mock_container) + mock_container.__exit__ = MagicMock(return_value=False) + + ops.upload( + name="test", + evaluator_version=evaluator_version, + folder=folder, + ) + + # Verify blob_uri was set in the definition + assert evaluator_version["definition"]["blob_uri"] == blob_uri + + def test_upload_sets_blob_uri_on_model_evaluator_version(self): + ops = self._create_operations() + ops.list_versions.side_effect = ResourceNotFoundError("Not found") + blob_uri = "https://storage.blob.core.windows.net/container-1" + ops.pending_upload.return_value = self._mock_pending_upload_response(blob_uri=blob_uri) + ops.create_version.return_value = {"name": "test", "version": "1"} + + folder = self._create_temp_folder() + + # Create a mock EvaluatorVersion object + ev = MagicMock(spec=EvaluatorVersion) + ev.definition = MagicMock() + ev.definition.blob_uri = None + + with patch("azure.ai.projects.operations._patch_evaluators.ContainerClient") as MockContainerClient: + mock_container = MagicMock() + MockContainerClient.from_container_url.return_value = mock_container + mock_container.__enter__ = MagicMock(return_value=mock_container) + mock_container.__exit__ = MagicMock(return_value=False) + + ops.upload( + name="test", + evaluator_version=ev, + folder=folder, + ) + + # Verify blob_uri was set on the model object + assert ev.definition.blob_uri == blob_uri + + # --------------------------------------------------------------- + # upload() - create_version call tests + # --------------------------------------------------------------- + + def test_upload_calls_create_version_with_correct_args(self): + ops = self._create_operations() + ops.list_versions.side_effect = ResourceNotFoundError("Not found") + ops.pending_upload.return_value = 
self._mock_pending_upload_response() + ops.create_version.return_value = {"name": "my_eval", "version": "1"} + + folder = self._create_temp_folder() + evaluator_version = {"definition": {"entry_point": "eval:Eval"}} + + with patch("azure.ai.projects.operations._patch_evaluators.ContainerClient") as MockContainerClient: + mock_container = MagicMock() + MockContainerClient.from_container_url.return_value = mock_container + mock_container.__enter__ = MagicMock(return_value=mock_container) + mock_container.__exit__ = MagicMock(return_value=False) + + result = ops.upload( + name="my_eval", + evaluator_version=evaluator_version, + folder=folder, + ) + + ops.create_version.assert_called_once_with( + name="my_eval", + evaluator_version=evaluator_version, + ) + assert result == {"name": "my_eval", "version": "1"} + + def test_upload_auto_increments_version(self): + ops = self._create_operations() + ops.list_versions.return_value = [{"version": "1"}, {"version": "2"}] + ops.pending_upload.return_value = self._mock_pending_upload_response() + ops.create_version.return_value = {"name": "my_eval", "version": "3"} + + folder = self._create_temp_folder() + + with patch("azure.ai.projects.operations._patch_evaluators.ContainerClient") as MockContainerClient: + mock_container = MagicMock() + MockContainerClient.from_container_url.return_value = mock_container + mock_container.__enter__ = MagicMock(return_value=mock_container) + mock_container.__exit__ = MagicMock(return_value=False) + + ops.upload( + name="my_eval", + evaluator_version={"definition": {}}, + folder=folder, + ) + + # pending_upload should be called with version "3" + ops.pending_upload.assert_called_once_with( + name="my_eval", + version="3", + pending_upload_request={}, + ) + + # --------------------------------------------------------------- + # upload() - error handling tests + # --------------------------------------------------------------- + + def test_upload_raises_permission_error_on_auth_mismatch(self): + 
ops = self._create_operations() + ops.list_versions.side_effect = ResourceNotFoundError("Not found") + ops.pending_upload.return_value = self._mock_pending_upload_response() + + folder = self._create_temp_folder() + + with patch("azure.ai.projects.operations._patch_evaluators.ContainerClient") as MockContainerClient: + mock_container = MagicMock() + MockContainerClient.from_container_url.return_value = mock_container + mock_container.__enter__ = MagicMock(return_value=mock_container) + mock_container.__exit__ = MagicMock(return_value=False) + mock_container.url = "https://mystorage.blob.core.windows.net/container" + + error = HttpResponseError(message="Auth failed") + error.error_code = "AuthorizationPermissionMismatch" + error.response = MagicMock() + mock_container.upload_blob.side_effect = error + + with pytest.raises(HttpResponseError, match="Storage Blob Data Contributor"): + ops.upload( + name="test", + evaluator_version={"definition": {}}, + folder=folder, + ) + + def test_upload_reraises_non_auth_http_errors(self): + ops = self._create_operations() + ops.list_versions.side_effect = ResourceNotFoundError("Not found") + ops.pending_upload.return_value = self._mock_pending_upload_response() + + folder = self._create_temp_folder() + + with patch("azure.ai.projects.operations._patch_evaluators.ContainerClient") as MockContainerClient: + mock_container = MagicMock() + MockContainerClient.from_container_url.return_value = mock_container + mock_container.__enter__ = MagicMock(return_value=mock_container) + mock_container.__exit__ = MagicMock(return_value=False) + + error = HttpResponseError(message="Server error") + error.error_code = "InternalServerError" + mock_container.upload_blob.side_effect = error + + with pytest.raises(HttpResponseError, match="Server error"): + ops.upload( + name="test", + evaluator_version={"definition": {}}, + folder=folder, + )