From cb498c592d510e179ae4bf0013835e5ac08a10fb Mon Sep 17 00:00:00 2001 From: Amine Saboni Date: Tue, 17 Mar 2026 12:38:18 +0000 Subject: [PATCH] feat: add BoAmps as output method --- codecarbon/output.py | 3 + codecarbon/output_methods/boamps/__init__.py | 25 + codecarbon/output_methods/boamps/mapper.py | 217 ++++ codecarbon/output_methods/boamps/models.py | 304 +++++ codecarbon/output_methods/boamps/output.py | 156 +++ .../boamps_schemas/algorithm_schema.json | 65 + .../boamps_schemas/dataset_schema.json | 53 + .../boamps_schemas/hardware_schema.json | 44 + .../boamps_schemas/measure_schema.json | 65 + .../boamps_schemas/report_schema.json | 259 ++++ tests/test_boamps_output.py | 1092 +++++++++++++++++ 11 files changed, 2283 insertions(+) create mode 100644 codecarbon/output_methods/boamps/__init__.py create mode 100644 codecarbon/output_methods/boamps/mapper.py create mode 100644 codecarbon/output_methods/boamps/models.py create mode 100644 codecarbon/output_methods/boamps/output.py create mode 100644 tests/fixtures/boamps_schemas/algorithm_schema.json create mode 100644 tests/fixtures/boamps_schemas/dataset_schema.json create mode 100644 tests/fixtures/boamps_schemas/hardware_schema.json create mode 100644 tests/fixtures/boamps_schemas/measure_schema.json create mode 100644 tests/fixtures/boamps_schemas/report_schema.json create mode 100644 tests/test_boamps_output.py diff --git a/codecarbon/output.py b/codecarbon/output.py index 72bbc3df7..b1b1d6d8d 100644 --- a/codecarbon/output.py +++ b/codecarbon/output.py @@ -4,6 +4,9 @@ from codecarbon.output_methods.base_output import BaseOutput # noqa: F401 +# Output to BoAmps format +from codecarbon.output_methods.boamps import BoAmpsOutput # noqa: F401 + # emissions data from codecarbon.output_methods.emissions_data import ( # noqa: F401 EmissionsData, diff --git a/codecarbon/output_methods/boamps/__init__.py b/codecarbon/output_methods/boamps/__init__.py new file mode 100644 index 000000000..b439bd23e --- /dev/null 
+++ b/codecarbon/output_methods/boamps/__init__.py @@ -0,0 +1,25 @@ +""" +BoAmps output support for CodeCarbon. + +Provides first-class support for generating BoAmps (Boavizta) standardized +JSON reports from CodeCarbon emission tracking data. +""" + +from codecarbon.output_methods.boamps.mapper import ( # noqa: F401 + map_emissions_to_boamps, +) +from codecarbon.output_methods.boamps.models import ( # noqa: F401 + BoAmpsAlgorithm, + BoAmpsDataset, + BoAmpsEnvironment, + BoAmpsHardware, + BoAmpsHeader, + BoAmpsInfrastructure, + BoAmpsMeasure, + BoAmpsPublisher, + BoAmpsReport, + BoAmpsSoftware, + BoAmpsSystem, + BoAmpsTask, +) +from codecarbon.output_methods.boamps.output import BoAmpsOutput # noqa: F401 diff --git a/codecarbon/output_methods/boamps/mapper.py b/codecarbon/output_methods/boamps/mapper.py new file mode 100644 index 000000000..3cc61a3c8 --- /dev/null +++ b/codecarbon/output_methods/boamps/mapper.py @@ -0,0 +1,217 @@ +""" +Maps CodeCarbon EmissionsData to BoAmps report format. +""" + +import warnings +from typing import Optional + +from codecarbon.output_methods.boamps.models import ( + BoAmpsEnvironment, + BoAmpsHardware, + BoAmpsHeader, + BoAmpsInfrastructure, + BoAmpsMeasure, + BoAmpsReport, + BoAmpsSoftware, + BoAmpsSystem, + BoAmpsTask, +) +from codecarbon.output_methods.emissions_data import EmissionsData + +BOAMPS_FORMAT_VERSION = "0.1" +BOAMPS_FORMAT_SPEC_URI = "https://github.com/Boavizta/BoAmps/tree/main/model" + + +def map_emissions_to_boamps( + emissions: EmissionsData, + task: Optional[BoAmpsTask] = None, + header: Optional[BoAmpsHeader] = None, + quality: Optional[str] = None, + infra_overrides: Optional[dict] = None, + environment_overrides: Optional[dict] = None, +) -> BoAmpsReport: + """ + Map CodeCarbon EmissionsData to a BoAmps report. + + Auto-fills fields from EmissionsData and merges with user-provided context. + User-provided values take precedence over auto-detected values. 
+ + Args: + emissions: CodeCarbon emissions data from a completed run. + task: User-provided task context (required for schema-valid BoAmps). + header: User-provided header overrides. + quality: Quality assessment ("high", "medium", "low"). + infra_overrides: Additional infrastructure fields (cloud_instance, cloud_service). + environment_overrides: Additional environment fields (power_source, etc.). + + Returns: + A BoAmpsReport populated with auto-detected and user-provided data. + """ + report_header = _build_header(emissions, header) + measures = [_build_measure(emissions)] + system = _build_system(emissions) + software = _build_software(emissions) + infrastructure = _build_infrastructure(emissions, infra_overrides) + environment = _build_environment(emissions, environment_overrides) + + if task is None: + warnings.warn( + "No BoAmps task context provided. The output will be missing required " + "fields (taskStage, taskFamily, algorithms, dataset) and will not " + "validate against the BoAmps schema.", + UserWarning, + stacklevel=2, + ) + + return BoAmpsReport( + header=report_header, + task=task, + measures=measures, + system=system, + software=software, + infrastructure=infrastructure, + environment=environment, + quality=quality, + ) + + +def _build_header( + emissions: EmissionsData, user_header: Optional[BoAmpsHeader] +) -> BoAmpsHeader: + """Build header from EmissionsData, merging with user overrides.""" + auto_header = BoAmpsHeader( + format_version=BOAMPS_FORMAT_VERSION, + format_version_specification_uri=BOAMPS_FORMAT_SPEC_URI, + report_id=emissions.run_id, + report_datetime=emissions.timestamp, + ) + + if user_header is None: + return auto_header + + # User values override auto-detected values + return BoAmpsHeader( + licensing=user_header.licensing or auto_header.licensing, + format_version=user_header.format_version or auto_header.format_version, + format_version_specification_uri=( + user_header.format_version_specification_uri + or 
auto_header.format_version_specification_uri + ), + report_id=user_header.report_id or auto_header.report_id, + report_datetime=user_header.report_datetime or auto_header.report_datetime, + report_status=user_header.report_status or auto_header.report_status, + publisher=user_header.publisher or auto_header.publisher, + ) + + +def _build_measure(emissions: EmissionsData) -> BoAmpsMeasure: + """Build a BoAmps measure from EmissionsData.""" + measure = BoAmpsMeasure( + measurement_method="codecarbon", + version=emissions.codecarbon_version, + power_consumption=emissions.energy_consumed, + measurement_duration=emissions.duration, + measurement_date_time=emissions.timestamp, + cpu_tracking_mode=emissions.tracking_mode, + ) + + # CPU utilization as fraction (0-1) + if emissions.cpu_utilization_percent > 0: + measure.average_utilization_cpu = round( + emissions.cpu_utilization_percent / 100.0, 4 + ) + + # GPU fields only if GPU is present + if emissions.gpu_count and emissions.gpu_count > 0: + measure.gpu_tracking_mode = emissions.tracking_mode + if emissions.gpu_utilization_percent > 0: + measure.average_utilization_gpu = round( + emissions.gpu_utilization_percent / 100.0, 4 + ) + + return measure + + +def _build_system(emissions: EmissionsData) -> BoAmpsSystem: + """Build system info from EmissionsData.""" + return BoAmpsSystem(os=emissions.os) + + +def _build_software(emissions: EmissionsData) -> BoAmpsSoftware: + """Build software info from EmissionsData.""" + return BoAmpsSoftware( + language="python", + version=emissions.python_version, + ) + + +def _build_infrastructure( + emissions: EmissionsData, overrides: Optional[dict] = None +) -> BoAmpsInfrastructure: + """Build infrastructure from EmissionsData hardware fields.""" + components = [] + + # CPU component (always present) + cpu_component = BoAmpsHardware( + component_type="cpu", + component_name=emissions.cpu_model, + nb_component=int(emissions.cpu_count) if emissions.cpu_count else 1, + ) + 
components.append(cpu_component) + + # GPU component (only if present) + if emissions.gpu_count and emissions.gpu_count > 0: + gpu_component = BoAmpsHardware( + component_type="gpu", + component_name=emissions.gpu_model if emissions.gpu_model else None, + nb_component=int(emissions.gpu_count), + ) + components.append(gpu_component) + + # RAM component (always present) + ram_component = BoAmpsHardware( + component_type="ram", + nb_component=1, + memory_size=emissions.ram_total_size, + ) + components.append(ram_component) + + is_cloud = emissions.on_cloud == "Y" + infra = BoAmpsInfrastructure( + infra_type="publicCloud" if is_cloud else "onPremise", + cloud_provider=( + emissions.cloud_provider if is_cloud and emissions.cloud_provider else None + ), + components=components, + ) + + # Apply overrides from context file + if overrides: + for attr in ("cloud_instance", "cloud_service", "infra_type"): + if attr in overrides: + setattr(infra, attr, overrides[attr]) + + return infra + + +def _build_environment( + emissions: EmissionsData, overrides: Optional[dict] = None +) -> BoAmpsEnvironment: + """Build environment from EmissionsData location fields.""" + env = BoAmpsEnvironment( + country=emissions.country_name or None, + latitude=emissions.latitude or None, + longitude=emissions.longitude or None, + ) + + if overrides: + for attr in ( + "location", + "power_supplier_type", + "power_source", + "power_source_carbon_intensity", + ): + if attr in overrides: + setattr(env, attr, overrides[attr]) + + return env diff --git a/codecarbon/output_methods/boamps/models.py b/codecarbon/output_methods/boamps/models.py new file mode 100644 index 000000000..1bf8ac644 --- /dev/null +++ b/codecarbon/output_methods/boamps/models.py @@ -0,0 +1,304 @@ +""" +BoAmps data models for standardized AI/ML energy consumption reporting. 
+ +These dataclasses map to the BoAmps JSON schemas defined at: +https://github.com/Boavizta/BoAmps/tree/main/model + +All fields use snake_case internally and are converted to camelCase on serialization. +""" + +import re +from dataclasses import dataclass, fields +from typing import List, Optional + + +def _snake_to_camel(name: str) -> str: + """Convert snake_case to camelCase.""" + components = name.split("_") + return components[0] + "".join(x.title() for x in components[1:]) + + +def _camel_to_snake(name: str) -> str: + """Convert camelCase to snake_case.""" + s1 = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1_\2", name) + return re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", s1).lower() + + +def _to_dict(obj) -> dict: + """Recursively convert a dataclass to a camelCase dict, stripping None values.""" + result = {} + for f in fields(obj): + value = getattr(obj, f.name) + if value is None: + continue + key = _snake_to_camel(f.name) + if isinstance(value, list): + result[key] = [ + _to_dict(item) if hasattr(item, "__dataclass_fields__") else item + for item in value + ] + elif hasattr(value, "__dataclass_fields__"): + result[key] = _to_dict(value) + else: + result[key] = value + return result + + +@dataclass +class BoAmpsPublisher: + name: Optional[str] = None + division: Optional[str] = None + project_name: Optional[str] = None + confidentiality_level: Optional[str] = None + public_key: Optional[str] = None + + def to_dict(self) -> dict: + return _to_dict(self) + + @classmethod + def from_dict(cls, d: dict) -> "BoAmpsPublisher": + return cls(**{_camel_to_snake(k): v for k, v in d.items()}) + + +@dataclass +class BoAmpsHeader: + licensing: Optional[str] = None + format_version: Optional[str] = None + format_version_specification_uri: Optional[str] = None + report_id: Optional[str] = None + report_datetime: Optional[str] = None + report_status: Optional[str] = None + publisher: Optional[BoAmpsPublisher] = None + + def to_dict(self) -> dict: + return _to_dict(self) + + 
@classmethod + def from_dict(cls, d: dict) -> "BoAmpsHeader": + kwargs = {} + for k, v in d.items(): + snake_key = _camel_to_snake(k) + if snake_key == "publisher" and isinstance(v, dict): + kwargs[snake_key] = BoAmpsPublisher.from_dict(v) + else: + kwargs[snake_key] = v + return cls(**kwargs) + + +@dataclass +class BoAmpsAlgorithm: + training_type: Optional[str] = None + algorithm_type: Optional[str] = None + algorithm_name: Optional[str] = None + algorithm_uri: Optional[str] = None + foundation_model_name: Optional[str] = None + foundation_model_uri: Optional[str] = None + parameters_number: Optional[float] = None + framework: Optional[str] = None + framework_version: Optional[str] = None + class_path: Optional[str] = None + layers_number: Optional[float] = None + epochs_number: Optional[float] = None + optimizer: Optional[str] = None + quantization: Optional[str] = None + + def to_dict(self) -> dict: + return _to_dict(self) + + @classmethod + def from_dict(cls, d: dict) -> "BoAmpsAlgorithm": + return cls(**{_camel_to_snake(k): v for k, v in d.items()}) + + +@dataclass +class BoAmpsDataset: + data_usage: Optional[str] = None + data_type: Optional[str] = None + data_format: Optional[str] = None + data_size: Optional[float] = None + data_quantity: Optional[float] = None + shape: Optional[str] = None + source: Optional[str] = None + source_uri: Optional[str] = None + owner: Optional[str] = None + + def to_dict(self) -> dict: + return _to_dict(self) + + @classmethod + def from_dict(cls, d: dict) -> "BoAmpsDataset": + return cls(**{_camel_to_snake(k): v for k, v in d.items()}) + + +@dataclass +class BoAmpsTask: + task_stage: Optional[str] = None + task_family: Optional[str] = None + nb_request: Optional[float] = None + algorithms: Optional[List[BoAmpsAlgorithm]] = None + dataset: Optional[List[BoAmpsDataset]] = None + measured_accuracy: Optional[float] = None + estimated_accuracy: Optional[str] = None + task_description: Optional[str] = None + + def to_dict(self) -> 
dict: + return _to_dict(self) + + @classmethod + def from_dict(cls, d: dict) -> "BoAmpsTask": + kwargs = {} + for k, v in d.items(): + snake_key = _camel_to_snake(k) + if snake_key == "algorithms" and isinstance(v, list): + kwargs[snake_key] = [BoAmpsAlgorithm.from_dict(a) for a in v] + elif snake_key == "dataset" and isinstance(v, list): + kwargs[snake_key] = [BoAmpsDataset.from_dict(ds) for ds in v] + else: + kwargs[snake_key] = v + return cls(**kwargs) + + +@dataclass +class BoAmpsMeasure: + measurement_method: Optional[str] = None + manufacturer: Optional[str] = None + version: Optional[str] = None + cpu_tracking_mode: Optional[str] = None + gpu_tracking_mode: Optional[str] = None + average_utilization_cpu: Optional[float] = None + average_utilization_gpu: Optional[float] = None + power_calibration_measurement: Optional[float] = None + duration_calibration_measurement: Optional[float] = None + power_consumption: Optional[float] = None + measurement_duration: Optional[float] = None + measurement_date_time: Optional[str] = None + + def to_dict(self) -> dict: + return _to_dict(self) + + @classmethod + def from_dict(cls, d: dict) -> "BoAmpsMeasure": + return cls(**{_camel_to_snake(k): v for k, v in d.items()}) + + +@dataclass +class BoAmpsSystem: + os: Optional[str] = None + distribution: Optional[str] = None + distribution_version: Optional[str] = None + + def to_dict(self) -> dict: + return _to_dict(self) + + @classmethod + def from_dict(cls, d: dict) -> "BoAmpsSystem": + return cls(**{_camel_to_snake(k): v for k, v in d.items()}) + + +@dataclass +class BoAmpsSoftware: + language: Optional[str] = None + version: Optional[str] = None + + def to_dict(self) -> dict: + return _to_dict(self) + + @classmethod + def from_dict(cls, d: dict) -> "BoAmpsSoftware": + return cls(**{_camel_to_snake(k): v for k, v in d.items()}) + + +@dataclass +class BoAmpsHardware: + component_name: Optional[str] = None + component_type: Optional[str] = None + nb_component: Optional[int] = 
None + memory_size: Optional[float] = None + manufacturer: Optional[str] = None + family: Optional[str] = None + series: Optional[str] = None + share: Optional[float] = None + + def to_dict(self) -> dict: + return _to_dict(self) + + @classmethod + def from_dict(cls, d: dict) -> "BoAmpsHardware": + return cls(**{_camel_to_snake(k): v for k, v in d.items()}) + + +@dataclass +class BoAmpsInfrastructure: + infra_type: Optional[str] = None + cloud_provider: Optional[str] = None + cloud_instance: Optional[str] = None + cloud_service: Optional[str] = None + components: Optional[List[BoAmpsHardware]] = None + + def to_dict(self) -> dict: + return _to_dict(self) + + @classmethod + def from_dict(cls, d: dict) -> "BoAmpsInfrastructure": + kwargs = {} + for k, v in d.items(): + snake_key = _camel_to_snake(k) + if snake_key == "components" and isinstance(v, list): + kwargs[snake_key] = [BoAmpsHardware.from_dict(c) for c in v] + else: + kwargs[snake_key] = v + return cls(**kwargs) + + +@dataclass +class BoAmpsEnvironment: + country: Optional[str] = None + latitude: Optional[float] = None + longitude: Optional[float] = None + location: Optional[str] = None + power_supplier_type: Optional[str] = None + power_source: Optional[str] = None + power_source_carbon_intensity: Optional[float] = None + + def to_dict(self) -> dict: + return _to_dict(self) + + @classmethod + def from_dict(cls, d: dict) -> "BoAmpsEnvironment": + return cls(**{_camel_to_snake(k): v for k, v in d.items()}) + + +@dataclass +class BoAmpsReport: + header: Optional[BoAmpsHeader] = None + task: Optional[BoAmpsTask] = None + measures: Optional[List[BoAmpsMeasure]] = None + system: Optional[BoAmpsSystem] = None + software: Optional[BoAmpsSoftware] = None + infrastructure: Optional[BoAmpsInfrastructure] = None + environment: Optional[BoAmpsEnvironment] = None + quality: Optional[str] = None + + def to_dict(self) -> dict: + return _to_dict(self) + + @classmethod + def from_dict(cls, d: dict) -> "BoAmpsReport": + kwargs 
= {} + nested_types = { + "header": BoAmpsHeader, + "task": BoAmpsTask, + "system": BoAmpsSystem, + "software": BoAmpsSoftware, + "infrastructure": BoAmpsInfrastructure, + "environment": BoAmpsEnvironment, + } + for k, v in d.items(): + snake_key = _camel_to_snake(k) + if snake_key == "measures" and isinstance(v, list): + kwargs[snake_key] = [BoAmpsMeasure.from_dict(m) for m in v] + elif snake_key in nested_types and isinstance(v, dict): + kwargs[snake_key] = nested_types[snake_key].from_dict(v) + else: + kwargs[snake_key] = v + return cls(**kwargs) diff --git a/codecarbon/output_methods/boamps/output.py b/codecarbon/output_methods/boamps/output.py new file mode 100644 index 000000000..d518bc130 --- /dev/null +++ b/codecarbon/output_methods/boamps/output.py @@ -0,0 +1,156 @@ +""" +BoAmps output handler for CodeCarbon. + +Writes BoAmps-formatted JSON reports containing energy consumption data +from CodeCarbon runs, enriched with user-provided ML task context. +""" + +import json +import os +from typing import Optional + +from codecarbon.external.logger import logger +from codecarbon.output_methods.base_output import BaseOutput +from codecarbon.output_methods.boamps.mapper import map_emissions_to_boamps +from codecarbon.output_methods.boamps.models import ( + BoAmpsHeader, + BoAmpsTask, + _camel_to_snake, +) +from codecarbon.output_methods.emissions_data import EmissionsData + + +def _extract_overrides(data: dict, keys: tuple) -> dict: + """Extract known camelCase keys from a dict, returning them as snake_case.""" + return {_camel_to_snake(k): data[k] for k in keys if k in data} + + +class BoAmpsOutput(BaseOutput): + """ + Output handler that writes BoAmps-formatted JSON reports. + + BoAmps (by Boavizta) is a standardized JSON format for reporting + AI/ML energy consumption. This handler auto-fills measurable fields + from CodeCarbon's EmissionsData and merges user-provided context + (task description, algorithms, datasets, etc.). 
+ + Usage: + # Programmatic + handler = BoAmpsOutput( + task=BoAmpsTask(task_stage="inference", task_family="chatbot", ...), + quality="high", + ) + tracker = EmissionsTracker(output_handlers=[handler]) + + # From context file + handler = BoAmpsOutput.from_file("boamps_context.json") + tracker = EmissionsTracker(output_handlers=[handler]) + """ + + def __init__( + self, + output_dir: str = ".", + task: Optional[BoAmpsTask] = None, + header: Optional[BoAmpsHeader] = None, + quality: Optional[str] = None, + infra_overrides: Optional[dict] = None, + environment_overrides: Optional[dict] = None, + ): + self._output_dir = output_dir + self._task = task + self._header = header + self._quality = quality + self._infra_overrides = infra_overrides + self._environment_overrides = environment_overrides + + @classmethod + def from_file(cls, context_file_path: str, output_dir: str = ".") -> "BoAmpsOutput": + """ + Load BoAmps context from a JSON file. + + The context file should follow the BoAmps report schema structure, + containing fields that cannot be auto-detected by CodeCarbon + (e.g., task, publisher, quality). + + Args: + context_file_path: Path to the BoAmps context JSON file. + output_dir: Directory to write output reports to. + + Returns: + A configured BoAmpsOutput instance. + + Raises: + FileNotFoundError: If the context file does not exist. + json.JSONDecodeError: If the context file contains invalid JSON. 
+ """ + if not os.path.isfile(context_file_path): + raise FileNotFoundError( + f"BoAmps context file not found: {context_file_path}" + ) + + with open(context_file_path) as f: + context = json.load(f) + + task = None + header = None + quality = None + infra_overrides = None + environment_overrides = None + + if "task" in context: + task = BoAmpsTask.from_dict(context["task"]) + + if "header" in context: + header = BoAmpsHeader.from_dict(context["header"]) + + if "quality" in context: + quality = context["quality"] + + if "infrastructure" in context: + infra_overrides = _extract_overrides( + context["infrastructure"], + ("cloudInstance", "cloudService", "infraType"), + ) + + if "environment" in context: + environment_overrides = _extract_overrides( + context["environment"], + ( + "location", + "powerSupplierType", + "powerSource", + "powerSourceCarbonIntensity", + ), + ) + + return cls( + output_dir=output_dir, + task=task, + header=header, + quality=quality, + infra_overrides=infra_overrides, + environment_overrides=environment_overrides, + ) + + def out(self, total: EmissionsData, delta: EmissionsData): + """Write the final BoAmps report as a JSON file.""" + try: + report = map_emissions_to_boamps( + total, + task=self._task, + header=self._header, + quality=self._quality, + infra_overrides=self._infra_overrides, + environment_overrides=self._environment_overrides, + ) + report_dict = report.to_dict() + file_name = f"boamps_report_{total.run_id}.json" + file_path = os.path.join(self._output_dir, file_name) + with open(file_path, "w") as f: + json.dump(report_dict, f, indent=2) + logger.info(f"BoAmps report saved to {os.path.abspath(file_path)}") + except Exception as e: + logger.error(f"Failed to write BoAmps report: {e}", exc_info=True) + + def live_out(self, total: EmissionsData, delta: EmissionsData): + """No-op: BoAmps reports are final, not incremental.""" diff --git a/tests/fixtures/boamps_schemas/algorithm_schema.json 
b/tests/fixtures/boamps_schemas/algorithm_schema.json new file mode 100644 index 000000000..336bcd836 --- /dev/null +++ b/tests/fixtures/boamps_schemas/algorithm_schema.json @@ -0,0 +1,65 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "id": "https://raw.githubusercontent.com/Boavizta/BoAmps/main/model/algorithm_schema.json", + "title": "algorithm", + "description": "the type of algorithm used by the computing task", + "type": "object", + "properties": { + "trainingType": { + "type": "string", + "description": "if applicable, type of training (if the stage corresponds to a training) : supervisedLearning, unsupervisedLearning, semiSupervisedLearning, reinforcementLearning, transferLearning ..." + }, + "algorithmType": { + "type": "string", + "description": "the type of algorithm used, example : embeddings creation, rag, nlp, neural network, llm..." + }, + "algorithmName": { + "type": "string", + "description": "the case-sensitive common name of the algorithm, example: randomForest, naive bayes, cnn, rnn, transformers, if you are directly using a foundation model, let it empty and fill the field foundationModelName..." + }, + "algorithmUri": { + "type": "string", + "description": "the URI of the model, if publicly available" + }, + "foundationModelName": { + "type": "string", + "description": "if a foundation model is used, its case-sensitive common name, example: llama3.1-8b, gpt4-o..." + }, + "foundationModelUri": { + "type": "string", + "description": "the URI of the foundation model, if publicly available" + }, + "parametersNumber": { + "type": "number", + "description" : "number of billions of total parameters of your model, e.g. 
8 for llama3.1-8b" + }, + "framework": { + "type": "string", + "description": "the common name of the software framework implementing the algorithm, if any" + }, + "frameworkVersion": { + "type": "string", + "description": "the version of the software framework implementing the algorithm, if any" + }, + "classPath": { + "type": "string", + "description": "the full class path of the algorithm within the framework, with elements separated by dots" + }, + "layersNumber": { + "type": "number", + "description" : "if deep learning, specify the number of layers in your network" + }, + "epochsNumber":{ + "type": "number", + "description" : "if training, the number of complete passes through the training dataset" + }, + "optimizer":{ + "type": "string", + "description" : "the algorithm used to optimize the model's weights, e.g. gridSearch, lora, adam" + }, + "quantization":{ + "type":"string", + "description": "the type of quantization used : fp32, fp16, b16, int8 ... " + } + } + } \ No newline at end of file diff --git a/tests/fixtures/boamps_schemas/dataset_schema.json b/tests/fixtures/boamps_schemas/dataset_schema.json new file mode 100644 index 000000000..f01c5a7d2 --- /dev/null +++ b/tests/fixtures/boamps_schemas/dataset_schema.json @@ -0,0 +1,53 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "id": "https://raw.githubusercontent.com/Boavizta/BoAmps/main/model/dataset_schema.json", + "title": "dataset", + "description": "Describe the nature, shape, number of items and other properties of the dataset involved in your task. If you are performing inferences, please indicate the average size of the data sent for a single inference and fill in the number of inferences in the property: inferenceProperties", + "type": "object", + "properties": { + "dataUsage":{ + "type": "string", + "enum": [ "input", "output"], + "description": "the use of the dataset: is it used as model input or output ?" 
+ }, + "dataType":{ + "type": "string", + "enum": [ "tabular", "audio", "boolean", "image", "video", "object","text","token", "word", "other"], + "description": "the nature of the data used " + }, + "dataFormat": { + "type": "string", + "enum": [ "3gp", "3gpp", "3gpp2", "8svx", "aa", "aac", "aax", "act", "afdesign", "afphoto", "ai", "aiff", "alac", "amr", "amv", "ape", "arrow", "asf", "au", "avi", "avif", "awb", "bmp", "bpg", "cd5", "cda", "cdr", "cgm", "clip", "cpt", "csv", "deep", "dirac", "divx", "drawingml", "drw", "dss", "dvf", "ecw", "eps", "fits", "flac", "flif", "flv", "flvf4v", "gem", "gerber", "gif", "gle", "gsm", "heif", "hp-gl", "html", "hvif", "ico", "iklax", "ilbm", "img", "ivs", "jpeg", "json", "kra", "lottie", "m4a", "m4b", "m4p", "m4v", "mathml", "matroska", "mdp", "mmf", "movpkg", "mp3", "mpc", "mpeg1", "mpeg2", "mpeg4", "msv", "mxf", "naplps", "netpbm", "nmf", "nrrd", "nsv", "odg", "ods", "ogg", "opus", "pam", "parquet", "pbm", "pcx", "pdf", "pdn", "pgf", "pgm", "pgml", "pict", "plbm", "png", "pnm", "postscript", "ppm", "psd", "psp", "pstricks", "qcc", "quicktime", "ra", "raw", "realmedia", "regis", "rf64", "roq", "sai", "sgi", "sid", "sql", "sln", "svg", "svi", "swf", "text", "tga", "tiff", "tinyvg", "tta", "vicar", "vivoactive", "vml", "vob", "voc", "vox", "wav", "webm", "webp", "wma", "wmf", "wmv", "wv", "xaml", "xar", "xcf", "xisf", "xls", "xlsx", "xml", "xps", "yaml", "other", null ], + "description": "if the data is passed in the form of a file, what format is the data in?" + }, + "dataSize": { + "type": "number", + "description": "the size of the dataset (in Go), if small quantity just fill the field nbItems" + }, + "dataQuantity": { + "type": "number", + "description": "the number of data in the dataset, e.g. 3 (images, audio or tokens)" + }, + "shape": { + "type": "string", + "description": "the shape of your dataset, can be found with X.shape with dataframes, e.g. 
(12, 1000) for a 2D table with 12 columns and 1000 rows" + }, + "source": { + "type": "string", + "enum": [ "public", "private", "other" ], + "description": "the kind of source of the dataset" + }, + "sourceUri": { + "type": "string", + "description": "the URI of the dataset if available" + }, + "owner": { + "type": "string", + "description": "the owner of the dataset if available" + } + }, + "required": [ + "dataUsage", + "dataType" + ] + } diff --git a/tests/fixtures/boamps_schemas/hardware_schema.json b/tests/fixtures/boamps_schemas/hardware_schema.json new file mode 100644 index 000000000..33dc6e6da --- /dev/null +++ b/tests/fixtures/boamps_schemas/hardware_schema.json @@ -0,0 +1,44 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "id": "https://raw.githubusercontent.com/Boavizta/BoAmps/main/model/hardware_schema.json", + "title": "hardware", + "description": "a hardware subsystem part of the infrastructure running the computing task", + "type": "object", + "properties": { + "componentName": { + "type": "string", + "description": "the name of this subsystem part of your infrastructure, example returned by codecarbon: 1 x NVIDIA GeForce GTX 1080 Ti" + }, + "componentType": { + "type": "string", + "description": "the type of this subsystem part of your infrastructure, example: cpu, gpu, ram, hdd, sdd..." + }, + "nbComponent": { + "type": "integer", + "description": "the number of items of this component in your infrastructure, if you have 1 RAM of 32Go, fill 1 here and 32 inside memorySize" + }, + "memorySize": { + "type": "number", + "description": "the size of the memory of the component in Gbytes, useful to detail the memory associated to ONE of your gpus for example (if we want the total memory, we will multiply the memorySize by nbComponent). If the component is CPU do not fill the RAM size here, create another component for RAM, this field is for the embeded memory of a component." 
+ }, + "manufacturer": { + "type": "string", + "description": "the name of the manufacturer, example: nvidia" + }, + "family": { + "type": "string", + "description": "the family of this component, example: geforce" + }, + "series": { + "type": "string", + "description": "the series of this component, example: gtx1080" + }, + "share": { + "type": "number", + "minimum": 0, + "maximum": 1, + "description": "the percentage of the physical equipment used by the task, this sharing property should be set to 1 by default (if no share) and otherwise to the correct percentage, e.g. 0.5 if you share half-time." + } + }, + "required": ["componentType", "nbComponent"] +} diff --git a/tests/fixtures/boamps_schemas/measure_schema.json b/tests/fixtures/boamps_schemas/measure_schema.json new file mode 100644 index 000000000..f6d751c1a --- /dev/null +++ b/tests/fixtures/boamps_schemas/measure_schema.json @@ -0,0 +1,65 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "id": "https://raw.githubusercontent.com/Boavizta/BoAmps/main/model/measure_schema.json", + "title": "measure", + "description": "the energy measure obtained from software and/or hardware tools, for a computing task", + "type": "object", + "properties": { + "measurementMethod": { + "type": "string", + "description": "the method used to perform the energy or FLOPS measure, example: codecarbon, carbonai, flops-compute, wattmeter, azure metrics, ovh metrics..." + }, + "manufacturer": { + "type": "string", + "description": "the builder of the measuring tool, if the measurement method is wattmeter" + }, + "version": { + "type": "string", + "description": "the version of the measuring tool, if any" + }, + "cpuTrackingMode": { + "type": "string", + "description": "the method used to track the consumption of the CPU, example: constant, rapl..." + }, + "gpuTrackingMode": { + "type": "string", + "description": "the method used to track the consumption of the GPU, example: constant, nvml..." 
+ }, + "averageUtilizationCpu": { + "type":"number", + "minimum": 0, + "maximum": 1, + "description": "the average percentage of use of the CPU during the task, for example: 0.5 if your CPU load was 50% on average" + }, + "averageUtilizationGpu": { + "type":"number", + "minimum": 0, + "maximum": 1, + "description": "the average percentage of use of the GPU during the task, for example: 0.8 if your GPU load was 80% on average" + }, + "powerCalibrationMeasurement": { + "type": "number", + "description": "the power consumed (in kWh) during the calibration measure if any (to isolate the initial consumption of the hardware)" + }, + "durationCalibrationMeasurement": { + "type": "number", + "description": "the duration of the calibration if any (in seconds)" + }, + "powerConsumption": { + "type": "number", + "description": "the power consumption measure of the computing task (in kWh)" + }, + "measurementDuration": { + "type": "number", + "description": "the duration of the measurement (in seconds)" + }, + "measurementDateTime": { + "type": "string", + "description": "the date when the measurement began, in format YYYY-MM-DD HH:MM:SS" + } + }, + "required": [ + "measurementMethod", + "powerConsumption" + ] +} diff --git a/tests/fixtures/boamps_schemas/report_schema.json b/tests/fixtures/boamps_schemas/report_schema.json new file mode 100644 index 000000000..8e250c893 --- /dev/null +++ b/tests/fixtures/boamps_schemas/report_schema.json @@ -0,0 +1,259 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "id": "https://raw.githubusercontent.com/Boavizta/BoAmps/main/model/report_schema.json", + "title": "report", + "description": "the main object containing all the context pretaining to a computing task", + "type": "object", + "properties": { + "header": { + "type": "object", + "description": "information about the source of the report and publishing organization's details", + "properties": { + "licensing": { + "type": "string", + "description": "the type of 
licensing applicable for the sharing of the report" + }, + "formatVersion": { + "type": "string", + "description": "the version of the specification of this set of schemas defining the report's fields" + }, + "formatVersionSpecificationUri": { + "type": "string", + "description": "the URI of the present specification of this set of schemas" + }, + "reportId": { + "type": "string", + "description": "the unique identifier of this report, preferably as a uuid4 string" + }, + "reportDatetime": { + "type": "string", + "description": "the publishing date of this report in format YYYY-MM-DD HH:MM:SS" + }, + "reportStatus": { + "type": "string", + "enum": [ "draft", "final", "corrective", "other" ], + "description": "the status of this report" + }, + "publisher": { + "type": "object", + "description": "the details about the publishing organization who produced the report", + "properties": { + "name": { + "type": "string", + "description": "name of the organization" + }, + "division": { + "type": "string", + "description": "name of the publishing department within the organization" + }, + "projectName": { + "type": "string", + "description": "name of the publishing project within the organization" + }, + "confidentialityLevel": { + "type": "string", + "enum": [ "public", "internal", "confidential", "secret" ], + "description": " the confidentiality of the report" + }, + "publicKey": { + "type": "string", + "description": "the cryptographic public key to check the identity of the publishing organization" + } + }, + "required": [ + "confidentialityLevel" + ] + } + }, + "required": [ + "reportDatetime" + ] + }, + "task": { + "type": "object", + "description": "the nature of the task being measured", + "properties": { + "taskStage": { + "type": "string", + "description": "stage of the task, example: datacreation, preprocessing, training, finetuning, inference, retraining..., add a + between stages if several but we do recommand to measure each step independantly" + }, + 
"taskFamily": { + "type": "string", + "description": "the family of task you are running, e.g. text classification, image generation, speech recognition, robotics navigation..." + }, + "nbRequest": { + "type": "number", + "description": "if inference stage, the number of requests the measure corresponds to, 0 or empty if you're not measuring the inference stage" + }, + "algorithms": { + "type": "array", + "description": "the list of the main algorithmic approache(s) used by the computing task", + "minItems": 1, + "items": { + "type":"object", + "$ref": "https://raw.githubusercontent.com/Boavizta/BoAmps/main/model/algorithm_schema.json" + } + }, + "dataset": { + "type": "array", + "description": "the list of dataset processed and/or generated by the computing task, if you are working on multipodal training you can add many items to the list to describe it. If you are working on inference, you can add at least one item to describe the input data and another one for the output", + "minItems": 1, + "items": { + "type":"object", + "$ref": "https://raw.githubusercontent.com/Boavizta/BoAmps/main/model/dataset_schema.json" + } + }, + "measuredAccuracy": { + "type": "number", + "minimum": 0, + "maximum": 1, + "description": "the measured accuracy of your model (between 0 and 1)" + }, + "estimatedAccuracy": { + "type": "string", + "enum": [ "veryPoor", "poor", "average", "good", "veryGood" ], + "description": "if you didn't measure the accuracy of your model in concrete percentages, you can give an assessment of the precision between: VERY POOR, POOR, AVERAGE, GOOD, VERY GOOD" + }, + "taskDescription": { + "type": "string", + "description": "free field, to be fillied in if you have more details to share about your task" + } + }, + "required": [ + "taskStage", + "taskFamily", + "algorithms", + "dataset" + ] + }, + "measures": { + "type": "array", + "description": "the software and/or hardware measures of the energy consumed by the computing task", + "minItems": 1, + "items": 
{ + "type":"object", + "$ref": "https://raw.githubusercontent.com/Boavizta/BoAmps/main/model/measure_schema.json" + }, + "required": [ + "measures" + ] + }, + "system": { + "type": "object", + "description": "system information of the infrastructure on which is run the computing task", + "properties": { + "os": { + "type": "string", + "description":"name of the operating system" + }, + "distribution": { + "type": "string", + "description":"distribution of the operating system" + }, + "distributionVersion": { + "type": "string", + "description":"distribution's version of the operating system" + } + }, + "required": [ + "os" + ] + }, + "software": { + "type": "object", + "description": "programming language information of the computing task", + "properties": { + "language": { + "type": "string", + "description": "name of the programming language used, example : c, java, julia, python..." + }, + "version": { + "type": "string", + "description": "version of the programming language used" + } + }, + "required": [ + "language" + ] + }, + "infrastructure": { + "type": "object", + "description": "the infrastructure on which is performed the computing task", + "properties": { + "infraType":{ + "type":"string", + "enum":["publicCloud","privateCloud", "onPremise", "other"], + "description": "the type of infrastructure used for the task" + }, + "cloudProvider":{ + "type":"string", + "description": "If you are on the cloud, the name of your cloud provider, for example : aws, azure, google, ovh..." + }, + "cloudInstance":{ + "type":"string", + "description": "If you are on a cloud vm, the name of your cloud instance, for example : a1.large, dasv4-type2..." + }, + "cloudService":{ + "type":"string", + "description": "If you are using an AI cloud service, the name of your cloud service, for example : openAI service..." 
+ }, + "components": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "$ref": "https://raw.githubusercontent.com/Boavizta/BoAmps/main/model/hardware_schema.json" + } + } + }, + "required": [ + "infraType", + "components" + ] + }, + "environment": { + "type": "object", + "description": "environment of the infrastructure: region of calculation and energy used", + "properties": { + "country": { + "type": "string" + }, + "latitude": { + "type": "number" + }, + "longitude": { + "type": "number" + }, + "location": { + "description": "more precise location like city, region or datacenter name", + "type": "string" + }, + "powerSupplierType": { + "type": "string", + "enum": [ "public", "private", "internal", "other" ] + }, + "powerSource": { + "type": "string", + "enum": [ "solar", "wind", "nuclear", "hydroelectric", "gas", "coal", "other" ] + }, + "powerSourceCarbonIntensity": { + "type": "number", + "description": "carbon intensity of the electicity you used (in gCO2eq/kWh)" + } + }, + "required": [ + "country" + ] + }, + "quality": { + "type": "string", + "enum":[ "high", "medium", "low" ], + "description": "the quality of the information you provided, 3 possibilities : high (percentage error +/-10%), medium (percentage error +/-25%), low (percentage error +/-50%)" + } + }, + "required": [ + "task", + "measures", + "infrastructure" + ] + } diff --git a/tests/test_boamps_output.py b/tests/test_boamps_output.py new file mode 100644 index 000000000..4db5461f1 --- /dev/null +++ b/tests/test_boamps_output.py @@ -0,0 +1,1092 @@ +""" +Comprehensive test suite for BoAmps output support. + +Tests organized as documentation sections covering: +A. Model serialization +B. EmissionsData -> BoAmps mapping +C. Output handler +D. Schema validation +E. Context file loading +F. 
Integration +""" + +import json +import os +import shutil +import tempfile +import unittest +import warnings + +from codecarbon.output_methods.boamps.mapper import ( + BOAMPS_FORMAT_VERSION, + map_emissions_to_boamps, +) +from codecarbon.output_methods.boamps.models import ( + BoAmpsAlgorithm, + BoAmpsDataset, + BoAmpsEnvironment, + BoAmpsHardware, + BoAmpsHeader, + BoAmpsInfrastructure, + BoAmpsMeasure, + BoAmpsPublisher, + BoAmpsReport, + BoAmpsSoftware, + BoAmpsSystem, + BoAmpsTask, + _camel_to_snake, + _snake_to_camel, +) +from codecarbon.output_methods.boamps.output import BoAmpsOutput +from codecarbon.output_methods.emissions_data import EmissionsData + + +def _make_emissions_data(**overrides) -> EmissionsData: + """Create a realistic EmissionsData instance for testing.""" + defaults = dict( + timestamp="2025-01-15 10:30:00", + project_name="test_project", + run_id="550e8400-e29b-41d4-a716-446655440000", + experiment_id="exp-001", + duration=3600.0, + emissions=0.042, + emissions_rate=1.17e-05, + cpu_power=12.5, + gpu_power=85.0, + ram_power=3.2, + cpu_energy=0.0125, + gpu_energy=0.085, + ram_energy=0.0032, + energy_consumed=0.1007, + water_consumed=0.0, + country_name="France", + country_iso_code="FRA", + region="Ile-de-France", + cloud_provider="", + cloud_region="", + os="Linux-5.15.0", + python_version="3.11.5", + codecarbon_version="2.5.0", + cpu_count=8, + cpu_model="Intel Core i7-12700", + gpu_count=1, + gpu_model="NVIDIA RTX 3090", + longitude=2.3522, + latitude=48.8566, + ram_total_size=32.0, + tracking_mode="machine", + cpu_utilization_percent=65.0, + gpu_utilization_percent=80.0, + on_cloud="N", + ) + defaults.update(overrides) + return EmissionsData(**defaults) + + +def _make_task() -> BoAmpsTask: + """Create a minimal valid BoAmps task for testing.""" + return BoAmpsTask( + task_stage="inference", + task_family="chatbot", + algorithms=[ + BoAmpsAlgorithm( + algorithm_type="llm", + foundation_model_name="llama3.1-8b", + ) + ], + dataset=[ + 
BoAmpsDataset(data_usage="input", data_type="token", data_quantity=50), + BoAmpsDataset(data_usage="output", data_type="token", data_quantity=200), + ], + ) + + +# =========================================================================== +# A. BoAmps Model Serialization Tests +# =========================================================================== + + +class TestModelSerialization(unittest.TestCase): + """Each model serializes to correct camelCase JSON structure.""" + + def test_snake_to_camel_conversion(self): + self.assertEqual(_snake_to_camel("measurement_method"), "measurementMethod") + self.assertEqual(_snake_to_camel("cpu_tracking_mode"), "cpuTrackingMode") + self.assertEqual(_snake_to_camel("os"), "os") + self.assertEqual(_snake_to_camel("nb_component"), "nbComponent") + self.assertEqual( + _snake_to_camel("format_version_specification_uri"), + "formatVersionSpecificationUri", + ) + + def test_camel_to_snake_conversion(self): + self.assertEqual(_camel_to_snake("measurementMethod"), "measurement_method") + self.assertEqual(_camel_to_snake("cpuTrackingMode"), "cpu_tracking_mode") + self.assertEqual(_camel_to_snake("os"), "os") + self.assertEqual(_camel_to_snake("nbComponent"), "nb_component") + + def test_publisher_serialization(self): + pub = BoAmpsPublisher( + name="Test Org", + confidentiality_level="public", + ) + d = pub.to_dict() + self.assertEqual(d["name"], "Test Org") + self.assertEqual(d["confidentialityLevel"], "public") + self.assertNotIn("division", d) # None stripped + + def test_header_serialization(self): + header = BoAmpsHeader( + report_id="abc-123", + report_datetime="2025-01-15 10:30:00", + format_version="0.1", + ) + d = header.to_dict() + self.assertEqual(d["reportId"], "abc-123") + self.assertEqual(d["reportDatetime"], "2025-01-15 10:30:00") + self.assertEqual(d["formatVersion"], "0.1") + self.assertNotIn("licensing", d) + + def test_algorithm_serialization(self): + algo = BoAmpsAlgorithm( + algorithm_type="llm", + 
foundation_model_name="llama3.1-8b", + parameters_number=8, + ) + d = algo.to_dict() + self.assertEqual(d["algorithmType"], "llm") + self.assertEqual(d["foundationModelName"], "llama3.1-8b") + self.assertEqual(d["parametersNumber"], 8) + self.assertNotIn("trainingType", d) + + def test_dataset_serialization(self): + ds = BoAmpsDataset( + data_usage="input", + data_type="token", + data_quantity=50, + ) + d = ds.to_dict() + self.assertEqual(d["dataUsage"], "input") + self.assertEqual(d["dataType"], "token") + self.assertEqual(d["dataQuantity"], 50) + + def test_task_serialization_with_nested_lists(self): + task = _make_task() + d = task.to_dict() + self.assertEqual(d["taskStage"], "inference") + self.assertEqual(d["taskFamily"], "chatbot") + self.assertEqual(len(d["algorithms"]), 1) + self.assertEqual(d["algorithms"][0]["algorithmType"], "llm") + self.assertEqual(len(d["dataset"]), 2) + self.assertEqual(d["dataset"][0]["dataUsage"], "input") + self.assertEqual(d["dataset"][1]["dataUsage"], "output") + + def test_measure_serialization(self): + m = BoAmpsMeasure( + measurement_method="codecarbon", + power_consumption=0.1007, + measurement_duration=3600.0, + average_utilization_cpu=0.65, + ) + d = m.to_dict() + self.assertEqual(d["measurementMethod"], "codecarbon") + self.assertEqual(d["powerConsumption"], 0.1007) + self.assertEqual(d["measurementDuration"], 3600.0) + self.assertEqual(d["averageUtilizationCpu"], 0.65) + + def test_hardware_serialization(self): + hw = BoAmpsHardware( + component_type="gpu", + component_name="NVIDIA RTX 3090", + nb_component=1, + ) + d = hw.to_dict() + self.assertEqual(d["componentType"], "gpu") + self.assertEqual(d["componentName"], "NVIDIA RTX 3090") + self.assertEqual(d["nbComponent"], 1) + + def test_infrastructure_serialization(self): + infra = BoAmpsInfrastructure( + infra_type="onPremise", + components=[ + BoAmpsHardware(component_type="cpu", nb_component=1), + BoAmpsHardware(component_type="ram", nb_component=1, memory_size=32.0), 
+ ], + ) + d = infra.to_dict() + self.assertEqual(d["infraType"], "onPremise") + self.assertEqual(len(d["components"]), 2) + self.assertEqual(d["components"][0]["componentType"], "cpu") + + def test_none_values_stripped_from_output(self): + """None values should not appear in serialized output.""" + report = BoAmpsReport( + header=BoAmpsHeader(report_id="test"), + quality=None, + ) + d = report.to_dict() + self.assertNotIn("quality", d) + self.assertNotIn("task", d) + self.assertNotIn("measures", d) + + def test_full_report_serialization(self): + """Complete report with all sections serializes correctly.""" + report = BoAmpsReport( + header=BoAmpsHeader(report_id="test-123", format_version="0.1"), + task=_make_task(), + measures=[ + BoAmpsMeasure(measurement_method="codecarbon", power_consumption=0.1) + ], + system=BoAmpsSystem(os="Linux"), + software=BoAmpsSoftware(language="python", version="3.11"), + infrastructure=BoAmpsInfrastructure( + infra_type="onPremise", + components=[BoAmpsHardware(component_type="cpu", nb_component=1)], + ), + environment=BoAmpsEnvironment(country="France"), + quality="high", + ) + d = report.to_dict() + self.assertIn("header", d) + self.assertIn("task", d) + self.assertIn("measures", d) + self.assertIn("system", d) + self.assertIn("software", d) + self.assertIn("infrastructure", d) + self.assertIn("environment", d) + self.assertEqual(d["quality"], "high") + + def test_report_serializes_to_valid_json(self): + """Report.to_dict() result is JSON-serializable.""" + report = BoAmpsReport( + header=BoAmpsHeader(report_id="test"), + task=_make_task(), + measures=[ + BoAmpsMeasure(measurement_method="codecarbon", power_consumption=0.1) + ], + infrastructure=BoAmpsInfrastructure( + infra_type="onPremise", + components=[BoAmpsHardware(component_type="cpu", nb_component=1)], + ), + ) + json_str = json.dumps(report.to_dict(), indent=2) + parsed = json.loads(json_str) + self.assertEqual(parsed["header"]["reportId"], "test") + + +# 
=========================================================================== +# B. EmissionsData -> BoAmps Mapping Tests +# =========================================================================== + + +class TestEmissionsMapping(unittest.TestCase): + """Full mapping from EmissionsData to BoAmps report.""" + + def setUp(self): + self.emissions = _make_emissions_data() + self.task = _make_task() + + def test_full_mapping(self): + """Realistic EmissionsData maps to a complete BoAmps report.""" + report = map_emissions_to_boamps(self.emissions, task=self.task) + d = report.to_dict() + + # All major sections present + self.assertIn("header", d) + self.assertIn("task", d) + self.assertIn("measures", d) + self.assertIn("system", d) + self.assertIn("software", d) + self.assertIn("infrastructure", d) + self.assertIn("environment", d) + + def test_header_auto_population(self): + """run_id -> reportId, timestamp -> reportDatetime.""" + report = map_emissions_to_boamps(self.emissions, task=self.task) + header = report.header.to_dict() + self.assertEqual(header["reportId"], self.emissions.run_id) + self.assertEqual(header["reportDatetime"], self.emissions.timestamp) + self.assertEqual(header["formatVersion"], BOAMPS_FORMAT_VERSION) + + def test_measures_mapping(self): + """energy_consumed -> powerConsumption, duration -> measurementDuration.""" + report = map_emissions_to_boamps(self.emissions, task=self.task) + measure = report.measures[0].to_dict() + self.assertEqual(measure["measurementMethod"], "codecarbon") + self.assertEqual(measure["version"], self.emissions.codecarbon_version) + self.assertEqual(measure["powerConsumption"], self.emissions.energy_consumed) + self.assertEqual(measure["measurementDuration"], self.emissions.duration) + self.assertEqual(measure["measurementDateTime"], self.emissions.timestamp) + self.assertEqual(measure["cpuTrackingMode"], self.emissions.tracking_mode) + + def test_cpu_utilization_as_fraction(self): + """cpu_utilization_percent is 
converted to 0-1 range.""" + report = map_emissions_to_boamps(self.emissions, task=self.task) + measure = report.measures[0].to_dict() + self.assertAlmostEqual(measure["averageUtilizationCpu"], 0.65, places=2) + + def test_gpu_utilization_as_fraction(self): + """gpu_utilization_percent is converted to 0-1 range when GPU present.""" + report = map_emissions_to_boamps(self.emissions, task=self.task) + measure = report.measures[0].to_dict() + self.assertAlmostEqual(measure["averageUtilizationGpu"], 0.80, places=2) + + def test_gpu_tracking_mode_set_when_gpu_present(self): + """gpuTrackingMode is set when gpu_count > 0.""" + report = map_emissions_to_boamps(self.emissions, task=self.task) + measure = report.measures[0].to_dict() + self.assertIn("gpuTrackingMode", measure) + + def test_infrastructure_decomposition(self): + """CPU, GPU, RAM as separate components[].""" + report = map_emissions_to_boamps(self.emissions, task=self.task) + infra = report.infrastructure.to_dict() + component_types = [c["componentType"] for c in infra["components"]] + self.assertIn("cpu", component_types) + self.assertIn("gpu", component_types) + self.assertIn("ram", component_types) + + def test_cpu_component_details(self): + report = map_emissions_to_boamps(self.emissions, task=self.task) + cpu = [ + c for c in report.infrastructure.components if c.component_type == "cpu" + ][0] + self.assertEqual(cpu.component_name, "Intel Core i7-12700") + self.assertEqual(cpu.nb_component, 8) + + def test_gpu_component_details(self): + report = map_emissions_to_boamps(self.emissions, task=self.task) + gpu = [ + c for c in report.infrastructure.components if c.component_type == "gpu" + ][0] + self.assertEqual(gpu.component_name, "NVIDIA RTX 3090") + self.assertEqual(gpu.nb_component, 1) + + def test_ram_component_details(self): + report = map_emissions_to_boamps(self.emissions, task=self.task) + ram = [ + c for c in report.infrastructure.components if c.component_type == "ram" + ][0] + 
self.assertEqual(ram.memory_size, 32.0) + self.assertEqual(ram.nb_component, 1) + + def test_gpu_omitted_when_no_gpu(self): + """GPU component is omitted when gpu_count=0 and gpu_model is empty.""" + emissions = _make_emissions_data( + gpu_count=0, + gpu_model="", + gpu_power=0.0, + gpu_energy=0.0, + gpu_utilization_percent=0.0, + ) + report = map_emissions_to_boamps(emissions, task=self.task) + component_types = [c.component_type for c in report.infrastructure.components] + self.assertNotIn("gpu", component_types) + self.assertIn("cpu", component_types) + self.assertIn("ram", component_types) + + def test_gpu_tracking_mode_omitted_when_no_gpu(self): + """gpuTrackingMode not set when no GPU.""" + emissions = _make_emissions_data( + gpu_count=0, gpu_model="", gpu_utilization_percent=0.0 + ) + report = map_emissions_to_boamps(emissions, task=self.task) + measure = report.measures[0].to_dict() + self.assertNotIn("gpuTrackingMode", measure) + self.assertNotIn("averageUtilizationGpu", measure) + + def test_on_premise_detection(self): + """on_cloud='N' -> infraType='onPremise'.""" + report = map_emissions_to_boamps(self.emissions, task=self.task) + self.assertEqual(report.infrastructure.infra_type, "onPremise") + + def test_cloud_detection(self): + """on_cloud='Y' -> infraType='publicCloud' with cloud_provider.""" + emissions = _make_emissions_data( + on_cloud="Y", + cloud_provider="aws", + cloud_region="us-east-1", + ) + report = map_emissions_to_boamps(emissions, task=self.task) + self.assertEqual(report.infrastructure.infra_type, "publicCloud") + self.assertEqual(report.infrastructure.cloud_provider, "aws") + + def test_environment_mapping(self): + """country_name, latitude, longitude map to environment.""" + report = map_emissions_to_boamps(self.emissions, task=self.task) + env = report.environment.to_dict() + self.assertEqual(env["country"], "France") + self.assertAlmostEqual(env["latitude"], 48.8566) + self.assertAlmostEqual(env["longitude"], 2.3522) + + def 
test_system_mapping(self): + """os field maps to system.os.""" + report = map_emissions_to_boamps(self.emissions, task=self.task) + self.assertEqual(report.system.os, "Linux-5.15.0") + + def test_software_mapping(self): + """language='python', version from python_version.""" + report = map_emissions_to_boamps(self.emissions, task=self.task) + self.assertEqual(report.software.language, "python") + self.assertEqual(report.software.version, "3.11.5") + + def test_user_task_preserved(self): + """User-provided task context is preserved and merged.""" + report = map_emissions_to_boamps(self.emissions, task=self.task) + task_dict = report.task.to_dict() + self.assertEqual(task_dict["taskStage"], "inference") + self.assertEqual(task_dict["taskFamily"], "chatbot") + self.assertEqual(len(task_dict["algorithms"]), 1) + self.assertEqual(len(task_dict["dataset"]), 2) + + def test_user_header_overrides(self): + """User header values take precedence over auto-detected.""" + user_header = BoAmpsHeader( + licensing="CC-BY-4.0", + report_status="final", + publisher=BoAmpsPublisher(name="My Org", confidentiality_level="public"), + ) + report = map_emissions_to_boamps( + self.emissions, task=self.task, header=user_header + ) + header = report.header.to_dict() + self.assertEqual(header["licensing"], "CC-BY-4.0") + self.assertEqual(header["reportStatus"], "final") + self.assertEqual(header["publisher"]["name"], "My Org") + # Auto-detected fields still present + self.assertEqual(header["reportId"], self.emissions.run_id) + self.assertEqual(header["formatVersion"], BOAMPS_FORMAT_VERSION) + + def test_quality_passthrough(self): + report = map_emissions_to_boamps(self.emissions, task=self.task, quality="high") + self.assertEqual(report.quality, "high") + + def test_infra_overrides(self): + """Infrastructure overrides are applied.""" + overrides = {"cloud_instance": "p3.2xlarge", "cloud_service": "EC2"} + report = map_emissions_to_boamps( + self.emissions, task=self.task, 
infra_overrides=overrides + ) + self.assertEqual(report.infrastructure.cloud_instance, "p3.2xlarge") + self.assertEqual(report.infrastructure.cloud_service, "EC2") + + def test_environment_overrides(self): + """Environment overrides are applied.""" + overrides = {"power_source": "nuclear", "power_source_carbon_intensity": 12.0} + report = map_emissions_to_boamps( + self.emissions, task=self.task, environment_overrides=overrides + ) + self.assertEqual(report.environment.power_source, "nuclear") + self.assertEqual(report.environment.power_source_carbon_intensity, 12.0) + + def test_warning_when_no_task(self): + """Warns when required BoAmps task fields are missing.""" + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + map_emissions_to_boamps(self.emissions, task=None) + self.assertEqual(len(w), 1) + self.assertIn("task", str(w[0].message).lower()) + + +# =========================================================================== +# C. BoAmps Output Handler Tests +# =========================================================================== + + +class TestBoAmpsOutputHandler(unittest.TestCase): + """Output handler writes valid JSON files.""" + + def setUp(self): + self.tmpdir = tempfile.mkdtemp() + self.emissions = _make_emissions_data() + self.task = _make_task() + + def tearDown(self): + shutil.rmtree(self.tmpdir, ignore_errors=True) + + def test_out_writes_json_file(self): + """BoAmpsOutput.out() writes a valid JSON file.""" + handler = BoAmpsOutput(output_dir=self.tmpdir, task=self.task) + handler.out(self.emissions, self.emissions) + + expected_file = os.path.join( + self.tmpdir, f"boamps_report_{self.emissions.run_id}.json" + ) + self.assertTrue(os.path.isfile(expected_file)) + + with open(expected_file) as f: + report = json.load(f) + self.assertIn("header", report) + self.assertIn("measures", report) + + def test_output_file_naming(self): + """Output file is named boamps_report_{run_id}.json.""" + handler = 
BoAmpsOutput(output_dir=self.tmpdir, task=self.task) + handler.out(self.emissions, self.emissions) + + files = os.listdir(self.tmpdir) + self.assertEqual(len(files), 1) + self.assertTrue(files[0].startswith("boamps_report_")) + self.assertTrue(files[0].endswith(".json")) + self.assertIn(self.emissions.run_id, files[0]) + + def test_handler_programmatic_config(self): + """Handler can be constructed with programmatic config.""" + handler = BoAmpsOutput( + output_dir=self.tmpdir, + task=self.task, + quality="high", + header=BoAmpsHeader(licensing="CC-BY-4.0"), + ) + handler.out(self.emissions, self.emissions) + + expected_file = os.path.join( + self.tmpdir, f"boamps_report_{self.emissions.run_id}.json" + ) + with open(expected_file) as f: + report = json.load(f) + self.assertEqual(report["quality"], "high") + self.assertEqual(report["header"]["licensing"], "CC-BY-4.0") + + def test_live_out_is_noop(self): + """live_out() is a no-op (BoAmps reports are final).""" + handler = BoAmpsOutput(output_dir=self.tmpdir, task=self.task) + handler.live_out(self.emissions, self.emissions) + # No files should be created + self.assertEqual(len(os.listdir(self.tmpdir)), 0) + + def test_output_contains_all_auto_fields(self): + """Output JSON contains all auto-filled fields from EmissionsData.""" + handler = BoAmpsOutput(output_dir=self.tmpdir, task=self.task) + handler.out(self.emissions, self.emissions) + + expected_file = os.path.join( + self.tmpdir, f"boamps_report_{self.emissions.run_id}.json" + ) + with open(expected_file) as f: + report = json.load(f) + + # Header + self.assertEqual(report["header"]["reportId"], self.emissions.run_id) + self.assertEqual(report["header"]["reportDatetime"], self.emissions.timestamp) + # Measures + self.assertEqual(report["measures"][0]["measurementMethod"], "codecarbon") + self.assertEqual( + report["measures"][0]["powerConsumption"], + self.emissions.energy_consumed, + ) + # System + self.assertEqual(report["system"]["os"], self.emissions.os) + # 
Software
        self.assertEqual(report["software"]["language"], "python")
        # Infrastructure
        self.assertEqual(report["infrastructure"]["infraType"], "onPremise")
        # Environment
        self.assertEqual(report["environment"]["country"], "France")

    def test_minimal_handler_warns(self):
        """Minimal handler (no task) warns about missing fields."""
        # record=True captures emitted warnings so they can be inspected below.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            handler = BoAmpsOutput(output_dir=self.tmpdir)
            handler.out(self.emissions, self.emissions)
        # At least one captured warning must mention the missing "task" section.
        self.assertTrue(
            any("task" in str(warning.message).lower() for warning in w)
        )


# ===========================================================================
# D. Schema Validation Tests
# ===========================================================================


class TestSchemaValidation(unittest.TestCase):
    """Validate output against the actual BoAmps JSON schemas."""

    # Vendored from https://github.com/Boavizta/BoAmps/tree/main/model (v0.1)
    SCHEMA_DIR = os.path.join(os.path.dirname(__file__), "fixtures", "boamps_schemas")

    @classmethod
    def _load_schemas(cls):
        """Load all BoAmps schemas and create a resolver.

        Returns a ``(validator, report_schema)`` pair, or ``(None, None)``
        when jsonschema is not installed or any schema fixture file is
        missing, so that ``setUp`` can skip the whole class instead of
        failing.
        """
        try:
            from jsonschema import Draft4Validator, RefResolver
        except ImportError:
            return None, None

        schema_files = [
            "report_schema.json",
            "algorithm_schema.json",
            "dataset_schema.json",
            "hardware_schema.json",
            "measure_schema.json",
        ]
        store = {}
        report_schema = None
        for fname in schema_files:
            path = os.path.join(cls.SCHEMA_DIR, fname)
            if not os.path.isfile(path):
                return None, None
            with open(path) as f:
                schema = json.load(f)
            # Draft 4 schemas identify themselves via the "id" keyword
            # ("$id" only exists from draft 6 onward).
            store[schema["id"]] = schema
            if fname == "report_schema.json":
                report_schema = schema

        # NOTE(review): RefResolver is deprecated in jsonschema >= 4.18 in
        # favour of the `referencing` library; it still works for Draft 4.
        resolver = RefResolver.from_schema(report_schema, store=store)
        validator = Draft4Validator(report_schema, resolver=resolver)
        return validator, report_schema

    def setUp(self):
        self.validator, self.schema = self._load_schemas()
        # Skip (rather than fail) when the optional validation toolchain or
        # the vendored schema fixtures are unavailable.
        if self.validator is None:
            self.skipTest(
                "jsonschema not installed or BoAmps schemas not found at "
                f"{self.SCHEMA_DIR}"
            )
        self.emissions = _make_emissions_data()
        self.task = _make_task()

    def test_minimal_valid_report_passes(self):
        """Minimal valid report (auto-filled + minimal task) passes validation."""
        report = map_emissions_to_boamps(self.emissions, task=self.task)
        report_dict = report.to_dict()
        errors = list(self.validator.iter_errors(report_dict))
        self.assertEqual(
            errors,
            [],
            f"Validation errors: {[e.message for e in errors]}",
        )

    def test_full_report_passes(self):
        """Full report with all optional fields passes validation."""
        report = map_emissions_to_boamps(
            self.emissions,
            task=BoAmpsTask(
                task_stage="training",
                task_family="text classification",
                nb_request=0,
                algorithms=[
                    BoAmpsAlgorithm(
                        algorithm_type="neural network",
                        algorithm_name="transformers",
                        training_type="supervisedLearning",
                        framework="pytorch",
                        framework_version="2.1.0",
                        parameters_number=0.125,
                        epochs_number=10,
                        quantization="fp16",
                    )
                ],
                dataset=[
                    BoAmpsDataset(
                        data_usage="input",
                        data_type="text",
                        data_format="csv",
                        data_size=2.5,
                        data_quantity=50000,
                        shape="(50000, 128)",
                        source="public",
                    )
                ],
                measured_accuracy=0.95,
                task_description="Fine-tuning BERT for sentiment analysis",
            ),
            header=BoAmpsHeader(
                licensing="CC-BY-4.0",
                report_status="final",
                publisher=BoAmpsPublisher(
                    name="Test Org",
                    division="ML Team",
                    confidentiality_level="public",
                ),
            ),
            quality="high",
            environment_overrides={
                "power_source": "nuclear",
                "power_source_carbon_intensity": 12.0,
            },
        )
        report_dict = report.to_dict()
        errors = list(self.validator.iter_errors(report_dict))
        self.assertEqual(
            errors,
            [],
            f"Validation errors: {[e.message for e in errors]}",
        )

    def test_report_without_task_fails_schema(self):
        """Report without required task section fails validation."""
        # The mapper warns when task is None; silence that here — the
        # schema validation error is what this test asserts on.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            report = map_emissions_to_boamps(self.emissions, task=None)
        report_dict = report.to_dict()
        errors = list(self.validator.iter_errors(report_dict))
        required_missing = any("task" in e.message for e in errors)
        self.assertTrue(
            required_missing,
            "Expected validation error for missing 'task' field",
        )


# ===========================================================================
# E. Context File Loading Tests
# ===========================================================================


class TestContextFileLoading(unittest.TestCase):
    """Loading and merging BoAmps context files."""

    def setUp(self):
        self.tmpdir = tempfile.mkdtemp()
        self.emissions = _make_emissions_data()

    def tearDown(self):
        shutil.rmtree(self.tmpdir, ignore_errors=True)

    def _write_context_file(self, context: dict) -> str:
        """Serialize *context* to a JSON file under tmpdir and return its path."""
        path = os.path.join(self.tmpdir, "boamps_context.json")
        with open(path, "w") as f:
            json.dump(context, f)
        return path

    def test_load_valid_context_file(self):
        """Load a valid BoAmps context file."""
        context = {
            "task": {
                "taskStage": "inference",
                "taskFamily": "chatbot",
                "algorithms": [{"algorithmType": "llm"}],
                "dataset": [{"dataUsage": "input", "dataType": "token"}],
            },
            "quality": "high",
        }
        path = self._write_context_file(context)
        handler = BoAmpsOutput.from_file(path, output_dir=self.tmpdir)
        self.assertIsNotNone(handler._task)
        self.assertEqual(handler._task.task_stage, "inference")
        self.assertEqual(handler._quality, "high")

    def test_merge_context_with_auto_detected(self):
        """Context file fields merge with auto-detected EmissionsData fields."""
        context = {
            "task": {
                "taskStage": "inference",
                "taskFamily": "chatbot",
                "algorithms": [{"algorithmType": "llm"}],
                "dataset": [{"dataUsage": "input", "dataType": "token"}],
            },
            "header": {
                "licensing": "CC-BY-4.0",
                "reportStatus": "draft",
                "publisher": {"name": "My Org", "confidentialityLevel": "public"},
            },
        }
        path = self._write_context_file(context)
        handler = BoAmpsOutput.from_file(path, output_dir=self.tmpdir)
        handler.out(self.emissions, self.emissions)

        report_file = os.path.join(
            self.tmpdir, f"boamps_report_{self.emissions.run_id}.json"
        )
        with open(report_file) as f:
            report = json.load(f)

        # Context file values
        self.assertEqual(report["header"]["licensing"], "CC-BY-4.0")
        self.assertEqual(report["header"]["publisher"]["name"], "My Org")
        # Auto-detected values
        self.assertEqual(report["header"]["reportId"], self.emissions.run_id)
        self.assertEqual(report["measures"][0]["measurementMethod"], "codecarbon")

    def test_context_file_task_overrides(self):
        """Context file task values are preserved."""
        context = {
            "task": {
                "taskStage": "training",
                "taskFamily": "image generation",
                "algorithms": [
                    {
                        "algorithmType": "neural network",
                        "foundationModelName": "stable-diffusion",
                    }
                ],
                "dataset": [
                    {"dataUsage": "input", "dataType": "image", "dataQuantity": 10000}
                ],
            },
            "quality": "medium",
        }
        path = self._write_context_file(context)
        handler = BoAmpsOutput.from_file(path, output_dir=self.tmpdir)
        self.assertEqual(handler._task.task_stage, "training")
        self.assertEqual(handler._task.task_family, "image generation")
        self.assertEqual(
            handler._task.algorithms[0].foundation_model_name, "stable-diffusion"
        )
        self.assertEqual(handler._quality, "medium")

    def test_invalid_context_file_path_raises(self):
        """Invalid context file path raises clear error."""
        with self.assertRaises(FileNotFoundError) as ctx:
            BoAmpsOutput.from_file("/nonexistent/path.json")
        # The error message itself should point at the missing file.
        self.assertIn("not found", str(ctx.exception))

    def test_malformed_json_raises(self):
        """Malformed JSON context file raises clear error."""
        path = os.path.join(self.tmpdir, "bad.json")
        with open(path, "w") as f:
            f.write("{not valid json")
        with self.assertRaises(json.JSONDecodeError):
            BoAmpsOutput.from_file(path)

    def test_context_with_infrastructure_overrides(self):
        """Infrastructure fields from context file are applied as overrides."""
        context = {
            "task": {
                "taskStage": "inference",
                "taskFamily": "chatbot",
                "algorithms": [{"algorithmType": "llm"}],
                "dataset": [{"dataUsage": "input", "dataType": "token"}],
            },
            "infrastructure": {
                "cloudInstance": "p3.2xlarge",
                "cloudService": "EC2",
            },
        }
        path = self._write_context_file(context)
        handler = BoAmpsOutput.from_file(path, output_dir=self.tmpdir)
        handler.out(self.emissions, self.emissions)

        report_file = os.path.join(
            self.tmpdir, f"boamps_report_{self.emissions.run_id}.json"
        )
        with open(report_file) as f:
            report = json.load(f)
        self.assertEqual(report["infrastructure"]["cloudInstance"], "p3.2xlarge")
        self.assertEqual(report["infrastructure"]["cloudService"], "EC2")

    def test_context_with_environment_overrides(self):
        """Environment fields from context file are applied as overrides."""
        context = {
            "task": {
                "taskStage": "inference",
                "taskFamily": "chatbot",
                "algorithms": [{"algorithmType": "llm"}],
                "dataset": [{"dataUsage": "input", "dataType": "token"}],
            },
            "environment": {
                "powerSource": "nuclear",
                "powerSourceCarbonIntensity": 12.0,
            },
        }
        path = self._write_context_file(context)
        handler = BoAmpsOutput.from_file(path, output_dir=self.tmpdir)
        handler.out(self.emissions, self.emissions)

        report_file = os.path.join(
            self.tmpdir, f"boamps_report_{self.emissions.run_id}.json"
        )
        with open(report_file) as f:
            report = json.load(f)
        self.assertEqual(report["environment"]["powerSource"], "nuclear")
        self.assertEqual(report["environment"]["powerSourceCarbonIntensity"], 12.0)


# ===========================================================================
# F. Model from_dict / round-trip Tests
# ===========================================================================


class TestModelDeserialization(unittest.TestCase):
    """Models can be loaded from camelCase dicts (e.g., context files)."""

    def test_algorithm_from_dict(self):
        """Algorithm camelCase keys map to snake_case attributes."""
        d = {"algorithmType": "llm", "foundationModelName": "llama3.1-8b"}
        algo = BoAmpsAlgorithm.from_dict(d)
        self.assertEqual(algo.algorithm_type, "llm")
        self.assertEqual(algo.foundation_model_name, "llama3.1-8b")

    def test_dataset_from_dict(self):
        """Dataset camelCase keys map to snake_case attributes."""
        d = {"dataUsage": "input", "dataType": "token", "dataQuantity": 50}
        ds = BoAmpsDataset.from_dict(d)
        self.assertEqual(ds.data_usage, "input")
        self.assertEqual(ds.data_type, "token")
        self.assertEqual(ds.data_quantity, 50)

    def test_task_from_dict_with_nested(self):
        """Nested algorithm/dataset dicts deserialize into typed models."""
        d = {
            "taskStage": "inference",
            "taskFamily": "chatbot",
            "algorithms": [{"algorithmType": "llm"}],
            "dataset": [{"dataUsage": "input", "dataType": "token"}],
        }
        task = BoAmpsTask.from_dict(d)
        self.assertEqual(task.task_stage, "inference")
        self.assertIsInstance(task.algorithms[0], BoAmpsAlgorithm)
        self.assertIsInstance(task.dataset[0], BoAmpsDataset)

    def test_header_from_dict_with_publisher(self):
        """Nested publisher dict deserializes into a BoAmpsPublisher."""
        d = {
            "licensing": "CC-BY-4.0",
            "publisher": {"name": "Org", "confidentialityLevel": "public"},
        }
        header = BoAmpsHeader.from_dict(d)
        self.assertEqual(header.licensing, "CC-BY-4.0")
        self.assertIsInstance(header.publisher, BoAmpsPublisher)
        self.assertEqual(header.publisher.name, "Org")

    def test_infrastructure_from_dict_with_components(self):
        """Nested component dicts deserialize into BoAmpsHardware models."""
        d = {
            "infraType": "onPremise",
            "components": [
                {"componentType": "cpu", "nbComponent": 1},
                {"componentType": "gpu", "nbComponent": 2},
            ],
        }
        infra = BoAmpsInfrastructure.from_dict(d)
        self.assertEqual(infra.infra_type, "onPremise")
        self.assertEqual(len(infra.components), 2)
        self.assertIsInstance(infra.components[0], BoAmpsHardware)

    def test_report_from_dict_roundtrip(self):
        """Report can survive a to_dict -> from_dict roundtrip."""
        original = BoAmpsReport(
            header=BoAmpsHeader(report_id="test", format_version="0.1"),
            task=_make_task(),
            measures=[
                BoAmpsMeasure(measurement_method="codecarbon", power_consumption=0.1)
            ],
            system=BoAmpsSystem(os="Linux"),
            software=BoAmpsSoftware(language="python", version="3.11"),
            infrastructure=BoAmpsInfrastructure(
                infra_type="onPremise",
                components=[BoAmpsHardware(component_type="cpu", nb_component=1)],
            ),
            environment=BoAmpsEnvironment(country="France"),
            quality="high",
        )
        d = original.to_dict()
        restored = BoAmpsReport.from_dict(d)
        self.assertEqual(restored.header.report_id, "test")
        self.assertEqual(restored.task.task_stage, "inference")
        self.assertEqual(restored.measures[0].measurement_method, "codecarbon")
        self.assertEqual(restored.quality, "high")
        # Re-serialize and compare
        self.assertEqual(original.to_dict(), restored.to_dict())


# ===========================================================================
# G. Integration Test
# ===========================================================================


class TestIntegration(unittest.TestCase):
    """Full lifecycle: create handler, process emissions, verify output."""

    def setUp(self):
        self.tmpdir = tempfile.mkdtemp()
        self.emissions = _make_emissions_data()

    def tearDown(self):
        shutil.rmtree(self.tmpdir, ignore_errors=True)

    def _read_report(self) -> dict:
        """Read back the report file written for this run and return it as a dict."""
        report_file = os.path.join(
            self.tmpdir, f"boamps_report_{self.emissions.run_id}.json"
        )
        self.assertTrue(os.path.isfile(report_file))
        with open(report_file) as f:
            return json.load(f)

    def test_full_lifecycle(self):
        """BoAmpsOutput produces a valid report from EmissionsData."""
        handler = BoAmpsOutput(
            output_dir=self.tmpdir,
            task=_make_task(),
            quality="high",
            header=BoAmpsHeader(
                licensing="CC-BY-4.0",
                publisher=BoAmpsPublisher(
                    name="Test Lab", confidentiality_level="public"
                ),
            ),
        )
        handler.out(self.emissions, self.emissions)
        report = self._read_report()

        # All sections present
        for section in (
            "header",
            "task",
            "measures",
            "system",
            "software",
            "infrastructure",
            "environment",
        ):
            self.assertIn(section, report)
        self.assertEqual(report["quality"], "high")

        # Auto-filled
        self.assertEqual(report["header"]["reportId"], self.emissions.run_id)
        self.assertEqual(report["measures"][0]["measurementMethod"], "codecarbon")
        self.assertEqual(report["system"]["os"], self.emissions.os)
        self.assertEqual(report["software"]["language"], "python")

        # User-provided
        self.assertEqual(report["task"]["taskStage"], "inference")
        self.assertEqual(report["header"]["licensing"], "CC-BY-4.0")
        self.assertEqual(report["header"]["publisher"]["name"], "Test Lab")

    def test_context_file_lifecycle(self):
        """Full lifecycle using from_file() constructor."""
        context = {
            "header": {
                "licensing": "MIT",
                "reportStatus": "draft",
                "publisher": {
                    "name": "Test Corp",
                    "confidentialityLevel": "internal",
                },
            },
            "task": {
                "taskStage": "inference",
                "taskFamily": "chatbot",
                "nbRequest": 100,
                "algorithms": [
                    {
                        "algorithmType": "llm",
                        "foundationModelName": "llama3.1-8b",
                        "parametersNumber": 8,
                    }
                ],
                "dataset": [
                    {"dataUsage": "input", "dataType": "token", "dataQuantity": 50},
                    {"dataUsage": "output", "dataType": "token", "dataQuantity": 200},
                ],
            },
            "quality": "high",
        }
        context_path = os.path.join(self.tmpdir, "boamps_context.json")
        with open(context_path, "w") as f:
            json.dump(context, f)

        handler = BoAmpsOutput.from_file(context_path, output_dir=self.tmpdir)
        handler.out(self.emissions, self.emissions)
        report = self._read_report()

        self.assertEqual(report["task"]["taskStage"], "inference")
        self.assertEqual(report["task"]["nbRequest"], 100)
        self.assertEqual(
            report["task"]["algorithms"][0]["foundationModelName"], "llama3.1-8b"
        )
        self.assertEqual(report["header"]["licensing"], "MIT")
        self.assertEqual(report["header"]["publisher"]["name"], "Test Corp")
        self.assertEqual(report["quality"], "high")
        # Auto-detected still present
        self.assertEqual(report["measures"][0]["measurementMethod"], "codecarbon")


if __name__ == "__main__":
    unittest.main()