diff --git a/CHANGELOG.md b/CHANGELOG.md index f64573f9..7eecb6b0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ releases are available on [PyPI](https://pypi.org/project/pytask) and ## Unreleased +- {pull}`743` adds the `pytask.lock` lockfile as the primary state backend with a + portable format and documentation. When no lockfile exists, pytask reads the legacy + SQLite state and writes `pytask.lock`; `pytask build` continues updating the legacy + database for downgrade compatibility. - {pull}`787` makes the `attributes` field mandatory on `PNode` and `PProvisionalNode`, and preserves existing node attributes when loading entries from the data catalog. diff --git a/docs/source/how_to_guides/index.md b/docs/source/how_to_guides/index.md index d01ecae9..f41a0247 100644 --- a/docs/source/how_to_guides/index.md +++ b/docs/source/how_to_guides/index.md @@ -13,6 +13,7 @@ maxdepth: 1 --- migrating_from_scripts_to_pytask interfaces_for_dependencies_products +portability remote_files functional_interface capture_warnings diff --git a/docs/source/how_to_guides/portability.md b/docs/source/how_to_guides/portability.md new file mode 100644 index 00000000..bfa6003d --- /dev/null +++ b/docs/source/how_to_guides/portability.md @@ -0,0 +1,88 @@ +# Portability + +This guide explains what you need to do to move a pytask project between machines and +why the lockfile is central to that process. + +```{seealso} +The lockfile format and behavior are documented in the +[reference guide](../reference_guides/lockfile.md). +``` + +## How to port a project + +Use this checklist when you move a project to another machine or environment. + +1. **Update state once on the source machine.** + + Run a normal build so `pytask.lock` is up to date: + + ```console + $ pytask build + ``` + + If you already have a recent lockfile and up-to-date outputs, you can skip this step. + +1. **Ship the right files.** + + Commit `pytask.lock` to your repository and move it with the project. 
In practice, + you should move: + + - the project files tracked in version control (source, configuration, data inputs + and `pytask.lock`) + - the build artifacts you want to reuse (often in `bld/` if you follow the tutorial + layout) + - the `.pytask` folder if you use the data catalog and it manages some of + the files + +1. **Recreate the layout of files outside the project.** + + If you have files outside the project root (the folder with the `pyproject.toml` + file), you need to make sure that the same relative layout exists on the target + machine. + +1. **Run pytask on the target machine.** + + When states match, tasks are skipped. When they differ, tasks run and the lockfile is + updated. + +## What makes a project portable + +There are two things that must stay stable across machines: + +First, task and node IDs must be stable. An ID is the unique identifier that ties a task +or node to an entry in `pytask.lock`. pytask builds these IDs from project-relative +paths anchored at the project root, so most users do not need to do anything. If you +implement custom nodes, make sure their IDs remain project-relative and stable across +machines. + +Second, state values must be portable. The lockfile stores opaque state strings from +`PNode.state()` and `PTask.state()`, and pytask uses them to decide whether a task is up +to date. Content hashes are portable; timestamps or absolute paths are not. This mostly +matters when you define custom nodes or custom hash functions. + +## Tips for stable state values + +- Prefer file content hashes over timestamps for custom nodes. +- For `PythonNode` values that are not natively stable, provide a custom hash function. +- Avoid machine-specific paths or timestamps in custom `state()` implementations. + +```{seealso} +For custom nodes, see [Writing custom nodes](writing_custom_nodes.md). +For hashing guidance, see +[Hashing inputs of tasks](hashing_inputs_of_tasks.md). 
+``` + +## Cleaning up the lockfile + +`pytask.lock` is updated incrementally. Entries are only replaced when the corresponding +tasks run. If tasks are removed or renamed, their old entries remain as stale data and +are ignored. + +To clean up stale entries without deleting the file, run: + +```console +$ pytask build --clean-lockfile +``` + +This rewrites the lockfile after a successful build with only the currently collected +tasks and their current state values. diff --git a/docs/source/how_to_guides/writing_custom_nodes.md b/docs/source/how_to_guides/writing_custom_nodes.md index 437fe2a7..45b933a8 100644 --- a/docs/source/how_to_guides/writing_custom_nodes.md +++ b/docs/source/how_to_guides/writing_custom_nodes.md @@ -89,6 +89,13 @@ Here are some explanations. signature is a hash and a unique identifier for the node. For most nodes it will be a hash of the path or the name. +- `signature` and lockfile `id` are different concepts. + + - `signature` is the runtime identity in pytask's in-memory DAG. + - lockfile `id` is the portable key stored in `pytask.lock`. + + For custom nodes, make sure the lockfile id stays stable and unique within a task. + - The classmethod {meth}`~pytask.PickleNode.from_path` is a convenient method to instantiate the class. diff --git a/docs/source/reference_guides/configuration.md b/docs/source/reference_guides/configuration.md index 2f809ce8..1069b92d 100644 --- a/docs/source/reference_guides/configuration.md +++ b/docs/source/reference_guides/configuration.md @@ -44,11 +44,13 @@ are welcome to also support macOS. ````{confval} database_url -pytask uses a database to keep track of tasks, products, and dependencies over runs. By -default, it will create an SQLite database in the project's root directory called -`.pytask/pytask.sqlite3`. 
If you want to use a different name or a different dialect -[supported by sqlalchemy](https://docs.sqlalchemy.org/en/latest/core/engines.html#backend-specific-urls), -use either {option}`pytask build --database-url` or `database_url` in the config. +SQLite is the legacy state format. pytask uses `pytask.lock` as the primary state +backend for change detection. When no lockfile exists, pytask reads the configured +database and writes `pytask.lock`. For downgrade compatibility, `pytask build` also +keeps the legacy database state updated. + +The `database_url` option remains for backward compatibility and controls the legacy +database location and dialect ([supported by sqlalchemy](https://docs.sqlalchemy.org/en/latest/core/engines.html#backend-specific-urls)). ```toml database_url = "sqlite:///.pytask/pytask.sqlite3" diff --git a/docs/source/reference_guides/index.md b/docs/source/reference_guides/index.md index e3b85fa7..adcada16 100644 --- a/docs/source/reference_guides/index.md +++ b/docs/source/reference_guides/index.md @@ -9,6 +9,7 @@ maxdepth: 1 --- command_line_interface configuration +lockfile hookspecs api ``` diff --git a/docs/source/reference_guides/lockfile.md b/docs/source/reference_guides/lockfile.md new file mode 100644 index 00000000..0cbcb7ab --- /dev/null +++ b/docs/source/reference_guides/lockfile.md @@ -0,0 +1,97 @@ +# The Lock File + +`pytask.lock` is the default state backend. It stores task state in a portable, +git-friendly format so runs can be resumed or shared across machines. + +```{note} +SQLite is the legacy format. When no lockfile exists, pytask reads the legacy database +state and writes `pytask.lock`. The lockfile remains the primary backend for skip +decisions, and `pytask build` also keeps the legacy database updated for downgrade +compatibility. +``` + +## Example + +```toml +# This file is automatically @generated by pytask. +# It is not intended for manual editing. 
+ +lock-version = "1" + +[[task]] +id = "src/tasks/data.py::task_clean_data" +state = "f9e8d7c6..." + +[task.depends_on] +"data/raw/input.csv" = "e5f6g7h8..." + +[task.produces] +"data/processed/clean.parquet" = "m3n4o5p6..." +``` + +## Behavior + +On each run, pytask: + +1. Reads `pytask.lock` (if present). +1. Compares current dependency/product/task `state()` to stored `state`. +1. Skips tasks whose states match; runs the rest. +1. Updates `pytask.lock` after each completed task (atomic write). +1. Updates `pytask.lock` after skipping unchanged tasks (unless `--dry-run` or + `--explain` is active). + +## Portability + +There are two portability concerns: + +1. **IDs**: Lockfile IDs must be project-relative and stable across machines. +1. **State values**: `state` is opaque; portability depends on each node's `state()` + implementation. Content hashes are portable; timestamps are not. + +## Maintenance + +Use `pytask build --clean-lockfile` to rewrite `pytask.lock` with only currently +collected tasks. The rewrite happens after a successful build and recomputes current +state values without executing tasks again. + +## File Format Reference + +### Top-Level + +| Field | Required | Description | +| -------------- | -------- | -------------------------------- | +| `lock-version` | Yes | Schema version (currently `"1"`) | + +### Task Entry + +| Field | Required | Description | +| ------------ | -------- | ----------------------------- | +| `id` | Yes | Portable task identifier | +| `state` | Yes | Opaque state string | +| `depends_on` | No | Mapping from node id to state | +| `produces` | No | Mapping from node id to state | + +### Dependency/Product Entry + +Node entries are stored as key-value pairs inside `depends_on` and `produces`, where the +key is the node id and the value is the node state string. + +### IDs vs Signatures + +`id` in the lockfile is a portable identifier used to match entries across runs and +machines. 
It is not the same as a node or task `signature` used internally in the DAG. + +- `signature`: runtime identity in the in-memory DAG. +- `id`: portable lockfile key persisted to `pytask.lock`. + +When implementing custom nodes, keep lockfile IDs stable and unique within a task. + +## Version Compatibility + +Only lock-version `"1"` is supported. Older or newer versions error with a clear upgrade +message. + +## Implementation Notes + +- The lockfile is encoded/decoded with `msgspec`’s TOML support. +- Writes are atomic: pytask writes a temporary file and replaces `pytask.lock`. diff --git a/docs/source/tutorials/making_tasks_persist.md b/docs/source/tutorials/making_tasks_persist.md index ae1e1636..6a5e1a59 100644 --- a/docs/source/tutorials/making_tasks_persist.md +++ b/docs/source/tutorials/making_tasks_persist.md @@ -9,7 +9,7 @@ In this case, you can apply the {func}`@pytask.mark.persist =1.3.0", "rich>=13.8.0", "sqlalchemy>=2.0.31", + "msgspec[toml]>=0.18.6", 'tomli>=1; python_version < "3.11"', 'typing-extensions>=4.8.0; python_version < "3.11"', "universal-pathlib>=0.2.2", @@ -54,7 +55,7 @@ docs = [ "matplotlib>=3.5.0", "myst-parser>=3.0.0", "myst-nb>=1.2.0", - "sphinx>=7.0.0", + "sphinx>=7.0.0,<9.0.0", "sphinx-click>=6.0.0", "sphinx-copybutton>=0.5.2", "sphinx-design>=0.3", @@ -138,6 +139,9 @@ ignore = [ "tests/test_capture.py" = ["T201", "PT011"] "tests/*" = ["ANN", "D", "FBT", "PLR2004", "S101"] "tests/test_jupyter/*" = ["INP001"] +"tests/_test_data/*" = ["INP001"] +"tests/_test_data/*/*" = ["INP001"] +"tests/_test_data/*/*/*" = ["INP001"] "scripts/*" = ["D", "INP001"] "docs/source/conf.py" = ["D401", "INP001"] "docs_src/*" = ["ARG001", "D", "INP001", "S301"] diff --git a/src/_pytask/build.py b/src/_pytask/build.py index b400ede3..e4cc01b9 100644 --- a/src/_pytask/build.py +++ b/src/_pytask/build.py @@ -72,6 +72,7 @@ def build( # noqa: PLR0913 debug_pytask: bool = False, disable_warnings: bool = False, dry_run: bool = False, + clean_lockfile: bool = False, 
editor_url_scheme: Literal["no_link", "file", "vscode", "pycharm"] # noqa: PYI051 | str = "file", explain: bool = False, @@ -121,6 +122,8 @@ def build( # noqa: PLR0913 Whether warnings should be disabled and not displayed. dry_run Whether a dry-run should be performed that shows which tasks need to be rerun. + clean_lockfile + Whether the lockfile should be rewritten to only include collected tasks. editor_url_scheme An url scheme that allows to click on task names, node names and filenames and jump right into you preferred editor to the right line. @@ -189,6 +192,7 @@ def build( # noqa: PLR0913 "debug_pytask": debug_pytask, "disable_warnings": disable_warnings, "dry_run": dry_run, + "clean_lockfile": clean_lockfile, "editor_url_scheme": editor_url_scheme, "explain": explain, "expression": expression, @@ -305,6 +309,12 @@ def build( # noqa: PLR0913 default=False, help="Execute a task even if it succeeded successfully before.", ) +@click.option( + "--clean-lockfile", + is_flag=True, + default=False, + help="Rewrite the lockfile with only currently collected tasks.", +) @click.option( "--explain", is_flag=True, diff --git a/src/_pytask/console.py b/src/_pytask/console.py index 8451ff66..3c4b91e7 100644 --- a/src/_pytask/console.py +++ b/src/_pytask/console.py @@ -111,10 +111,26 @@ def render_to_string( example, render warnings with colors or text in exceptions. 
""" - buffer = console.render(renderable) + render_console = console + if not strip_styles and console.no_color and console.color_system is not None: + theme: Theme | None + try: + theme = Theme(console._theme_stack._entries[-1]) + except (AttributeError, IndexError, TypeError): + theme = None + render_console = Console( + color_system=console.color_system, # type: ignore[invalid-argument-type] + force_terminal=True, + width=console.width, + no_color=False, + markup=getattr(console, "_markup", True), + theme=theme, + ) + + buffer = render_console.render(renderable) if strip_styles: buffer = Segment.strip_styles(buffer) - return console._render_buffer(buffer) + return render_console._render_buffer(buffer) def format_task_name(task: PTask, editor_url_scheme: str) -> Text: diff --git a/src/_pytask/database.py b/src/_pytask/database.py index 4a579672..f16fe44a 100644 --- a/src/_pytask/database.py +++ b/src/_pytask/database.py @@ -45,4 +45,7 @@ def pytask_parse_config(config: dict[str, Any]) -> None: @hookimpl def pytask_post_parse(config: dict[str, Any]) -> None: """Post-parse the configuration.""" + command = config.get("command") + if command not in (None, "build"): + return create_database(config["database_url"]) diff --git a/src/_pytask/database_utils.py b/src/_pytask/database_utils.py index 63810343..4db66a59 100644 --- a/src/_pytask/database_utils.py +++ b/src/_pytask/database_utils.py @@ -2,10 +2,13 @@ from __future__ import annotations +from pathlib import Path from typing import TYPE_CHECKING from typing import Literal from sqlalchemy import create_engine +from sqlalchemy import inspect +from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.orm import DeclarativeBase from sqlalchemy.orm import Mapped from sqlalchemy.orm import mapped_column @@ -24,7 +27,9 @@ __all__ = [ "BaseTable", "DatabaseSession", + "configure_database_if_present", "create_database", + "database_is_configured", "get_node_change_info", "update_states_in_database", ] @@ -58,6 +63,42 @@ 
def create_database(url: str) -> None: DatabaseSession.configure(bind=_ENGINE) +def database_is_configured() -> bool: + """Return whether the database session is configured.""" + return _ENGINE is not None + + +def configure_database_if_present(url: str) -> bool: + """Configure the database session if a legacy database exists.""" + global _ENGINE # noqa: PLW0603 + if _ENGINE is not None: + return True + + try: + engine = create_engine(url) + except SQLAlchemyError: + return False + + if engine.url.drivername == "sqlite": + db_path = engine.url.database + if not db_path or not Path(db_path).exists(): + engine.dispose() + return False + + try: + inspector = inspect(engine) + if "state" not in inspector.get_table_names(): + engine.dispose() + return False + except SQLAlchemyError: + engine.dispose() + return False + + _ENGINE = engine + DatabaseSession.configure(bind=_ENGINE) + return True + + def _create_or_update_state(first_key: str, second_key: str, hash_: str) -> None: """Create or update a state.""" with DatabaseSession() as session: @@ -71,6 +112,8 @@ def _create_or_update_state(first_key: str, second_key: str, hash_: str) -> None def update_states_in_database(session: Session, task_signature: str) -> None: """Update the state for each node of a task in the database.""" + if _ENGINE is None: + return for name in node_and_neighbors(session.dag, task_signature): node = session.dag.nodes[name].get("task") or session.dag.nodes[name]["node"] hash_ = node.state() @@ -82,6 +125,8 @@ def has_node_changed(task: PTask, node: PTask | PNode, state: str | None) -> boo # If node does not exist, we receive None. if state is None: return True + if _ENGINE is None: + return True with DatabaseSession() as session: db_state = session.get(State, (task.signature, node.signature)) @@ -115,6 +160,8 @@ def get_node_change_info( # If node does not exist, we receive None. 
if state is None: return True, "missing", details + if _ENGINE is None: + return True, "not_in_db", details with DatabaseSession() as session: db_state = session.get(State, (task.signature, node.signature)) diff --git a/src/_pytask/execute.py b/src/_pytask/execute.py index 167efdcf..9b3fd8c3 100644 --- a/src/_pytask/execute.py +++ b/src/_pytask/execute.py @@ -20,9 +20,6 @@ from _pytask.dag_utils import TopologicalSorter from _pytask.dag_utils import descending_tasks from _pytask.dag_utils import node_and_neighbors -from _pytask.database_utils import get_node_change_info -from _pytask.database_utils import has_node_changed -from _pytask.database_utils import update_states_in_database from _pytask.exceptions import ExecutionError from _pytask.exceptions import NodeLoadError from _pytask.exceptions import NodeNotFoundError @@ -46,6 +43,9 @@ from _pytask.pluginmanager import hookimpl from _pytask.provisional_utils import collect_provisional_products from _pytask.reports import ExecutionReport +from _pytask.state import get_node_change_info +from _pytask.state import has_node_changed +from _pytask.state import update_states from _pytask.traceback import remove_traceback_from_exc_info from _pytask.tree_util import tree_leaves from _pytask.tree_util import tree_map @@ -196,7 +196,7 @@ def pytask_execute_task_setup(session: Session, task: PTask) -> None: # noqa: C # Check if node changed and collect detailed info if in explain mode if session.config["explain"]: has_changed, reason, details = get_node_change_info( - task=task, node=node, state=node_state + session=session, task=task, node=node, state=node_state ) if has_changed: needs_to_be_executed = True @@ -222,7 +222,9 @@ def pytask_execute_task_setup(session: Session, task: PTask) -> None: # noqa: C ) ) else: - has_changed = has_node_changed(task=task, node=node, state=node_state) + has_changed = has_node_changed( + session=session, task=task, node=node, state=node_state + ) if has_changed: needs_to_be_executed = True 
@@ -232,6 +234,8 @@ def pytask_execute_task_setup(session: Session, task: PTask) -> None: # noqa: C if not needs_to_be_executed: collect_provisional_products(session, task) + if not session.config["dry_run"] and not session.config["explain"]: + update_states(session, task) raise SkippedUnchanged # Create directory for product if it does not exist. Maybe this should be a `setup` @@ -326,7 +330,7 @@ def pytask_execute_task_process_report( task = report.task if report.outcome == TaskOutcome.SUCCESS: - update_states_in_database(session, task.signature) + update_states(session, task) elif report.exc_info and isinstance(report.exc_info[1], WouldBeExecuted): report.outcome = TaskOutcome.WOULD_BE_EXECUTED diff --git a/src/_pytask/lockfile.py b/src/_pytask/lockfile.py new file mode 100644 index 00000000..ba18ccbd --- /dev/null +++ b/src/_pytask/lockfile.py @@ -0,0 +1,459 @@ +"""Support for the pytask lock file.""" + +from __future__ import annotations + +import os +from dataclasses import dataclass +from dataclasses import field +from itertools import chain +from pathlib import Path +from typing import TYPE_CHECKING +from typing import Any + +import msgspec +from packaging.version import InvalidVersion +from packaging.version import Version +from upath import UPath + +from _pytask.journal import JsonlJournal +from _pytask.node_protocols import PNode +from _pytask.node_protocols import PPathNode +from _pytask.node_protocols import PTask +from _pytask.node_protocols import PTaskWithPath +from _pytask.nodes import PythonNode +from _pytask.outcomes import ExitCode +from _pytask.pluginmanager import hookimpl +from _pytask.tree_util import tree_leaves + +if TYPE_CHECKING: + from _pytask.session import Session + +CURRENT_LOCKFILE_VERSION = "1" + + +class LockfileError(Exception): + """Raised when reading or writing a lockfile fails.""" + + +class LockfileVersionError(LockfileError): + """Raised when a lockfile version is not supported.""" + + +class _TaskEntry(msgspec.Struct): + 
id: str + state: str + depends_on: dict[str, str] = msgspec.field(default_factory=dict) + produces: dict[str, str] = msgspec.field(default_factory=dict) + + +class _Lockfile(msgspec.Struct, forbid_unknown_fields=False): + lock_version: str = msgspec.field(name="lock-version") + task: list[_TaskEntry] = msgspec.field(default_factory=list) + + +class _JournalEntry(msgspec.Struct): + lock_version: str = msgspec.field(name="lock-version") + id: str + state: str + depends_on: dict[str, str] = msgspec.field(default_factory=dict) + produces: dict[str, str] = msgspec.field(default_factory=dict) + + +def _should_initialize_lockfile_state(command: str | None) -> bool: + return command in (None, "build") + + +def _should_validate_lockfile_ids(command: str | None) -> bool: + return command in (None, "build", "collect") + + +def _encode_node_path(path: tuple[str | int, ...]) -> str: + return msgspec.json.encode(path).decode() + + +def _relative_path(path: Path, root: Path) -> str: + if isinstance(path, UPath) and path.protocol: + return str(path) + try: + rel = os.path.relpath(path, root) + except ValueError: + return path.as_posix() + return Path(rel).as_posix() + + +def build_portable_task_id(task: PTask, root: Path) -> str: + if isinstance(task, PTaskWithPath): + base_name = getattr(task, "base_name", None) or task.name + return f"{_relative_path(task.path, root)}::{base_name}" + return task.name + + +def _build_portable_task_id_from_parts( + task_path: Path | None, task_name: str, root: Path +) -> str: + if task_path is None: + return task_name + return f"{_relative_path(task_path, root)}::{task_name}" + + +def build_portable_node_id(node: PNode, root: Path) -> str: + if isinstance(node, PythonNode) and node.node_info: + task_id = _build_portable_task_id_from_parts( + node.node_info.task_path, node.node_info.task_name, root + ) + node_id = f"{task_id}::{node.node_info.arg_name}" + if node.node_info.path: + suffix = _encode_node_path(node.node_info.path) + node_id = 
f"{node_id}::{suffix}" + return node_id + if isinstance(node, PPathNode): + return _relative_path(node.path, root) + return node.name + + +def _journal(path: Path) -> JsonlJournal[_JournalEntry]: + return JsonlJournal( + path=path.with_suffix(f"{path.suffix}.journal"), type_=_JournalEntry + ) + + +def _read_journal_entries(journal: JsonlJournal[_JournalEntry]) -> list[_JournalEntry]: + entries = journal.read() + current_version = Version(CURRENT_LOCKFILE_VERSION) + for entry in entries: + try: + entry_version = Version(entry.lock_version) + except InvalidVersion: + msg = f"Invalid lock-version {entry.lock_version!r}." + raise LockfileVersionError(msg) from None + if entry_version != current_version: + msg = ( + f"Unsupported lock-version {entry.lock_version!r}. " + f"Current version is {CURRENT_LOCKFILE_VERSION}." + ) + raise LockfileVersionError(msg) + return entries + + +def read_lockfile(path: Path) -> _Lockfile | None: + if not path.exists(): + return None + + try: + raw = msgspec.toml.decode(path.read_bytes()) + except msgspec.DecodeError: + msg = "Lockfile has invalid format." + raise LockfileError(msg) from None + if not isinstance(raw, dict): + msg = "Lockfile has invalid format." + raise LockfileError(msg) + + version = raw.get("lock-version") + if not isinstance(version, str): + msg = "Lockfile is missing 'lock-version'." + raise LockfileError(msg) + + try: + parsed_version = Version(version) + except InvalidVersion: + msg = f"Invalid lock-version {version!r}." + raise LockfileVersionError(msg) from None + + if parsed_version != Version(CURRENT_LOCKFILE_VERSION): + msg = ( + f"Unsupported lock-version {version!r}. " + f"Current version is {CURRENT_LOCKFILE_VERSION}." + ) + raise LockfileVersionError(msg) + + try: + return msgspec.toml.decode(path.read_bytes(), type=_Lockfile) + except msgspec.DecodeError: + msg = "Lockfile has invalid format." 
+ raise LockfileError(msg) from None + + +def _normalize_lockfile(lockfile: _Lockfile) -> _Lockfile: + tasks = [] + for task in sorted(lockfile.task, key=lambda entry: entry.id): + depends_on = {key: task.depends_on[key] for key in sorted(task.depends_on)} + produces = {key: task.produces[key] for key in sorted(task.produces)} + tasks.append( + _TaskEntry( + id=task.id, + state=task.state, + depends_on=depends_on, + produces=produces, + ) + ) + return _Lockfile(lock_version=CURRENT_LOCKFILE_VERSION, task=tasks) + + +def write_lockfile(path: Path, lockfile: _Lockfile) -> None: + normalized = _normalize_lockfile(lockfile) + data = msgspec.toml.encode(normalized) + tmp = path.with_suffix(f"{path.suffix}.tmp") + tmp.write_bytes(data) + tmp.replace(path) + + +def _apply_journal(lockfile: _Lockfile, entries: list[_JournalEntry]) -> _Lockfile: + if not entries: + return lockfile + task_index = {task.id: task for task in lockfile.task} + for entry in entries: + task_index[entry.id] = _TaskEntry( + id=entry.id, + state=entry.state, + depends_on=entry.depends_on, + produces=entry.produces, + ) + return _Lockfile( + lock_version=CURRENT_LOCKFILE_VERSION, + task=list(task_index.values()), + ) + + +def _build_task_entry(session: Session, task: PTask, root: Path) -> _TaskEntry | None: + task_state = task.state() + if task_state is None: + return None + + dag = session.dag + predecessors = set(dag.predecessors(task.signature)) + successors = set(dag.successors(task.signature)) + + depends_on: dict[str, str] = {} + for node_signature in predecessors: + node = ( + dag.nodes[node_signature].get("task") or dag.nodes[node_signature]["node"] + ) + if not isinstance(node, (PNode, PTask)): + continue + state = node.state() + if state is None: + continue + node_id = ( + build_portable_task_id(node, root) + if isinstance(node, PTask) + else build_portable_node_id(node, root) + ) + depends_on[node_id] = state + + produces: dict[str, str] = {} + for node_signature in successors: + node = ( + 
dag.nodes[node_signature].get("task") or dag.nodes[node_signature]["node"] + ) + if not isinstance(node, (PNode, PTask)): + continue + state = node.state() + if state is None: + continue + node_id = ( + build_portable_task_id(node, root) + if isinstance(node, PTask) + else build_portable_node_id(node, root) + ) + produces[node_id] = state + + return _TaskEntry( + id=build_portable_task_id(task, root), + state=task_state, + depends_on=depends_on, + produces=produces, + ) + + +def _raise_error_if_lockfile_ids_are_ambiguous(tasks: list[PTask], root: Path) -> None: + errors: list[str] = [] + + for task in tasks: + task_id = build_portable_task_id(task, root) + seen: dict[str, tuple[str, str, str]] = {} + + dependencies = ( + ("dependency", node) + for node in tree_leaves(task.depends_on) + if isinstance(node, PNode) + ) + products = ( + ("product", node) + for node in tree_leaves(task.produces) + if isinstance(node, PNode) + ) + + for kind, node in chain(dependencies, products): + node_id = build_portable_node_id(node, root) + current = (node.signature, kind, node.name) + previous = seen.get(node_id) + if previous is None: + seen[node_id] = current + continue + + previous_signature, previous_kind, previous_name = previous + current_signature, current_kind, current_name = current + + if previous_signature != current_signature: + errors.append( + f"- task {task_id!r}: lockfile id {node_id!r} is used by " + f"{previous_kind} {previous_name!r} (signature " + f"{previous_signature[:8]}...) and {current_kind} " + f"{current_name!r} (signature {current_signature[:8]}...)." + ) + + if errors: + msg = ( + "Ambiguous lockfile ids detected. 
Each dependency/product must map to a " + "unique lockfile id within a task.\n\n" + "\n".join(errors) + ) + raise ValueError(msg) + + +@dataclass +class LockfileState: + path: Path + root: Path + use_lockfile_for_skip: bool + lockfile: _Lockfile + _task_index: dict[str, _TaskEntry] = field(init=False, default_factory=dict) + _node_index: dict[str, dict[str, str]] = field(init=False, default_factory=dict) + _dirty: bool = field(init=False, default=False) + + def __post_init__(self) -> None: + self._rebuild_indexes() + + @classmethod + def from_path(cls, path: Path, root: Path) -> LockfileState: + existing = read_lockfile(path) + journal = _journal(path) + journal_entries = _read_journal_entries(journal) + if existing is None: + lockfile = _Lockfile( + lock_version=CURRENT_LOCKFILE_VERSION, + task=[], + ) + lockfile = _apply_journal(lockfile, journal_entries) + state = cls( + path=path, + root=root, + use_lockfile_for_skip=bool(journal_entries), + lockfile=lockfile, + ) + if journal_entries: + state._dirty = True + return state + lockfile = _apply_journal(existing, journal_entries) + state = cls( + path=path, + root=root, + use_lockfile_for_skip=True, + lockfile=lockfile, + ) + if journal_entries: + state._dirty = True + return state + + def _rebuild_indexes(self) -> None: + self._task_index = {task.id: task for task in self.lockfile.task} + self._node_index = {} + for task in self.lockfile.task: + nodes = {**task.depends_on, **task.produces} + self._node_index[task.id] = nodes + + def get_task_entry(self, task_id: str) -> _TaskEntry | None: + return self._task_index.get(task_id) + + def get_node_state(self, task_id: str, node_id: str) -> str | None: + return self._node_index.get(task_id, {}).get(node_id) + + def update_task(self, session: Session, task: PTask) -> None: + entry = _build_task_entry(session, task, self.root) + if entry is None: + return + existing = self._task_index.get(entry.id) + if existing == entry: + return + self._task_index[entry.id] = entry + 
self.lockfile = _Lockfile( + lock_version=CURRENT_LOCKFILE_VERSION, + task=list(self._task_index.values()), + ) + self._rebuild_indexes() + journal = _journal(self.path) + journal.append( + _JournalEntry( + lock_version=CURRENT_LOCKFILE_VERSION, + id=entry.id, + state=entry.state, + depends_on=entry.depends_on, + produces=entry.produces, + ) + ) + self._dirty = True + + def rebuild_from_session(self, session: Session) -> None: + if session.dag is None: + return + tasks = [] + for task in session.tasks: + entry = _build_task_entry(session, task, self.root) + if entry is not None: + tasks.append(entry) + self.lockfile = _Lockfile( + lock_version=CURRENT_LOCKFILE_VERSION, + task=tasks, + ) + self._rebuild_indexes() + write_lockfile(self.path, self.lockfile) + _journal(self.path).delete() + self._dirty = False + + def flush(self) -> None: + if not self._dirty: + return + write_lockfile(self.path, self.lockfile) + _journal(self.path).delete() + self._dirty = False + + +@hookimpl +def pytask_post_parse(config: dict[str, Any]) -> None: + """Initialize the lockfile state.""" + if not _should_initialize_lockfile_state(config.get("command")): + return + path = config["root"] / "pytask.lock" + config["lockfile_path"] = path + config["lockfile_state"] = LockfileState.from_path(path, config["root"]) + + +@hookimpl(trylast=True) +def pytask_collect_modify_tasks(session: Session, tasks: list[PTask]) -> None: + """Validate that lockfile ids are unambiguous for collected tasks.""" + if not _should_validate_lockfile_ids(session.config.get("command")): + return + _raise_error_if_lockfile_ids_are_ambiguous(tasks, session.config["root"]) + + +@hookimpl +def pytask_unconfigure(session: Session) -> None: + """Optionally rewrite the lockfile to drop stale entries.""" + if session.config.get("command") != "build": + return + if session.config.get("dry_run"): + return + if session.config.get("explain"): + return + if session.exit_code != ExitCode.OK: + lockfile_state = 
session.config.get("lockfile_state") + if lockfile_state is None: + return + lockfile_state.flush() + return + lockfile_state = session.config.get("lockfile_state") + if lockfile_state is None: + return + if session.config.get("clean_lockfile"): + lockfile_state.rebuild_from_session(session) + else: + lockfile_state.flush() diff --git a/src/_pytask/persist.py b/src/_pytask/persist.py index 9bd567b4..40d958b3 100644 --- a/src/_pytask/persist.py +++ b/src/_pytask/persist.py @@ -6,13 +6,14 @@ from typing import Any from _pytask.dag_utils import node_and_neighbors -from _pytask.database_utils import has_node_changed -from _pytask.database_utils import update_states_in_database +from _pytask.database_utils import update_states_in_database as _db_update_states from _pytask.mark_utils import has_mark from _pytask.outcomes import Persisted from _pytask.outcomes import TaskOutcome from _pytask.pluginmanager import hookimpl from _pytask.provisional_utils import collect_provisional_products +from _pytask.state import has_node_changed +from _pytask.state import update_states if TYPE_CHECKING: from _pytask.node_protocols import PTask @@ -20,6 +21,11 @@ from _pytask.session import Session +def update_states_in_database(session: Session, task_signature: str) -> None: + """Compatibility wrapper for older callers/tests.""" + _db_update_states(session, task_signature) + + @hookimpl def pytask_parse_config(config: dict[str, Any]) -> None: """Add the marker to the configuration.""" @@ -52,6 +58,7 @@ def pytask_execute_task_setup(session: Session, task: PTask) -> None: if all_nodes_exist: any_node_changed = any( has_node_changed( + session=session, task=task, node=session.dag.nodes[name].get("task") or session.dag.nodes[name]["node"], @@ -79,6 +86,6 @@ def pytask_execute_task_process_report( """ if report.exc_info and isinstance(report.exc_info[1], Persisted): report.outcome = TaskOutcome.PERSISTENCE - update_states_in_database(session, report.task.signature) + update_states(session, 
report.task) return True return None diff --git a/src/_pytask/pluginmanager.py b/src/_pytask/pluginmanager.py index 26add9e5..d47519f2 100644 --- a/src/_pytask/pluginmanager.py +++ b/src/_pytask/pluginmanager.py @@ -53,6 +53,7 @@ def pytask_add_hooks(pm: PluginManager) -> None: "_pytask.provisional", "_pytask.execute", "_pytask.live", + "_pytask.lockfile", "_pytask.logging", "_pytask.mark", "_pytask.nodes", diff --git a/src/_pytask/state.py b/src/_pytask/state.py new file mode 100644 index 00000000..5b05c173 --- /dev/null +++ b/src/_pytask/state.py @@ -0,0 +1,94 @@ +"""State handling for lockfile and database backends.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from _pytask.database_utils import get_node_change_info as _db_get_node_change_info +from _pytask.database_utils import has_node_changed as _db_has_node_changed +from _pytask.database_utils import update_states_in_database as _db_update_states +from _pytask.lockfile import LockfileState +from _pytask.lockfile import build_portable_node_id +from _pytask.lockfile import build_portable_task_id +from _pytask.node_protocols import PTask + +if TYPE_CHECKING: + from _pytask.node_protocols import PNode + from _pytask.session import Session + + +def _get_lockfile_state(session: Session) -> LockfileState | None: + return session.config.get("lockfile_state") + + +def has_node_changed( + session: Session, task: PTask, node: PTask | PNode, state: str | None +) -> bool: + lockfile_state = _get_lockfile_state(session) + if lockfile_state and lockfile_state.use_lockfile_for_skip: + if state is None: + return True + task_id = build_portable_task_id(task, lockfile_state.root) + if node is task or ( + hasattr(node, "signature") and node.signature == task.signature + ): + entry = lockfile_state.get_task_entry(task_id) + if entry is None: + return True + return state != entry.state + node_id = ( + build_portable_task_id(node, lockfile_state.root) + if isinstance(node, PTask) + else 
build_portable_node_id(node, lockfile_state.root) + ) + stored_state = lockfile_state.get_node_state(task_id, node_id) + if stored_state is None: + return True + return state != stored_state + return _db_has_node_changed(task=task, node=node, state=state) + + +def get_node_change_info( + session: Session, task: PTask, node: PTask | PNode, state: str | None +) -> tuple[bool, str, dict[str, str]]: + lockfile_state = _get_lockfile_state(session) + if not (lockfile_state and lockfile_state.use_lockfile_for_skip): + return _db_get_node_change_info(task=task, node=node, state=state) + + details: dict[str, str] = {} + if state is None: + return True, "missing", details + + task_id = build_portable_task_id(task, lockfile_state.root) + is_task = node is task or ( + hasattr(node, "signature") and node.signature == task.signature + ) + if is_task: + entry = lockfile_state.get_task_entry(task_id) + if entry is None: + return True, "not_in_db", details + stored_state = entry.state + else: + node_id = ( + build_portable_task_id(node, lockfile_state.root) + if isinstance(node, PTask) + else build_portable_node_id(node, lockfile_state.root) + ) + stored_state = lockfile_state.get_node_state(task_id, node_id) + if stored_state is None: + return True, "not_in_db", details + + if state != stored_state: + details["old_hash"] = stored_state + details["new_hash"] = state + return True, "changed", details + return False, "unchanged", details + + +def update_states(session: Session, task: PTask) -> None: + if session.dag is None: + return + lockfile_state = _get_lockfile_state(session) + if lockfile_state is not None: + lockfile_state.update_task(session, task) + _db_update_states(session, task.signature) diff --git a/src/_pytask/task_utils.py b/src/_pytask/task_utils.py index 77b4311b..5cc93872 100644 --- a/src/_pytask/task_utils.py +++ b/src/_pytask/task_utils.py @@ -12,8 +12,10 @@ from types import BuiltinFunctionType from typing import TYPE_CHECKING from typing import Any +from typing 
import TypeGuard from typing import TypeVar from typing import cast +from typing import overload from _pytask.coiled_utils import Function from _pytask.coiled_utils import extract_coiled_function_kwargs @@ -37,6 +39,11 @@ T = TypeVar("T", bound="Callable[..., Any]") +def _is_task_decorator_target(obj: object) -> TypeGuard[Callable[..., Any]]: + """Narrow objects accepted by bare ``@task`` usage to named callables.""" + return is_task_function(obj) + + __all__ = [ "COLLECTED_TASKS", "parse_collected_tasks_with_task_marker", @@ -55,15 +62,34 @@ """ -def task( # noqa: PLR0913 +@overload +def task( + name: T, + /, +) -> TaskDecorated[T]: ... + + +@overload +def task( name: str | None = None, *, after: str | Callable[..., Any] | list[Callable[..., Any]] | None = None, is_generator: bool = False, + id: str | None = None, + kwargs: dict[Any, Any] | None = None, + produces: Any | None = None, +) -> Callable[[T], TaskDecorated[T]]: ... + + +def task( # noqa: PLR0913 + name: str | T | None = None, + *, + after: str | Callable[..., Any] | list[Callable[..., Any]] | None = None, + is_generator: bool = False, id: str | None = None, # noqa: A002 kwargs: dict[Any, Any] | None = None, produces: Any | None = None, -) -> Callable[[T], TaskDecorated[T]]: +) -> TaskDecorated[T] | Callable[[T], TaskDecorated[T]]: """Decorate a task function. This decorator declares every callable as a pytask task. @@ -122,7 +148,7 @@ def wrapper(func: T) -> TaskDecorated[T]: _rich_traceback_omit = True # When @task is used without parentheses, name is the function, not a string. - effective_name = None if is_task_function(name) else name + effective_name = name if isinstance(name, str) else None for arg, arg_name in ((effective_name, "name"), (id, "id")): if not (isinstance(arg, str) or arg is None): @@ -185,8 +211,9 @@ def wrapper(func: T) -> TaskDecorated[T]: return unwrapped # When decorator is used without parentheses, call wrapper directly. 
- if is_task_function(name) and kwargs is None: - return wrapper(cast("T", name)) # ty: ignore[invalid-argument-type] + if _is_task_decorator_target(name) and kwargs is None: + func = cast("T", name) + return wrapper(func) return wrapper diff --git a/tests/_test_data/portability_projects/basic_completed/.gitattributes b/tests/_test_data/portability_projects/basic_completed/.gitattributes new file mode 100644 index 00000000..fcadb2cf --- /dev/null +++ b/tests/_test_data/portability_projects/basic_completed/.gitattributes @@ -0,0 +1 @@ +* text eol=lf diff --git a/tests/_test_data/portability_projects/basic_completed/input.txt b/tests/_test_data/portability_projects/basic_completed/input.txt new file mode 100644 index 00000000..55c5728f --- /dev/null +++ b/tests/_test_data/portability_projects/basic_completed/input.txt @@ -0,0 +1 @@ +hello portability diff --git a/tests/_test_data/portability_projects/basic_completed/output.txt b/tests/_test_data/portability_projects/basic_completed/output.txt new file mode 100644 index 00000000..89401b0c --- /dev/null +++ b/tests/_test_data/portability_projects/basic_completed/output.txt @@ -0,0 +1 @@ +HELLO PORTABILITY diff --git a/tests/_test_data/portability_projects/basic_completed/pytask.lock b/tests/_test_data/portability_projects/basic_completed/pytask.lock new file mode 100644 index 00000000..d8b41143 --- /dev/null +++ b/tests/_test_data/portability_projects/basic_completed/pytask.lock @@ -0,0 +1,11 @@ +lock-version = "1" + +[[task]] +id = "task_example.py::task_create_output" +state = "b4a2f46e74bb7fb4fdcb8b67b89a5b3f68e83992128ab0638835489c87d45b3c" + +[task.depends_on] +"input.txt" = "72fca1d369c99d96f3c6dfcbdf5a29c86671fec3f0cf0194ea97fbbfb08adf63" + +[task.produces] +"output.txt" = "e34bf52a65679202422cc650c3b12e4a6094ee6181a0252335e70a87348dd234" diff --git a/tests/_test_data/portability_projects/basic_completed/task_example.py b/tests/_test_data/portability_projects/basic_completed/task_example.py new file mode 100644 
index 00000000..8ae7762e --- /dev/null +++ b/tests/_test_data/portability_projects/basic_completed/task_example.py @@ -0,0 +1,11 @@ +from pathlib import Path +from typing import Annotated + +from pytask import Product + + +def task_create_output( + path_in: Path = Path("input.txt"), + path_out: Annotated[Path, Product] = Path("output.txt"), +) -> None: + path_out.write_text(path_in.read_text().upper()) diff --git a/tests/conftest.py b/tests/conftest.py index e0b45db3..027f32da 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -44,7 +44,7 @@ def _remove_variable_info_from_output(data: str, path: Any) -> str: # noqa: ARG # Remove dynamic versions. index_root = next(i for i, line in enumerate(lines) if line.startswith("Root:")) - new_info_line = " ".join(lines[1:index_root]) + new_info_line = " ".join(line.strip() for line in lines[1:index_root]) for platform in ("linux", "win32", "darwin"): new_info_line = new_info_line.replace(platform, "") pattern = re.compile(version.VERSION_PATTERN, flags=re.IGNORECASE | re.VERBOSE) diff --git a/tests/test_database.py b/tests/test_database.py index 0cd8096e..42964816 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -2,19 +2,18 @@ import textwrap -from sqlalchemy.engine import make_url - -from pytask import DatabaseSession +from _pytask.lockfile import build_portable_node_id +from _pytask.lockfile import build_portable_task_id +from _pytask.lockfile import read_lockfile +from _pytask.node_protocols import PNode +from _pytask.tree_util import tree_leaves from pytask import ExitCode -from pytask import State from pytask import build from pytask import cli -from pytask import create_database -from pytask.path import hash_path -def test_existence_of_hashes_in_db(tmp_path): - """Modification dates of input and output files are stored in database.""" +def test_existence_of_hashes_in_lockfile(tmp_path): + """Modification dates of input and output files are stored in the lockfile.""" source = """ from pathlib 
import Path @@ -30,35 +29,36 @@ def task_write(path=Path("in.txt"), produces=Path("out.txt")): assert session.exit_code == ExitCode.OK - create_database( - make_url( # type: ignore[arg-type] - "sqlite:///" + tmp_path.joinpath(".pytask", "pytask.sqlite3").as_posix() - ) - ) - - with DatabaseSession() as db_session: - task_id = session.tasks[0].signature - out_path = tmp_path.joinpath("out.txt") - depends_on = session.tasks[0].depends_on - produces = session.tasks[0].produces - assert depends_on is not None - assert produces is not None - in_id = depends_on["path"].signature # type: ignore[union-attr] - out_id = produces["produces"].signature # type: ignore[union-attr] - - for id_, path in ( - (task_id, task_path), - (in_id, in_path), - (out_id, out_path), - ): - state = db_session.get(State, (task_id, id_)) - assert state is not None - hash_ = state.hash_ - assert hash_ == hash_path(path, path.stat().st_mtime) + lockfile = read_lockfile(tmp_path / "pytask.lock") + assert lockfile is not None + tasks_by_id = {entry.id: entry for entry in lockfile.task} + + task = session.tasks[0] + task_id = build_portable_task_id(task, tmp_path) + entry = tasks_by_id[task_id] + assert entry.state == task.state() + + depends_on = task.depends_on + produces = task.produces + assert depends_on is not None + assert produces is not None + in_nodes = tree_leaves(depends_on["path"]) + out_nodes = tree_leaves(produces["produces"]) + assert len(in_nodes) == 1 + assert len(out_nodes) == 1 + in_node = in_nodes[0] + out_node = out_nodes[0] + assert isinstance(in_node, PNode) + assert isinstance(out_node, PNode) + + in_id = build_portable_node_id(in_node, tmp_path) + out_id = build_portable_node_id(out_node, tmp_path) + assert entry.depends_on[in_id] == in_node.state() + assert entry.produces[out_id] == out_node.state() def test_rename_database_w_config(tmp_path, runner): - """Modification dates of input and output files are stored in database.""" + """Database files are created for 
compatibility with legacy backends.""" path_to_db = tmp_path.joinpath(".db.sqlite") tmp_path.joinpath("pyproject.toml").write_text( "[tool.pytask.ini_options]\ndatabase_url='sqlite:///.db.sqlite'" @@ -80,7 +80,7 @@ def test_database_url_from_config_is_parsed(tmp_path): def test_rename_database_w_cli(tmp_path, runner): - """Modification dates of input and output files are stored in database.""" + """Database files are created for compatibility with legacy backends.""" path_to_db = tmp_path.joinpath(".db.sqlite") result = runner.invoke( cli, diff --git a/tests/test_execute.py b/tests/test_execute.py index 08933d6d..c3315ab5 100644 --- a/tests/test_execute.py +++ b/tests/test_execute.py @@ -608,6 +608,7 @@ def create_file( result = subprocess.run( (sys.executable, tmp_path.joinpath("task_module.py").as_posix()), check=False, + cwd=tmp_path, ) assert result.returncode == ExitCode.OK assert tmp_path.joinpath("file.txt").read_text() == "This is the text." @@ -636,7 +637,8 @@ def task2() -> None: pass """ tmp_path.joinpath("task_module.py").write_text(textwrap.dedent(source)) result = run_in_subprocess( - (sys.executable, tmp_path.joinpath("task_module.py").as_posix()) + (sys.executable, tmp_path.joinpath("task_module.py").as_posix()), + cwd=tmp_path, ) assert result.exit_code == ExitCode.OK @@ -955,11 +957,11 @@ def func(path): produces={"path": PathNode(path=tmp_path / "out.txt")}, ) - session = build(tasks=[task]) + session = build(tasks=[task], paths=tmp_path) assert session.exit_code == ExitCode.OK assert tmp_path.joinpath("out.txt").exists() - session = build(tasks=task) + session = build(tasks=task, paths=tmp_path) assert session.exit_code == ExitCode.OK diff --git a/tests/test_jupyter/test_task_generator.ipynb b/tests/test_jupyter/test_task_generator.ipynb index b291658d..cb922e56 100644 --- a/tests/test_jupyter/test_task_generator.ipynb +++ b/tests/test_jupyter/test_task_generator.ipynb @@ -37,7 +37,7 @@ "):\n", " for path in paths:\n", "\n", - " @task # type: 
ignore[arg-type]\n", + " @task\n", " def task_copy(\n", " path: Path = path,\n", " ) -> Annotated[str, path.with_name(path.stem + \"-copy.txt\")]:\n", diff --git a/tests/test_lockfile.py b/tests/test_lockfile.py new file mode 100644 index 00000000..2aebe95b --- /dev/null +++ b/tests/test_lockfile.py @@ -0,0 +1,382 @@ +from __future__ import annotations + +import textwrap + +import pytest + +import _pytask.lockfile as lockfile_module +from _pytask.lockfile import LockfileError +from _pytask.lockfile import LockfileVersionError +from _pytask.lockfile import build_portable_node_id +from _pytask.lockfile import read_lockfile +from _pytask.models import NodeInfo +from _pytask.nodes import PythonNode +from pytask import DatabaseSession +from pytask import ExitCode +from pytask import PathNode +from pytask import State +from pytask import TaskWithoutPath +from pytask import build +from pytask import cli + + +def test_lockfile_rejects_older_version(tmp_path): + path = tmp_path / "pytask.lock" + path.write_text( + textwrap.dedent( + """ + lock-version = "0.9" + task = [] + """ + ).strip() + + "\n" + ) + + with pytest.raises(LockfileVersionError): + read_lockfile(path) + + +def test_lockfile_rejects_newer_version(tmp_path): + path = tmp_path / "pytask.lock" + path.write_text( + textwrap.dedent( + """ + lock-version = "9.0" + task = [] + """ + ).strip() + + "\n" + ) + + with pytest.raises(LockfileVersionError): + read_lockfile(path) + + +def test_lockfile_rejects_invalid_version_string(tmp_path): + path = tmp_path / "pytask.lock" + path.write_text( + textwrap.dedent( + """ + lock-version = "abc" + task = [] + """ + ).strip() + + "\n" + ) + + with pytest.raises(LockfileVersionError, match=r"Invalid lock-version"): + read_lockfile(path) + + +def test_lockfile_rejects_invalid_format(tmp_path): + path = tmp_path / "pytask.lock" + path.write_text("{not toml") + + with pytest.raises(LockfileError): + read_lockfile(path) + + +def test_python_node_id_is_collision_free(tmp_path): + 
task_path = tmp_path / "task.py" + node_info_left = NodeInfo( + arg_name="value", + path=("a-b", "c"), + task_path=task_path, + task_name="task", + value=None, + ) + node_info_right = NodeInfo( + arg_name="value", + path=("a", "b-c"), + task_path=task_path, + task_name="task", + value=None, + ) + node_left = PythonNode(name="node", value=1, node_info=node_info_left) + node_right = PythonNode(name="node", value=1, node_info=node_info_right) + + left_id = build_portable_node_id(node_left, tmp_path) + right_id = build_portable_node_id(node_right, tmp_path) + assert left_id != right_id + + +@pytest.mark.parametrize( + "args", + [ + pytest.param(lambda path: [path.as_posix()], id="build"), + pytest.param(lambda path: ["collect", path.as_posix()], id="collect"), + ], +) +def test_collection_fails_for_ambiguous_lockfile_ids(runner, tmp_path, args): + source = """ + from dataclasses import dataclass, field + from pathlib import Path + from typing import Any + + @dataclass + class CustomNode: + name: str + value: str + signature: str + attributes: dict[Any, Any] = field(default_factory=dict) + + def state(self): + return self.value + + def load(self, is_product=False): + return self.value + + def save(self, value): + self.value = value + + def task_example( + first=CustomNode(name="dup", value="1", signature="signature-a"), + second=CustomNode(name="dup", value="2", signature="signature-b"), + produces=Path("out.txt"), + ): + produces.write_text(first + second) + """ + tmp_path.joinpath("task_module.py").write_text(textwrap.dedent(source)) + + result = runner.invoke(cli, args(tmp_path)) + + assert result.exit_code == ExitCode.COLLECTION_FAILED + assert "Ambiguous lockfile ids detected" in result.output + assert "lockfile id 'dup'" in result.output + + +def test_markers_command_ignores_invalid_lockfile(runner, tmp_path): + tmp_path.joinpath("pytask.lock").write_text("{not toml") + + result = runner.invoke(cli, ["markers", tmp_path.as_posix()]) + + assert result.exit_code == 
ExitCode.OK + assert "persist" in result.output + + +def test_collection_fails_for_ambiguous_lockfile_ids_with_missing_product_state( + runner, tmp_path +): + source = """ + from dataclasses import dataclass, field + from pathlib import Path + from typing import Annotated, Any + + from pytask import Product + + @dataclass + class CustomNode: + name: str + filepath: Path + signature: str + attributes: dict[Any, Any] = field(default_factory=dict) + + def state(self): + if not self.filepath.exists(): + return None + return self.filepath.read_text() + + def load(self, is_product=False): + return self if is_product else self.filepath.read_text() + + def save(self, value): + self.filepath.write_text(value) + + def task_example( + dependency=CustomNode( + name="dup", filepath=Path("in.txt"), signature="signature-a" + ), + product: Annotated[CustomNode, Product] = CustomNode( + name="dup", filepath=Path("out.txt"), signature="signature-b" + ), + ): + product.save(dependency.upper()) + """ + tmp_path.joinpath("task_module.py").write_text(textwrap.dedent(source)) + tmp_path.joinpath("in.txt").write_text("hello") + + result = runner.invoke(cli, [tmp_path.as_posix()]) + + assert result.exit_code == ExitCode.COLLECTION_FAILED + assert "Ambiguous lockfile ids detected" in result.output + assert "lockfile id 'dup'" in result.output + + +def test_lockfile_writes_state_to_database_for_compatibility(tmp_path): + def func(path): + path.write_text("data") + + task = TaskWithoutPath( + name="task", + function=func, + produces={"path": PathNode(path=tmp_path / "out.txt")}, + ) + + session = build(tasks=[task], paths=tmp_path) + assert session.exit_code == ExitCode.OK + assert (tmp_path / "pytask.lock").exists() + + db_path = tmp_path / ".pytask" / "pytask.sqlite3" + assert db_path.exists() + + task_signature = session.tasks[0].signature + with DatabaseSession() as db_session: + state = db_session.get(State, (task_signature, task_signature)) + assert state is not None + assert 
state.hash_ == session.tasks[0].state() + + +def test_clean_lockfile_removes_stale_entries(tmp_path): + def func_first(path): + path.touch() + + def func_second(path): + path.touch() + + task_first = TaskWithoutPath( + name="task_first", + function=func_first, + produces={"path": PathNode(path=tmp_path / "first.txt")}, + ) + task_second = TaskWithoutPath( + name="task_second", + function=func_second, + produces={"path": PathNode(path=tmp_path / "second.txt")}, + ) + + session = build(tasks=[task_first, task_second], paths=tmp_path) + assert session.exit_code == ExitCode.OK + lockfile = read_lockfile(tmp_path / "pytask.lock") + assert lockfile is not None + assert {entry.id for entry in lockfile.task} == {"task_first", "task_second"} + + session = build(tasks=[task_first], paths=tmp_path, clean_lockfile=True) + assert session.exit_code == ExitCode.OK + lockfile = read_lockfile(tmp_path / "pytask.lock") + assert lockfile is not None + assert {entry.id for entry in lockfile.task} == {"task_first"} + + +def test_update_task_skips_write_when_unchanged(tmp_path, monkeypatch): + def func(path): + path.write_text("data") + + task = TaskWithoutPath( + name="task", + function=func, + produces={"path": PathNode(path=tmp_path / "out.txt")}, + ) + + session = build(tasks=[task], paths=tmp_path) + assert session.exit_code == ExitCode.OK + + lockfile_state = session.config["lockfile_state"] + assert lockfile_state is not None + + calls = {"count": 0} + + original_append = lockfile_module.JsonlJournal.append + + def _counting_append(self, payload): + calls["count"] += 1 + return original_append(self, payload) + + monkeypatch.setattr(lockfile_module.JsonlJournal, "append", _counting_append) + lockfile_state.update_task(session, session.tasks[0]) + + assert calls["count"] == 0 + + +def test_update_task_appends_journal_on_change(tmp_path): + def func(path): + path.write_text("data") + + task = TaskWithoutPath( + name="task", + function=func, + produces={"path": PathNode(path=tmp_path 
/ "out.txt")}, + ) + + session = build(tasks=[task], paths=tmp_path) + assert session.exit_code == ExitCode.OK + + lockfile_state = session.config["lockfile_state"] + assert lockfile_state is not None + + def new_func(path): + path.write_text("changed") + + session.tasks[0].function = new_func + + lockfile_state.update_task(session, session.tasks[0]) + + journal_path = (tmp_path / "pytask.lock").with_suffix(".lock.journal") + assert journal_path.exists() + assert journal_path.read_text().strip() + + +def test_journal_replay_updates_lockfile_state(tmp_path): + def func(path): + path.write_text("data") + + task = TaskWithoutPath( + name="task", + function=func, + produces={"path": PathNode(path=tmp_path / "out.txt")}, + ) + + session = build(tasks=[task], paths=tmp_path) + assert session.exit_code == ExitCode.OK + + lockfile_state = session.config["lockfile_state"] + assert lockfile_state is not None + + def new_func(path): + path.write_text("changed") + + session.tasks[0].function = new_func + lockfile_state.update_task(session, session.tasks[0]) + + journal_path = (tmp_path / "pytask.lock").with_suffix(".lock.journal") + assert journal_path.exists() + + reloaded = lockfile_module.LockfileState.from_path( + tmp_path / "pytask.lock", tmp_path + ) + entry = reloaded.get_task_entry("task") + assert entry is not None + assert entry.state == session.tasks[0].state() + + +def test_flush_writes_lockfile_and_deletes_journal(tmp_path): + def func(path): + path.write_text("data") + + task = TaskWithoutPath( + name="task", + function=func, + produces={"path": PathNode(path=tmp_path / "out.txt")}, + ) + + session = build(tasks=[task], paths=tmp_path) + assert session.exit_code == ExitCode.OK + + lockfile_state = session.config["lockfile_state"] + assert lockfile_state is not None + + def new_func(path): + path.write_text("changed") + + session.tasks[0].function = new_func + lockfile_state.update_task(session, session.tasks[0]) + + journal_path = (tmp_path / 
"pytask.lock").with_suffix(".lock.journal") + assert journal_path.exists() + + lockfile_state.flush() + + assert not journal_path.exists() + lockfile = read_lockfile(tmp_path / "pytask.lock") + assert lockfile is not None + entries = {entry.id: entry for entry in lockfile.task} + assert entries["task"].state == session.tasks[0].state() diff --git a/tests/test_persist.py b/tests/test_persist.py index 49bf4a94..a1ebbf52 100644 --- a/tests/test_persist.py +++ b/tests/test_persist.py @@ -5,16 +5,17 @@ import pytest +from _pytask.lockfile import build_portable_node_id +from _pytask.lockfile import build_portable_task_id +from _pytask.lockfile import read_lockfile +from _pytask.node_protocols import PNode from _pytask.persist import pytask_execute_task_process_report -from pytask import DatabaseSession +from _pytask.tree_util import tree_leaves from pytask import ExitCode from pytask import Persisted from pytask import SkippedUnchanged -from pytask import State from pytask import TaskOutcome from pytask import build -from pytask import create_database -from pytask.path import hash_path from tests.conftest import restore_sys_path_and_module_after_test_execution @@ -27,7 +28,7 @@ def test_multiple_runs_with_persist(tmp_path): """Perform multiple consecutive runs and check intermediate outcomes with persist. 1. The product is missing which should result in a normal execution of the task. - 2. Change the product, check that run is successful and state in database has + 2. Change the product, check that run is successful and state in lockfile has changed. 3. Run the task another time. Now, the task is skipped successfully. 
@@ -63,21 +64,19 @@ def task_dummy(path=Path("in.txt"), produces=Path("out.txt")): assert exc_info is not None assert isinstance(exc_info[1], Persisted) - create_database( - "sqlite:///" + tmp_path.joinpath(".pytask", "pytask.sqlite3").as_posix() - ) - - with DatabaseSession() as db_session: - task_id = session.tasks[0].signature - produces = session.tasks[0].produces - assert produces is not None - node_id = produces["produces"].signature # type: ignore[union-attr] - - state = db_session.get(State, (task_id, node_id)) - assert state is not None - hash_ = state.hash_ - path = tmp_path.joinpath("out.txt") - assert hash_ == hash_path(path, path.stat().st_mtime) + lockfile = read_lockfile(tmp_path / "pytask.lock") + assert lockfile is not None + tasks_by_id = {entry.id: entry for entry in lockfile.task} + task = session.tasks[0] + entry = tasks_by_id[build_portable_task_id(task, tmp_path)] + produces = task.produces + assert produces is not None + nodes = tree_leaves(produces["produces"]) + assert len(nodes) == 1 + node = nodes[0] + assert isinstance(node, PNode) + node_id = build_portable_node_id(node, tmp_path) + assert entry.produces[node_id] == node.state() session = build(paths=tmp_path) diff --git a/tests/test_portability.py b/tests/test_portability.py new file mode 100644 index 00000000..32c452f7 --- /dev/null +++ b/tests/test_portability.py @@ -0,0 +1,27 @@ +from __future__ import annotations + +import shutil +from pathlib import Path + +from pytask import ExitCode +from pytask import cli + + +def test_completed_portability_fixture_is_skipped(runner, tmp_path): + fixture = ( + Path(__file__).resolve().parent + / "_test_data" + / "portability_projects" + / "basic_completed" + ) + project = tmp_path / "basic_completed" + shutil.copytree(fixture, project) + + original_lockfile = project.joinpath("pytask.lock").read_text() + + result = runner.invoke(cli, [project.as_posix()]) + + assert result.exit_code == ExitCode.OK + assert "1 Skipped because unchanged" in 
result.output + assert "1 Succeeded" not in result.output + assert project.joinpath("pytask.lock").read_text() == original_lockfile diff --git a/uv.lock b/uv.lock index 881da521..c5269b8b 100644 --- a/uv.lock +++ b/uv.lock @@ -1341,6 +1341,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/92/db/b4c12cff13ebac2786f4f217f06588bccd8b53d260453404ef22b121fc3a/greenlet-3.2.3-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:1afd685acd5597349ee6d7a88a8bec83ce13c106ac78c196ee9dde7c04fe87be", size = 268977, upload-time = "2025-06-05T16:10:24.001Z" }, { url = "https://files.pythonhosted.org/packages/52/61/75b4abd8147f13f70986df2801bf93735c1bd87ea780d70e3b3ecda8c165/greenlet-3.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:761917cac215c61e9dc7324b2606107b3b292a8349bdebb31503ab4de3f559ac", size = 627351, upload-time = "2025-06-05T16:38:50.685Z" }, { url = "https://files.pythonhosted.org/packages/35/aa/6894ae299d059d26254779a5088632874b80ee8cf89a88bca00b0709d22f/greenlet-3.2.3-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:a433dbc54e4a37e4fff90ef34f25a8c00aed99b06856f0119dcf09fbafa16392", size = 638599, upload-time = "2025-06-05T16:41:34.057Z" }, + { url = "https://files.pythonhosted.org/packages/30/64/e01a8261d13c47f3c082519a5e9dbf9e143cc0498ed20c911d04e54d526c/greenlet-3.2.3-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:72e77ed69312bab0434d7292316d5afd6896192ac4327d44f3d613ecb85b037c", size = 634482, upload-time = "2025-06-05T16:48:16.26Z" }, { url = "https://files.pythonhosted.org/packages/47/48/ff9ca8ba9772d083a4f5221f7b4f0ebe8978131a9ae0909cf202f94cd879/greenlet-3.2.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:68671180e3849b963649254a882cd544a3c75bfcd2c527346ad8bb53494444db", size = 633284, upload-time = "2025-06-05T16:13:01.599Z" }, { url = 
"https://files.pythonhosted.org/packages/e9/45/626e974948713bc15775b696adb3eb0bd708bec267d6d2d5c47bb47a6119/greenlet-3.2.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:49c8cfb18fb419b3d08e011228ef8a25882397f3a859b9fe1436946140b6756b", size = 582206, upload-time = "2025-06-05T16:12:48.51Z" }, { url = "https://files.pythonhosted.org/packages/b1/8e/8b6f42c67d5df7db35b8c55c9a850ea045219741bb14416255616808c690/greenlet-3.2.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:efc6dc8a792243c31f2f5674b670b3a95d46fa1c6a912b8e310d6f542e7b0712", size = 1111412, upload-time = "2025-06-05T16:36:45.479Z" }, @@ -1349,6 +1350,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fc/2e/d4fcb2978f826358b673f779f78fa8a32ee37df11920dc2bb5589cbeecef/greenlet-3.2.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:784ae58bba89fa1fa5733d170d42486580cab9decda3484779f4759345b29822", size = 270219, upload-time = "2025-06-05T16:10:10.414Z" }, { url = "https://files.pythonhosted.org/packages/16/24/929f853e0202130e4fe163bc1d05a671ce8dcd604f790e14896adac43a52/greenlet-3.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0921ac4ea42a5315d3446120ad48f90c3a6b9bb93dd9b3cf4e4d84a66e42de83", size = 630383, upload-time = "2025-06-05T16:38:51.785Z" }, { url = "https://files.pythonhosted.org/packages/d1/b2/0320715eb61ae70c25ceca2f1d5ae620477d246692d9cc284c13242ec31c/greenlet-3.2.3-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:d2971d93bb99e05f8c2c0c2f4aa9484a18d98c4c3bd3c62b65b7e6ae33dfcfaf", size = 642422, upload-time = "2025-06-05T16:41:35.259Z" }, + { url = "https://files.pythonhosted.org/packages/bd/49/445fd1a210f4747fedf77615d941444349c6a3a4a1135bba9701337cd966/greenlet-3.2.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:c667c0bf9d406b77a15c924ef3285e1e05250948001220368e039b6aa5b5034b", size = 638375, upload-time = "2025-06-05T16:48:18.235Z" }, { url = 
"https://files.pythonhosted.org/packages/7e/c8/ca19760cf6eae75fa8dc32b487e963d863b3ee04a7637da77b616703bc37/greenlet-3.2.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:592c12fb1165be74592f5de0d70f82bc5ba552ac44800d632214b76089945147", size = 637627, upload-time = "2025-06-05T16:13:02.858Z" }, { url = "https://files.pythonhosted.org/packages/65/89/77acf9e3da38e9bcfca881e43b02ed467c1dedc387021fc4d9bd9928afb8/greenlet-3.2.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:29e184536ba333003540790ba29829ac14bb645514fbd7e32af331e8202a62a5", size = 585502, upload-time = "2025-06-05T16:12:49.642Z" }, { url = "https://files.pythonhosted.org/packages/97/c6/ae244d7c95b23b7130136e07a9cc5aadd60d59b5951180dc7dc7e8edaba7/greenlet-3.2.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:93c0bb79844a367782ec4f429d07589417052e621aa39a5ac1fb99c5aa308edc", size = 1114498, upload-time = "2025-06-05T16:36:46.598Z" }, @@ -1357,6 +1359,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f3/94/ad0d435f7c48debe960c53b8f60fb41c2026b1d0fa4a99a1cb17c3461e09/greenlet-3.2.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:25ad29caed5783d4bd7a85c9251c651696164622494c00802a139c00d639242d", size = 271992, upload-time = "2025-06-05T16:11:23.467Z" }, { url = "https://files.pythonhosted.org/packages/93/5d/7c27cf4d003d6e77749d299c7c8f5fd50b4f251647b5c2e97e1f20da0ab5/greenlet-3.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:88cd97bf37fe24a6710ec6a3a7799f3f81d9cd33317dcf565ff9950c83f55e0b", size = 638820, upload-time = "2025-06-05T16:38:52.882Z" }, { url = "https://files.pythonhosted.org/packages/c6/7e/807e1e9be07a125bb4c169144937910bf59b9d2f6d931578e57f0bce0ae2/greenlet-3.2.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:baeedccca94880d2f5666b4fa16fc20ef50ba1ee353ee2d7092b383a243b0b0d", size = 653046, upload-time = "2025-06-05T16:41:36.343Z" }, + { url = 
"https://files.pythonhosted.org/packages/9d/ab/158c1a4ea1068bdbc78dba5a3de57e4c7aeb4e7fa034320ea94c688bfb61/greenlet-3.2.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:be52af4b6292baecfa0f397f3edb3c6092ce071b499dd6fe292c9ac9f2c8f264", size = 647701, upload-time = "2025-06-05T16:48:19.604Z" }, { url = "https://files.pythonhosted.org/packages/cc/0d/93729068259b550d6a0288da4ff72b86ed05626eaf1eb7c0d3466a2571de/greenlet-3.2.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0cc73378150b8b78b0c9fe2ce56e166695e67478550769536a6742dca3651688", size = 649747, upload-time = "2025-06-05T16:13:04.628Z" }, { url = "https://files.pythonhosted.org/packages/f6/f6/c82ac1851c60851302d8581680573245c8fc300253fc1ff741ae74a6c24d/greenlet-3.2.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:706d016a03e78df129f68c4c9b4c4f963f7d73534e48a24f5f5a7101ed13dbbb", size = 605461, upload-time = "2025-06-05T16:12:50.792Z" }, { url = "https://files.pythonhosted.org/packages/98/82/d022cf25ca39cf1200650fc58c52af32c90f80479c25d1cbf57980ec3065/greenlet-3.2.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:419e60f80709510c343c57b4bb5a339d8767bf9aef9b8ce43f4f143240f88b7c", size = 1121190, upload-time = "2025-06-05T16:36:48.59Z" }, @@ -1365,6 +1368,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b1/cf/f5c0b23309070ae93de75c90d29300751a5aacefc0a3ed1b1d8edb28f08b/greenlet-3.2.3-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:500b8689aa9dd1ab26872a34084503aeddefcb438e2e7317b89b11eaea1901ad", size = 270732, upload-time = "2025-06-05T16:10:08.26Z" }, { url = "https://files.pythonhosted.org/packages/48/ae/91a957ba60482d3fecf9be49bc3948f341d706b52ddb9d83a70d42abd498/greenlet-3.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a07d3472c2a93117af3b0136f246b2833fdc0b542d4a9799ae5f41c28323faef", size = 639033, upload-time = "2025-06-05T16:38:53.983Z" }, { url = 
"https://files.pythonhosted.org/packages/6f/df/20ffa66dd5a7a7beffa6451bdb7400d66251374ab40b99981478c69a67a8/greenlet-3.2.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:8704b3768d2f51150626962f4b9a9e4a17d2e37c8a8d9867bbd9fa4eb938d3b3", size = 652999, upload-time = "2025-06-05T16:41:37.89Z" }, + { url = "https://files.pythonhosted.org/packages/51/b4/ebb2c8cb41e521f1d72bf0465f2f9a2fd803f674a88db228887e6847077e/greenlet-3.2.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:5035d77a27b7c62db6cf41cf786cfe2242644a7a337a0e155c80960598baab95", size = 647368, upload-time = "2025-06-05T16:48:21.467Z" }, { url = "https://files.pythonhosted.org/packages/8e/6a/1e1b5aa10dced4ae876a322155705257748108b7fd2e4fae3f2a091fe81a/greenlet-3.2.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2d8aa5423cd4a396792f6d4580f88bdc6efcb9205891c9d40d20f6e670992efb", size = 650037, upload-time = "2025-06-05T16:13:06.402Z" }, { url = "https://files.pythonhosted.org/packages/26/f2/ad51331a157c7015c675702e2d5230c243695c788f8f75feba1af32b3617/greenlet-3.2.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2c724620a101f8170065d7dded3f962a2aea7a7dae133a009cada42847e04a7b", size = 608402, upload-time = "2025-06-05T16:12:51.91Z" }, { url = "https://files.pythonhosted.org/packages/26/bc/862bd2083e6b3aff23300900a956f4ea9a4059de337f5c8734346b9b34fc/greenlet-3.2.3-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:873abe55f134c48e1f2a6f53f7d1419192a3d1a4e873bace00499a4e45ea6af0", size = 1119577, upload-time = "2025-06-05T16:36:49.787Z" }, @@ -1373,6 +1377,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d8/ca/accd7aa5280eb92b70ed9e8f7fd79dc50a2c21d8c73b9a0856f5b564e222/greenlet-3.2.3-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:3d04332dddb10b4a211b68111dabaee2e1a073663d117dc10247b5b1642bac86", size = 271479, upload-time = "2025-06-05T16:10:47.525Z" }, { url = 
"https://files.pythonhosted.org/packages/55/71/01ed9895d9eb49223280ecc98a557585edfa56b3d0e965b9fa9f7f06b6d9/greenlet-3.2.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8186162dffde068a465deab08fc72c767196895c39db26ab1c17c0b77a6d8b97", size = 683952, upload-time = "2025-06-05T16:38:55.125Z" }, { url = "https://files.pythonhosted.org/packages/ea/61/638c4bdf460c3c678a0a1ef4c200f347dff80719597e53b5edb2fb27ab54/greenlet-3.2.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f4bfbaa6096b1b7a200024784217defedf46a07c2eee1a498e94a1b5f8ec5728", size = 696917, upload-time = "2025-06-05T16:41:38.959Z" }, + { url = "https://files.pythonhosted.org/packages/22/cc/0bd1a7eb759d1f3e3cc2d1bc0f0b487ad3cc9f34d74da4b80f226fde4ec3/greenlet-3.2.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:ed6cfa9200484d234d8394c70f5492f144b20d4533f69262d530a1a082f6ee9a", size = 692443, upload-time = "2025-06-05T16:48:23.113Z" }, { url = "https://files.pythonhosted.org/packages/67/10/b2a4b63d3f08362662e89c103f7fe28894a51ae0bc890fabf37d1d780e52/greenlet-3.2.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:02b0df6f63cd15012bed5401b47829cfd2e97052dc89da3cfaf2c779124eb892", size = 692995, upload-time = "2025-06-05T16:13:07.972Z" }, { url = "https://files.pythonhosted.org/packages/5a/c6/ad82f148a4e3ce9564056453a71529732baf5448ad53fc323e37efe34f66/greenlet-3.2.3-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:86c2d68e87107c1792e2e8d5399acec2487a4e993ab76c792408e59394d52141", size = 655320, upload-time = "2025-06-05T16:12:53.453Z" }, { url = "https://files.pythonhosted.org/packages/5c/4f/aab73ecaa6b3086a4c89863d94cf26fa84cbff63f52ce9bc4342b3087a06/greenlet-3.2.3-cp314-cp314-win_amd64.whl", hash = "sha256:8c47aae8fbbfcf82cc13327ae802ba13c9c36753b67e760023fd116bc124a62a", size = 301236, upload-time = "2025-06-05T16:15:20.111Z" }, @@ -2093,6 +2098,12 @@ wheels = [ { 
url = "https://files.pythonhosted.org/packages/c8/3e/c5187de84bb2c2ca334ab163fcacf19a23ebb1d876c837f81a1b324a15bf/msgspec-0.20.0-cp314-cp314t-win_arm64.whl", hash = "sha256:93f23528edc51d9f686808a361728e903d6f2be55c901d6f5c92e44c6d546bfc", size = 183011, upload-time = "2025-11-24T03:56:16.442Z" }, ] +[package.optional-dependencies] +toml = [ + { name = "tomli", marker = "python_full_version < '3.11'" }, + { name = "tomli-w" }, +] + [[package]] name = "multidict" version = "6.4.4" @@ -2909,7 +2920,7 @@ source = { editable = "." } dependencies = [ { name = "click" }, { name = "click-default-group" }, - { name = "msgspec" }, + { name = "msgspec", extra = ["toml"] }, { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "networkx", version = "3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "optree" }, @@ -2969,6 +2980,7 @@ requires-dist = [ { name = "click", specifier = ">=8.1.8,!=8.2.0" }, { name = "click-default-group", specifier = ">=1.2.4" }, { name = "msgspec", specifier = ">=0.18.6" }, + { name = "msgspec", extras = ["toml"], specifier = ">=0.18.6" }, { name = "networkx", specifier = ">=2.4.0" }, { name = "optree", specifier = ">=0.9.0" }, { name = "packaging", specifier = ">=23.0.0" }, @@ -2988,7 +3000,7 @@ docs = [ { name = "matplotlib", specifier = ">=3.5.0" }, { name = "myst-nb", specifier = ">=1.2.0" }, { name = "myst-parser", specifier = ">=3.0.0" }, - { name = "sphinx", specifier = ">=7.0.0" }, + { name = "sphinx", specifier = ">=7.0.0,<9.0.0" }, { name = "sphinx-click", specifier = ">=6.0.0" }, { name = "sphinx-copybutton", specifier = ">=0.5.2" }, { name = "sphinx-design", specifier = ">=0.3" }, @@ -3981,6 +3993,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6e/c2/61d3e0f47e2b74ef40a68b9e6ad5984f6241a942f7cd3bbfbdbd03861ea9/tomli-2.2.1-py3-none-any.whl", hash = 
"sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc", size = 14257, upload-time = "2024-11-27T22:38:35.385Z" }, ] +[[package]] +name = "tomli-w" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/19/75/241269d1da26b624c0d5e110e8149093c759b7a286138f4efd61a60e75fe/tomli_w-1.2.0.tar.gz", hash = "sha256:2dd14fac5a47c27be9cd4c976af5a12d87fb1f0b4512f81d69cce3b35ae25021", size = 7184, upload-time = "2025-01-15T12:07:24.262Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/18/c86eb8e0202e32dd3df50d43d7ff9854f8e0603945ff398974c1d91ac1ef/tomli_w-1.2.0-py3-none-any.whl", hash = "sha256:188306098d013b691fcadc011abd66727d3c414c571bb01b1a174ba8c983cf90", size = 6675, upload-time = "2025-01-15T12:07:22.074Z" }, +] + [[package]] name = "toolz" version = "1.0.0"