From 6fe67850e5ab84a1e7ba7de512eade55edbae439 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 22 Mar 2026 13:53:18 +0100 Subject: [PATCH 1/5] Fix remote UPath collection and node display --- src/_pytask/collect_command.py | 9 +++++++-- src/_pytask/path.py | 11 +++++++++++ tests/test_collect.py | 24 ++++++++++++++++++++++++ tests/test_collect_command.py | 25 +++++++++++++++++++++++++ tests/test_path.py | 9 +++++++++ 5 files changed, 76 insertions(+), 2 deletions(-) diff --git a/src/_pytask/collect_command.py b/src/_pytask/collect_command.py index bd75eecc..24159180 100644 --- a/src/_pytask/collect_command.py +++ b/src/_pytask/collect_command.py @@ -30,6 +30,7 @@ from _pytask.node_protocols import PTaskWithPath from _pytask.outcomes import ExitCode from _pytask.path import find_common_ancestor +from _pytask.path import is_non_local_path from _pytask.path import relative_to from _pytask.pluginmanager import hookimpl from _pytask.pluginmanager import storage @@ -125,10 +126,14 @@ def _find_common_ancestor_of_all_nodes( all_paths.append(task.path) if show_nodes: all_paths.extend( - x.path for x in tree_leaves(task.depends_on) if isinstance(x, PPathNode) + x.path + for x in tree_leaves(task.depends_on) + if isinstance(x, PPathNode) and not is_non_local_path(x.path) ) all_paths.extend( - x.path for x in tree_leaves(task.produces) if isinstance(x, PPathNode) + x.path + for x in tree_leaves(task.produces) + if isinstance(x, PPathNode) and not is_non_local_path(x.path) ) return find_common_ancestor(*all_paths, *paths) diff --git a/src/_pytask/path.py b/src/_pytask/path.py index a70c16d5..05dfa993 100644 --- a/src/_pytask/path.py +++ b/src/_pytask/path.py @@ -13,6 +13,8 @@ from types import ModuleType from typing import TYPE_CHECKING +from upath import UPath + from _pytask._hashlib import file_digest from _pytask.cache import Cache @@ -25,6 +27,7 @@ "find_common_ancestor", "hash_path", "import_path", + "is_non_local_path", "relative_to", "shorten_path", ] @@ -56,6 +59,11 @@ def relative_to(path: Path, source: Path, *, include_source: bool = True) -> Pat return Path(source_name, path.relative_to(source)) +def is_non_local_path(path: Path) -> bool: + """Return whether a path points to a non-local `UPath` resource.""" + return isinstance(path, UPath) and bool(path.protocol) + + def find_closest_ancestor( path: Path, potential_ancestors: Sequence[Path] ) -> Path | None: @@ -432,6 +440,9 @@ def shorten_path(path: Path, paths: Sequence[Path]) -> str: path from one path in ``session.config["paths"]`` to the node. """ + if is_non_local_path(path): + return path.as_posix() + ancestor = find_closest_ancestor(path, paths) if ancestor is None: try: diff --git a/tests/test_collect.py b/tests/test_collect.py index 7e08c2d4..4cc3c77f 100644 --- a/tests/test_collect.py +++ b/tests/test_collect.py @@ -14,6 +14,7 @@ from pytask import CollectionOutcome from pytask import ExitCode from pytask import NodeInfo +from pytask import PickleNode from pytask import Session from pytask import Task from pytask import build @@ -190,6 +191,29 @@ def test_pytask_collect_node(session, path, node_info, expected): assert str(result.load()) == str(expected) +def test_pytask_collect_remote_path_node_keeps_uri_name(): + upath = pytest.importorskip("upath") + + session = Session.from_config( + {"check_casing_of_paths": False, "paths": (Path.cwd(),), "root": Path.cwd()} + ) + + result = pytask_collect_node( + session, + Path.cwd(), + NodeInfo( + arg_name="path", + path=(), + value=PickleNode(path=upath.UPath("s3://bucket/file.pkl")), + task_path=Path.cwd() / "task_example.py", + task_name="task_example", + ), + ) + + assert isinstance(result, PPathNode) + assert result.name == "s3://bucket/file.pkl" + + @pytest.mark.skipif( sys.platform != "win32", reason="Only works on case-insensitive file systems." ) diff --git a/tests/test_collect_command.py b/tests/test_collect_command.py index 532d6b97..bb4adcf6 100644 --- a/tests/test_collect_command.py +++ b/tests/test_collect_command.py @@ -396,6 +396,31 @@ def test_task_name_is_shortened(runner, tmp_path): assert "a/b/task_example.py::task_example" not in result.output +def test_collect_task_with_remote_upath_node(runner, tmp_path): + pytest.importorskip("upath") + + source = """ + from pathlib import Path + from typing import Annotated + + from upath import UPath + + from pytask import PickleNode + from pytask import Product + + def task_example( + data=PickleNode(path=UPath("s3://bucket/in.pkl")), + path: Annotated[Path, Product] = Path("out.txt"), + ): ... + """ + tmp_path.joinpath("task_module.py").write_text(textwrap.dedent(source)) + + result = runner.invoke(cli, ["collect", "--nodes", tmp_path.as_posix()]) + + assert result.exit_code == ExitCode.OK + assert "s3://bucket/in.pkl" in result.output + + def test_python_node_is_collected(runner, tmp_path): source = """ from pytask import Product diff --git a/tests/test_path.py b/tests/test_path.py index 4d72932f..fb6c98d3 100644 --- a/tests/test_path.py +++ b/tests/test_path.py @@ -20,6 +20,7 @@ from _pytask.path import find_closest_ancestor from _pytask.path import find_common_ancestor from _pytask.path import relative_to +from _pytask.path import shorten_path from pytask.path import import_path if TYPE_CHECKING: @@ -110,6 +111,14 @@ def test_find_common_ancestor(path_1, path_2, expectation, expected): assert result == expected +def test_shorten_path_keeps_non_local_uri(): + upath = pytest.importorskip("upath") + + path = upath.UPath("s3://bucket/file.pkl") + + assert shorten_path(path, [Path.cwd()]) == "s3://bucket/file.pkl" + + @pytest.mark.skipif(sys.platform != "win32", reason="Only works on Windows.") @pytest.mark.parametrize( ("path", "existing_paths", "expected"), From c16d3078b15cf3b92f5eeefe62343be36936b45d Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 22 Mar 2026 14:16:34 +0100 Subject: [PATCH 2/5] Handle remote UPath nodes in collection --- src/_pytask/collect.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/_pytask/collect.py b/src/_pytask/collect.py index 109833bc..c1743472 100644 --- a/src/_pytask/collect.py +++ b/src/_pytask/collect.py @@ -42,6 +42,7 @@ from _pytask.outcomes import count_outcomes from _pytask.path import find_case_sensitive_path from _pytask.path import import_path +from _pytask.path import is_non_local_path from _pytask.path import shorten_path from _pytask.pluginmanager import hookimpl from _pytask.reports import CollectionReport @@ -455,7 +456,11 @@ def pytask_collect_node( # noqa: C901, PLR0912 node.name = create_name_of_python_node(node_info) return node - if isinstance(node, PPathNode) and not node.path.is_absolute(): + if ( + isinstance(node, PPathNode) + and not is_non_local_path(node.path) + and not node.path.is_absolute() + ): node.path = path.joinpath(node.path) # ``normpath`` removes ``../`` from the path which is necessary for the casing From 03ca18db4fcae282ad5b7a70dff5cf2bd51aca89 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 22 Mar 2026 14:55:58 +0100 Subject: [PATCH 3/5] Handle local UPath protocols consistently --- src/_pytask/collect.py | 7 +++++++ src/_pytask/collect_command.py | 5 +++-- src/_pytask/path.py | 16 +++++++++++++++- tests/test_collect.py | 25 +++++++++++++++++++++++++ tests/test_collect_command.py | 26 ++++++++++++++++++++++++++ tests/test_path.py | 11 +++++++++++ 6 files changed, 87 insertions(+), 3 deletions(-) diff --git a/src/_pytask/collect.py b/src/_pytask/collect.py index c1743472..4a09c9b0 100644 --- a/src/_pytask/collect.py +++ b/src/_pytask/collect.py @@ -43,6 +43,7 @@ from _pytask.path import find_case_sensitive_path from _pytask.path import import_path from _pytask.path import is_non_local_path +from _pytask.path import normalize_local_upath from _pytask.path import shorten_path from _pytask.pluginmanager import hookimpl from _pytask.reports import CollectionReport @@ -456,6 +457,9 @@ def pytask_collect_node( # noqa: C901, PLR0912 node.name = create_name_of_python_node(node_info) return node + if isinstance(node, PPathNode): + node.path = normalize_local_upath(node.path) + if ( isinstance(node, PPathNode) and not is_non_local_path(node.path) @@ -492,6 +496,9 @@ def pytask_collect_node( # noqa: C901, PLR0912 node.name = create_name_of_python_node(node_info) return node + if isinstance(node, UPath): # pragma: no cover + node = normalize_local_upath(node) + if isinstance(node, UPath): # pragma: no cover if not node.protocol: node = Path(node) diff --git a/src/_pytask/collect_command.py b/src/_pytask/collect_command.py index 24159180..4ad86229 100644 --- a/src/_pytask/collect_command.py +++ b/src/_pytask/collect_command.py @@ -31,6 +31,7 @@ from _pytask.outcomes import ExitCode from _pytask.path import find_common_ancestor from _pytask.path import is_non_local_path +from _pytask.path import normalize_local_upath from _pytask.path import relative_to from _pytask.pluginmanager import hookimpl from _pytask.pluginmanager import storage @@ -126,12 +127,12 @@ def _find_common_ancestor_of_all_nodes( all_paths.append(task.path) if show_nodes: all_paths.extend( - x.path + normalize_local_upath(x.path) for x in tree_leaves(task.depends_on) if isinstance(x, PPathNode) and not is_non_local_path(x.path) ) all_paths.extend( - x.path + normalize_local_upath(x.path) for x in tree_leaves(task.produces) if isinstance(x, PPathNode) and not is_non_local_path(x.path) ) diff --git a/src/_pytask/path.py b/src/_pytask/path.py index 05dfa993..dd950354 100644 --- a/src/_pytask/path.py +++ b/src/_pytask/path.py @@ -28,11 +28,15 @@ "hash_path", "import_path", "is_non_local_path", + "normalize_local_upath", "relative_to", "shorten_path", ] +_LOCAL_UPATH_PROTOCOLS = frozenset(("", "file", "local")) + + def relative_to(path: Path, source: Path, *, include_source: bool = True) -> Path: """Make a path relative to another path. @@ -61,7 +65,14 @@ def relative_to(path: Path, source: Path, *, include_source: bool = True) -> Pat def is_non_local_path(path: Path) -> bool: """Return whether a path points to a non-local `UPath` resource.""" - return isinstance(path, UPath) and bool(path.protocol) + return isinstance(path, UPath) and path.protocol not in _LOCAL_UPATH_PROTOCOLS + + +def normalize_local_upath(path: Path) -> Path: + """Convert local `UPath` variants to a stdlib `Path`.""" + if isinstance(path, UPath) and path.protocol in {"file", "local"}: + return Path(path.path) + return path def find_closest_ancestor( @@ -443,6 +454,9 @@ def shorten_path(path: Path, paths: Sequence[Path]) -> str: if is_non_local_path(path): return path.as_posix() + path = normalize_local_upath(path) + paths = [normalize_local_upath(p) for p in paths] + ancestor = find_closest_ancestor(path, paths) if ancestor is None: try: diff --git a/tests/test_collect.py b/tests/test_collect.py index 4cc3c77f..266ce0f9 100644 --- a/tests/test_collect.py +++ b/tests/test_collect.py @@ -214,6 +214,31 @@ def test_pytask_collect_remote_path_node_keeps_uri_name(): assert result.name == "s3://bucket/file.pkl" +@pytest.mark.parametrize("protocol", ["file", "local"]) +def test_pytask_collect_local_upath_protocol_node_is_shortened(tmp_path, protocol): + upath = pytest.importorskip("upath") + + session = Session.from_config( + {"check_casing_of_paths": False, "paths": (tmp_path,), "root": tmp_path} + ) + + result = pytask_collect_node( + session, + tmp_path, + NodeInfo( + arg_name="path", + path=(), + value=PickleNode(path=upath.UPath(f"{protocol}://{tmp_path}/file.pkl")), + task_path=tmp_path / "task_example.py", + task_name="task_example", + ), + ) + + assert isinstance(result, PPathNode) + assert result.path == tmp_path / "file.pkl" + assert result.name == f"{tmp_path.name}/file.pkl" + + @pytest.mark.skipif( sys.platform != "win32", reason="Only works on case-insensitive file systems." ) diff --git a/tests/test_collect_command.py b/tests/test_collect_command.py index bb4adcf6..1b9af724 100644 --- a/tests/test_collect_command.py +++ b/tests/test_collect_command.py @@ -421,6 +421,32 @@ def task_example( assert "s3://bucket/in.pkl" in result.output +@pytest.mark.parametrize("protocol", ["file", "local"]) +def test_collect_task_with_local_upath_protocol_node(runner, tmp_path, protocol): + pytest.importorskip("upath") + + source = f""" + from pathlib import Path + from typing import Annotated + + from upath import UPath + + from pytask import PickleNode + from pytask import Product + + def task_example( + data=PickleNode(path=UPath("{protocol}://{tmp_path.as_posix()}/in.pkl")), + path: Annotated[Path, Product] = Path("out.txt"), + ): ... + """ + tmp_path.joinpath("task_module.py").write_text(textwrap.dedent(source)) + + result = runner.invoke(cli, ["collect", "--nodes", tmp_path.as_posix()]) + + assert result.exit_code == ExitCode.OK + assert f"{tmp_path.name}/in.pkl" in result.output + + def test_python_node_is_collected(runner, tmp_path): source = """ from pytask import Product diff --git a/tests/test_path.py b/tests/test_path.py index fb6c98d3..688fcc10 100644 --- a/tests/test_path.py +++ b/tests/test_path.py @@ -19,6 +19,7 @@ from _pytask.path import find_case_sensitive_path from _pytask.path import find_closest_ancestor from _pytask.path import find_common_ancestor +from _pytask.path import is_non_local_path from _pytask.path import relative_to from _pytask.path import shorten_path from pytask.path import import_path @@ -119,6 +120,16 @@ def test_shorten_path_keeps_non_local_uri(): assert shorten_path(path, [Path.cwd()]) == "s3://bucket/file.pkl" +@pytest.mark.parametrize("protocol", ["file", "local"]) +def test_shorten_path_treats_local_upath_protocols_as_local(tmp_path, protocol): + upath = pytest.importorskip("upath") + + path = upath.UPath(f"{protocol}://{tmp_path.as_posix()}/file.pkl") + + assert not is_non_local_path(path) + assert shorten_path(path, [tmp_path]) == f"{tmp_path.name}/file.pkl" + + @pytest.mark.skipif(sys.platform != "win32", reason="Only works on Windows.") @pytest.mark.parametrize( ("path", "existing_paths", "expected"), From c5f9852c23f4417974b54c525c002a2a31c5af8d Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 22 Mar 2026 15:49:01 +0100 Subject: [PATCH 4/5] Fix Windows local UPath normalization --- src/_pytask/path.py | 12 +++++++++++- tests/test_collect.py | 8 +++++++- tests/test_collect_command.py | 8 +++++++- tests/test_path.py | 19 ++++++++++++++++++- 4 files changed, 43 insertions(+), 4 deletions(-) diff --git a/src/_pytask/path.py b/src/_pytask/path.py index dd950354..5619c04f 100644 --- a/src/_pytask/path.py +++ b/src/_pytask/path.py @@ -35,6 +35,7 @@ _LOCAL_UPATH_PROTOCOLS = frozenset(("", "file", "local")) +_WINDOWS_DRIVE_PREFIX_LENGTH = 3 def relative_to(path: Path, source: Path, *, include_source: bool = True) -> Path: @@ -71,7 +72,16 @@ def is_non_local_path(path: Path) -> bool: def normalize_local_upath(path: Path) -> Path: """Convert local `UPath` variants to a stdlib `Path`.""" if isinstance(path, UPath) and path.protocol in {"file", "local"}: - return Path(path.path) + local_path = path.path + if ( + sys.platform == "win32" + and local_path.startswith("/") + and len(local_path) >= _WINDOWS_DRIVE_PREFIX_LENGTH + and local_path[1].isalpha() + and local_path[2] == ":" + ): + local_path = local_path[1:] + return Path(local_path) return path diff --git a/tests/test_collect.py b/tests/test_collect.py index 266ce0f9..29f80c59 100644 --- a/tests/test_collect.py +++ b/tests/test_collect.py @@ -22,6 +22,10 @@ from tests.conftest import noop +def _make_local_upath_uri(path: Path, protocol: str) -> str: + return f"{protocol}:///{path.as_posix().lstrip('/')}" + + @pytest.mark.parametrize( ("depends_on", "produces"), [ @@ -228,7 +232,9 @@ def test_pytask_collect_local_upath_protocol_node_is_shortened(tmp_path, protoco NodeInfo( arg_name="path", path=(), - value=PickleNode(path=upath.UPath(f"{protocol}://{tmp_path}/file.pkl")), + value=PickleNode( + path=upath.UPath(_make_local_upath_uri(tmp_path / "file.pkl", protocol)) + ), task_path=tmp_path / "task_example.py", task_name="task_example", ), diff --git a/tests/test_collect_command.py b/tests/test_collect_command.py index 1b9af724..f84cc98c 100644 --- a/tests/test_collect_command.py +++ b/tests/test_collect_command.py @@ -21,6 +21,10 @@ from _pytask.node_protocols import PTaskWithPath +def _make_local_upath_uri(path: Path, protocol: str) -> str: + return f"{protocol}:///{path.as_posix().lstrip('/')}" + + def test_collect_task(runner, tmp_path): source = """ from pathlib import Path @@ -425,6 +429,8 @@ def task_example( def test_collect_task_with_local_upath_protocol_node(runner, tmp_path, protocol): pytest.importorskip("upath") + uri = _make_local_upath_uri(tmp_path / "in.pkl", protocol) + source = f""" from pathlib import Path from typing import Annotated @@ -435,7 +441,7 @@ def test_collect_task_with_local_upath_protocol_node(runner, tmp_path, protocol) from pytask import Product def task_example( - data=PickleNode(path=UPath("{protocol}://{tmp_path.as_posix()}/in.pkl")), + data=PickleNode(path=UPath("{uri}")), path: Annotated[Path, Product] = Path("out.txt"), ): ... """ diff --git a/tests/test_path.py b/tests/test_path.py index 688fcc10..ba490149 100644 --- a/tests/test_path.py +++ b/tests/test_path.py @@ -20,6 +20,7 @@ from _pytask.path import find_closest_ancestor from _pytask.path import find_common_ancestor from _pytask.path import is_non_local_path +from _pytask.path import normalize_local_upath from _pytask.path import relative_to from _pytask.path import shorten_path from pytask.path import import_path @@ -28,6 +29,10 @@ from collections.abc import Generator +def _make_local_upath_uri(path: Path, protocol: str) -> str: + return f"{protocol}:///{path.as_posix().lstrip('/')}" + + @pytest.mark.parametrize( ("path", "source", "include_source", "expected"), [ @@ -124,12 +129,24 @@ def test_shorten_path_keeps_non_local_uri(): def test_shorten_path_treats_local_upath_protocols_as_local(tmp_path, protocol): upath = pytest.importorskip("upath") - path = upath.UPath(f"{protocol}://{tmp_path.as_posix()}/file.pkl") + path = upath.UPath(_make_local_upath_uri(tmp_path / "file.pkl", protocol)) assert not is_non_local_path(path) assert shorten_path(path, [tmp_path]) == f"{tmp_path.name}/file.pkl" +@pytest.mark.parametrize("protocol", ["file", "local"]) +def test_normalize_local_upath_strips_windows_drive_prefix(monkeypatch, protocol): + upath = pytest.importorskip("upath") + + monkeypatch.setattr(sys, "platform", "win32") + path = upath.UPath(f"{protocol}:///C:/tmp/file.pkl") + + result = normalize_local_upath(path) + + assert result.as_posix() == "C:/tmp/file.pkl" + + @pytest.mark.skipif(sys.platform != "win32", reason="Only works on Windows.") @pytest.mark.parametrize( ("path", "existing_paths", "expected"), From e548563edbfcb2093f24c41ecc887e23725ba1fd Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 22 Mar 2026 15:57:58 +0100 Subject: [PATCH 5/5] Add changelog entry for UPath collection fix --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2e6f2f7c..782ceca2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ releases are available on [PyPI](https://pypi.org/project/pytask) and ## Unreleased +- [#820](https://github.com/pytask-dev/pytask/pull/820) fixes collection and node + display for remote `UPath`-backed nodes, while preserving correct handling of local + `file://` and `local://` `UPath`s across platforms. - [#743](https://github.com/pytask-dev/pytask/pull/743) adds the `pytask.lock` lockfile as the primary state backend with a portable format and documentation. When no lockfile exists, pytask reads the legacy SQLite state and writes `pytask.lock`;