From a63c8cc73285a89022d46359f0fe760f994c165e Mon Sep 17 00:00:00 2001 From: Ambient Code Bot Date: Wed, 8 Apr 2026 15:39:35 +0000 Subject: [PATCH 1/5] fix: preserve URL query parameters in storage flash for signed URLs URLs with query parameters (e.g. CloudFront signed URLs with ?Expires=...&Signature=...&Key-Pair-Id=...) were being silently stripped during URL parsing, causing the download to receive an error/empty response instead of the actual image. Fixes #271 Co-Authored-By: Claude Opus 4.6 --- .../jumpstarter_driver_flashers/client.py | 33 +++++--- .../client_test.py | 80 +++++++++++++++++++ .../jumpstarter_driver_opendal/client.py | 8 +- .../jumpstarter_driver_opendal/driver_test.py | 25 ++++++ 4 files changed, 136 insertions(+), 10 deletions(-) diff --git a/python/packages/jumpstarter-driver-flashers/jumpstarter_driver_flashers/client.py b/python/packages/jumpstarter-driver-flashers/jumpstarter_driver_flashers/client.py index 43f26dad3..4c1373a0c 100644 --- a/python/packages/jumpstarter-driver-flashers/jumpstarter_driver_flashers/client.py +++ b/python/packages/jumpstarter-driver-flashers/jumpstarter_driver_flashers/client.py @@ -167,10 +167,14 @@ def flash( # noqa: C901 "http", root="/", endpoint=f"{parsed.scheme}://{parsed.netloc}", token=bearer_token ) operator_scheme = "http" - path = Path(parsed.path) + # Preserve query parameters so that signed URLs + # (e.g. CloudFront with ?Expires=...&Signature=...) work correctly. + path = parsed.path + if parsed.query: + path = f"{path}?{parsed.query}" else: path, operator, operator_scheme = operator_for_path(path) - image_url = self.http.get_url() + "/" + path.name + image_url = self.http.get_url() + "/" + self._filename(path) # start counting time for the flash operation start_time = time.time() @@ -968,7 +972,7 @@ def _transfer_bg_thread( """ self.logger.info(f"Writing image to storage in the background: {src_path}") try: - filename = Path(src_path).name if isinstance(src_path, (str, os.PathLike)) else src_path.name + filename = self._filename(src_path) if src_operator_scheme == "fs": file_hash = self._sha256_file(src_operator, src_path) @@ -1088,8 +1092,8 @@ def dump( raise NotImplementedError("Dump is not implemented for this driver yet") def _filename(self, path: PathBuf) -> str: - """Extract filename from url or path""" - if path.startswith("oci://"): + """Extract filename from url or path, stripping any query parameters""" + if isinstance(path, str) and path.startswith("oci://"): oci_path = path[6:] # Remove "oci://" prefix if ":" in oci_path: repository, tag = oci_path.rsplit(":", 1) @@ -1098,10 +1102,15 @@ def _filename(self, path: PathBuf) -> str: else: repo_name = oci_path.split("/")[-1] if "/" in oci_path else oci_path return repo_name - elif path.startswith(("http://", "https://")): + elif isinstance(path, str) and path.startswith(("http://", "https://")): return urlparse(path).path.split("/")[-1] else: - return Path(path).name + # Strip query parameters from the path (e.g. from signed URLs + # like /path/to/image.raw.xz?Expires=...&Signature=...) + name = Path(path).name + if isinstance(name, str) and "?" in name: + name = name.split("?", 1)[0] + return name def _upload_artifact(self, storage, path: PathBuf, operator: Operator): """Upload artifact to storage""" @@ -1636,15 +1645,21 @@ def _get_decompression_command(filename_or_url) -> str: Determine the appropriate decompression command based on file extension Args: - filename (str): Name of the file to check + filename_or_url (str): Name of the file or URL to check Returns: str: Decompression command ('zcat', 'xzcat', or 'cat' for uncompressed) """ if type(filename_or_url) is PosixPath: filename = filename_or_url.name - elif filename_or_url.startswith(("http://", "https://")): + elif isinstance(filename_or_url, str) and filename_or_url.startswith(("http://", "https://")): filename = urlparse(filename_or_url).path.split("/")[-1] + else: + # Handle plain string paths, possibly with query parameters + # (e.g. /path/to/image.raw.xz?Expires=...&Signature=...) + filename = str(filename_or_url).split("/")[-1] + if "?" in filename: + filename = filename.split("?", 1)[0] filename = filename.lower() if filename.endswith((".gz", ".gzip")): diff --git a/python/packages/jumpstarter-driver-flashers/jumpstarter_driver_flashers/client_test.py b/python/packages/jumpstarter-driver-flashers/jumpstarter_driver_flashers/client_test.py index 44f1214c4..e462f2a87 100644 --- a/python/packages/jumpstarter-driver-flashers/jumpstarter_driver_flashers/client_test.py +++ b/python/packages/jumpstarter-driver-flashers/jumpstarter_driver_flashers/client_test.py @@ -428,6 +428,86 @@ def test_categorize_exception_preserves_cause_for_wrapped_exceptions(): assert "File not found" in str(result) +def test_filename_strips_query_params_from_url_path(): + """Test _filename strips query parameters from paths with signed URL params""" + client = MockFlasherClient() + + # Full HTTP URL + assert client._filename("https://cdn.example.com/images/image.raw.xz") == "image.raw.xz" + + # Full HTTP URL with query parameters (e.g. CloudFront signed URL) + assert ( + client._filename("https://cdn.example.com/images/image.raw.xz?Expires=123&Signature=abc&Key-Pair-Id=xyz") + == "image.raw.xz" + ) + + # Path string with query parameters (as returned by operator_for_path after fix) + assert client._filename("/images/image.raw.xz?Expires=123&Signature=abc") == "image.raw.xz" + + # Plain path without query parameters + assert client._filename("/images/image.raw.xz") == "image.raw.xz" + + # OCI path + assert client._filename("oci://quay.io/org/myimage:latest") == "myimage-latest" + + +def test_decompression_command_with_query_params(): + """Test _get_decompression_command handles paths with query parameters""" + from pathlib import PosixPath + + from .client import _get_decompression_command + + # Standard PosixPath + assert _get_decompression_command(PosixPath("/images/image.raw.xz")) == "xzcat |" + assert _get_decompression_command(PosixPath("/images/image.raw.gz")) == "zcat |" + assert _get_decompression_command(PosixPath("/images/image.raw")) == "" + + # Full HTTP URL + assert _get_decompression_command("https://cdn.example.com/images/image.raw.xz") == "xzcat |" + + # String path with query parameters (as returned by operator_for_path for signed URLs) + assert _get_decompression_command("/images/image.raw.xz?Expires=123&Signature=abc") == "xzcat |" + assert _get_decompression_command("/images/image.raw.gz?Expires=123") == "zcat |" + assert _get_decompression_command("/images/image.raw?Expires=123") == "" + + +def test_flash_signed_url_preserves_query_params(): + """Test that flash with a signed HTTP URL preserves query parameters for image_url""" + client = MockFlasherClient() + + class DummyService: + def __init__(self): + self.storage = object() + + def start(self): + pass + + def stop(self): + pass + + def get_url(self): + return "http://exporter" + + client.http = DummyService() + client.tftp = DummyService() + client.call = lambda *args, **kwargs: None + + captured = {} + + def capture_perform(*args): + captured["image_url"] = args[3] + captured["should_download_to_httpd"] = args[4] + + client._perform_flash_operation = capture_perform + + # Direct HTTP URL with query params (no force_exporter_http) should preserve full URL + signed_url = "https://cdn.example.com/images/image.raw.xz?Expires=123&Signature=abc&Key-Pair-Id=xyz" + client.flash(signed_url, method="fls", fls_version="") + + assert captured["image_url"] == signed_url + assert captured["should_download_to_httpd"] is False + + def test_resolve_flash_parameters(): """Test flash parameter resolution for single file, partitions, and error cases""" client = MockFlasherClient() diff --git a/python/packages/jumpstarter-driver-opendal/jumpstarter_driver_opendal/client.py b/python/packages/jumpstarter-driver-opendal/jumpstarter_driver_opendal/client.py index eca6aa7d1..05048b781 100644 --- a/python/packages/jumpstarter-driver-opendal/jumpstarter_driver_opendal/client.py +++ b/python/packages/jumpstarter-driver-opendal/jumpstarter_driver_opendal/client.py @@ -54,7 +54,13 @@ def operator_for_path(path: PathBuf) -> tuple[PathBuf, Operator, str]: if type(path) is str and path.startswith(("http://", "https://")): parsed_url = urlparse(path) operator = Operator("http", root="/", endpoint=f"{parsed_url.scheme}://{parsed_url.netloc}") - return Path(parsed_url.path), operator, "http" + # Preserve query parameters in the path so that signed URLs + # (e.g. CloudFront URLs with ?Expires=...&Signature=...&Key-Pair-Id=...) + # are fetched correctly by the OpenDAL HTTP operator. + op_path = parsed_url.path + if parsed_url.query: + op_path = f"{op_path}?{parsed_url.query}" + return op_path, operator, "http" else: return Path(path).resolve(), Operator("fs", root="/"), "fs" diff --git a/python/packages/jumpstarter-driver-opendal/jumpstarter_driver_opendal/driver_test.py b/python/packages/jumpstarter-driver-opendal/jumpstarter_driver_opendal/driver_test.py index 2668b1760..ddd26e82c 100644 --- a/python/packages/jumpstarter-driver-opendal/jumpstarter_driver_opendal/driver_test.py +++ b/python/packages/jumpstarter-driver-opendal/jumpstarter_driver_opendal/driver_test.py @@ -322,3 +322,28 @@ def test_copy_and_rename_tracking(tmp_path): assert "copied_dir" in created_paths assert "renamed_dir" in created_paths assert len(created_paths) == 4 + + +def test_operator_for_path_preserves_query_params(): + """Test that operator_for_path preserves query parameters for HTTP URLs""" + from .client import operator_for_path + + # HTTP URL without query parameters + path, operator, scheme = operator_for_path("https://cdn.example.com/images/image.raw.xz") + assert scheme == "http" + assert path == "/images/image.raw.xz" + + # HTTP URL with query parameters (e.g. CloudFront signed URL) + path, operator, scheme = operator_for_path( + "https://cdn.example.com/images/image.raw.xz?Expires=123&Signature=abc&Key-Pair-Id=xyz" + ) + assert scheme == "http" + assert path == "/images/image.raw.xz?Expires=123&Signature=abc&Key-Pair-Id=xyz" + assert "Expires=123" in path + assert "Signature=abc" in path + assert "Key-Pair-Id=xyz" in path + + # Filesystem path + path, operator, scheme = operator_for_path("/tmp/image.raw.xz") + assert scheme == "fs" + assert str(path) == "/tmp/image.raw.xz" From 95b57fc11f9702a8c901d951ba91642d08a275be Mon Sep 17 00:00:00 2001 From: Ambient Code Bot Date: Wed, 8 Apr 2026 16:38:16 +0000 Subject: [PATCH 2/5] fix: use Path.resolve() in test to handle macOS /tmp symlink On macOS, /tmp is a symlink to /private/tmp, so Path.resolve() returns /private/tmp/... instead of /tmp/.... Compare against the resolved path to make the test pass on both Linux and macOS. Co-Authored-By: Claude Opus 4.6 --- .../jumpstarter_driver_opendal/driver_test.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/python/packages/jumpstarter-driver-opendal/jumpstarter_driver_opendal/driver_test.py b/python/packages/jumpstarter-driver-opendal/jumpstarter_driver_opendal/driver_test.py index ddd26e82c..8cfb74744 100644 --- a/python/packages/jumpstarter-driver-opendal/jumpstarter_driver_opendal/driver_test.py +++ b/python/packages/jumpstarter-driver-opendal/jumpstarter_driver_opendal/driver_test.py @@ -343,7 +343,10 @@ def test_operator_for_path_preserves_query_params(): assert "Signature=abc" in path assert "Key-Pair-Id=xyz" in path - # Filesystem path + # Filesystem path (use resolve() for the expected value since macOS + # resolves /tmp to /private/tmp) + from pathlib import Path + path, operator, scheme = operator_for_path("/tmp/image.raw.xz") assert scheme == "fs" - assert str(path) == "/tmp/image.raw.xz" + assert path == Path("/tmp/image.raw.xz").resolve() From 28e2314e5add9573cb2d93c3c92315e631daf070 Mon Sep 17 00:00:00 2001 From: Ambient Code Bot Date: Mon, 13 Apr 2026 18:38:05 +0000 Subject: [PATCH 3/5] refactor: extract clean_filename helper and fix ridesx query param handling Address review feedback from @raballew: - Extract shared clean_filename() helper in opendal client to avoid duplicated query-parameter stripping logic across packages - Fix ridesx client _upload_file_if_needed() to use clean_filename() instead of Path(path_buf).name, preventing query params from polluting filenames when signed URLs are used - Remove redundant isinstance(name, str) guard in flashers _filename() - Simplify _get_decompression_command() to use clean_filename() - Remove unused PosixPath import from flashers client Co-Authored-By: Claude Opus 4.6 --- .../jumpstarter_driver_flashers/client.py | 26 +++---------------- .../jumpstarter_driver_opendal/client.py | 15 +++++++++++ .../jumpstarter_driver_ridesx/client.py | 4 +-- 3 files changed, 21 insertions(+), 24 deletions(-) diff --git a/python/packages/jumpstarter-driver-flashers/jumpstarter_driver_flashers/client.py b/python/packages/jumpstarter-driver-flashers/jumpstarter_driver_flashers/client.py index 4c1373a0c..7db97d24e 100644 --- a/python/packages/jumpstarter-driver-flashers/jumpstarter_driver_flashers/client.py +++ b/python/packages/jumpstarter-driver-flashers/jumpstarter_driver_flashers/client.py @@ -10,7 +10,7 @@ from concurrent.futures import CancelledError from contextlib import contextmanager from dataclasses import dataclass -from pathlib import Path, PosixPath +from pathlib import Path from queue import Queue from urllib.parse import urlparse @@ -18,7 +18,7 @@ import pexpect import requests from jumpstarter_driver_composite.client import CompositeClient -from jumpstarter_driver_opendal.client import FlasherClient, OpendalClient, operator_for_path +from jumpstarter_driver_opendal.client import FlasherClient, OpendalClient, clean_filename, operator_for_path from jumpstarter_driver_opendal.common import PathBuf from jumpstarter_driver_pyserial.client import Console from opendal import Metadata, Operator @@ -1102,15 +1102,8 @@ def _filename(self, path: PathBuf) -> str: else: repo_name = oci_path.split("/")[-1] if "/" in oci_path else oci_path return repo_name - elif isinstance(path, str) and path.startswith(("http://", "https://")): - return urlparse(path).path.split("/")[-1] else: - # Strip query parameters from the path (e.g. from signed URLs - # like /path/to/image.raw.xz?Expires=...&Signature=...) - name = Path(path).name - if isinstance(name, str) and "?" in name: - name = name.split("?", 1)[0] - return name + return clean_filename(path) def _upload_artifact(self, storage, path: PathBuf, operator: Operator): """Upload artifact to storage""" @@ -1650,18 +1643,7 @@ def _get_decompression_command(filename_or_url) -> str: Returns: str: Decompression command ('zcat', 'xzcat', or 'cat' for uncompressed) """ - if type(filename_or_url) is PosixPath: - filename = filename_or_url.name - elif isinstance(filename_or_url, str) and filename_or_url.startswith(("http://", "https://")): - filename = urlparse(filename_or_url).path.split("/")[-1] - else: - # Handle plain string paths, possibly with query parameters - # (e.g. /path/to/image.raw.xz?Expires=...&Signature=...) - filename = str(filename_or_url).split("/")[-1] - if "?" in filename: - filename = filename.split("?", 1)[0] - - filename = filename.lower() + filename = clean_filename(filename_or_url).lower() if filename.endswith((".gz", ".gzip")): return "zcat |" elif filename.endswith(".xz"): diff --git a/python/packages/jumpstarter-driver-opendal/jumpstarter_driver_opendal/client.py b/python/packages/jumpstarter-driver-opendal/jumpstarter_driver_opendal/client.py index 05048b781..89024463e 100644 --- a/python/packages/jumpstarter-driver-opendal/jumpstarter_driver_opendal/client.py +++ b/python/packages/jumpstarter-driver-opendal/jumpstarter_driver_opendal/client.py @@ -44,6 +44,21 @@ async def aclose(self): pass +def clean_filename(path: PathBuf) -> str: + """Extract a clean filename from a path or URL, stripping query parameters. + + This handles paths returned by operator_for_path() which may contain + query parameters for signed URLs (e.g. /path/to/image.raw.xz?Expires=...&Signature=...). + """ + path_str = str(path) + if path_str.startswith(("http://", "https://")): + return urlparse(path_str).path.split("/")[-1] + name = Path(path_str).name + if "?" in name: + name = name.split("?", 1)[0] + return name + + def operator_for_path(path: PathBuf) -> tuple[PathBuf, Operator, str]: """Create an operator for the given path Return a tuple of: diff --git a/python/packages/jumpstarter-driver-ridesx/jumpstarter_driver_ridesx/client.py b/python/packages/jumpstarter-driver-ridesx/jumpstarter_driver_ridesx/client.py index 4bd10cfde..55bdfcaa0 100644 --- a/python/packages/jumpstarter-driver-ridesx/jumpstarter_driver_ridesx/client.py +++ b/python/packages/jumpstarter-driver-ridesx/jumpstarter_driver_ridesx/client.py @@ -5,7 +5,7 @@ import click from jumpstarter_driver_composite.client import CompositeClient -from jumpstarter_driver_opendal.client import FlasherClient, operator_for_path +from jumpstarter_driver_opendal.client import FlasherClient, clean_filename, operator_for_path from jumpstarter_driver_power.client import PowerClient from opendal import Operator @@ -39,7 +39,7 @@ def _upload_file_if_needed(self, file_path: str, operator: Operator | None = Non path_buf = Path(file_path) operator_scheme = "unknown" - filename = Path(path_buf).name + filename = clean_filename(path_buf) if self._should_upload_file(self.storage, filename, path_buf, operator, operator_scheme): if operator_scheme == "http": From 15d3321059c131d28bd2f7a2b6c049d7ebb9a33a Mon Sep 17 00:00:00 2001 From: Ambient Code Bot Date: Wed, 15 Apr 2026 10:35:00 +0000 Subject: [PATCH 4/5] test: add clean_filename unit tests and bearer token signed URL test Add direct unit tests for `clean_filename()` in driver_test.py to provide explicit regression coverage as a public API used by three packages. Add test for the bearer token + signed URL code path with `force_exporter_http=True` to verify query params are properly reconstructed via `parsed.path + '?' + parsed.query`. Addresses review feedback from @raballew. Co-Authored-By: Claude Opus 4.6 --- .../client_test.py | 58 +++++++++++++++++++ .../jumpstarter_driver_opendal/driver_test.py | 32 ++++++++++ 2 files changed, 90 insertions(+) diff --git a/python/packages/jumpstarter-driver-flashers/jumpstarter_driver_flashers/client_test.py b/python/packages/jumpstarter-driver-flashers/jumpstarter_driver_flashers/client_test.py index e462f2a87..5b9547c69 100644 --- a/python/packages/jumpstarter-driver-flashers/jumpstarter_driver_flashers/client_test.py +++ b/python/packages/jumpstarter-driver-flashers/jumpstarter_driver_flashers/client_test.py @@ -508,6 +508,64 @@ def capture_perform(*args): assert captured["should_download_to_httpd"] is False +def test_flash_bearer_token_signed_url_preserves_query_params(): + """Test that flash with force_exporter_http=True and bearer token preserves query params. + + When a signed URL is used with a bearer token, the flash() method enters the + bearer token code path (lines 162-174 in client.py) which reconstructs the path + from parsed.path + '?' + parsed.query. This test verifies query params are preserved + and the path passed to the storage thread is correct. + """ + client = MockFlasherClient() + + class DummyService: + def __init__(self): + self.storage = object() + + def start(self): + pass + + def stop(self): + pass + + def get_url(self): + return "http://exporter" + + def get_host(self): + return "127.0.0.1" + + client.http = DummyService() + client.tftp = DummyService() + client.call = lambda *args, **kwargs: None + + captured = {} + + def capture_perform(*args): + captured["path"] = args[2] + captured["image_url"] = args[3] + captured["should_download_to_httpd"] = args[4] + + client._perform_flash_operation = capture_perform + # Mock the background transfer thread to prevent it from actually running + client._transfer_bg_thread = lambda *args, **kwargs: None + + signed_url = "https://cdn.example.com/images/image.raw.xz?Expires=123&Signature=abc&Key-Pair-Id=xyz" + client.flash( + signed_url, + force_exporter_http=True, + bearer_token="test-token-123", + method="fls", + fls_version="", + ) + + # With force_exporter_http=True and bearer_token, should download to httpd + assert captured["should_download_to_httpd"] is True + # The path should have query params preserved (reconstructed from parsed.path + '?' + parsed.query) + assert captured["path"] == "/images/image.raw.xz?Expires=123&Signature=abc&Key-Pair-Id=xyz" + # The image_url should point to the exporter with the clean filename (no query params) + assert captured["image_url"] == "http://exporter/image.raw.xz" + + def test_resolve_flash_parameters(): """Test flash parameter resolution for single file, partitions, and error cases""" client = MockFlasherClient() diff --git a/python/packages/jumpstarter-driver-opendal/jumpstarter_driver_opendal/driver_test.py b/python/packages/jumpstarter-driver-opendal/jumpstarter_driver_opendal/driver_test.py index 8cfb74744..41432ceda 100644 --- a/python/packages/jumpstarter-driver-opendal/jumpstarter_driver_opendal/driver_test.py +++ b/python/packages/jumpstarter-driver-opendal/jumpstarter_driver_opendal/driver_test.py @@ -324,6 +324,38 @@ def test_copy_and_rename_tracking(tmp_path): assert len(created_paths) == 4 +def test_clean_filename(): + """Test clean_filename extracts filenames and strips query parameters""" + from pathlib import PosixPath + + from .client import clean_filename + + # Plain filesystem path + assert clean_filename("/images/image.raw.xz") == "image.raw.xz" + assert clean_filename(PosixPath("/images/image.raw.xz")) == "image.raw.xz" + + # Filesystem path with query params (as returned by operator_for_path for signed URLs) + assert clean_filename("/images/image.raw.xz?Expires=123&Signature=abc") == "image.raw.xz" + + # Full HTTP URL without query params + assert clean_filename("https://cdn.example.com/images/image.raw.xz") == "image.raw.xz" + assert clean_filename("http://cdn.example.com/images/image.raw.xz") == "image.raw.xz" + + # Full HTTP URL with query params (e.g. CloudFront signed URL) + assert ( + clean_filename("https://cdn.example.com/images/image.raw.xz?Expires=123&Signature=abc&Key-Pair-Id=xyz") + == "image.raw.xz" + ) + + # Edge case: no directory component + assert clean_filename("image.raw.xz") == "image.raw.xz" + assert clean_filename("image.raw.xz?Expires=123") == "image.raw.xz" + + # Edge case: compressed extensions + assert clean_filename("/path/to/image.raw.gz?token=abc") == "image.raw.gz" + assert clean_filename("/path/to/image.raw.gzip?token=abc") == "image.raw.gzip" + + def test_operator_for_path_preserves_query_params(): """Test that operator_for_path preserves query parameters for HTTP URLs""" from .client import operator_for_path From 8ff618182a8aa068d5903d1915edf58ca4f58911 Mon Sep 17 00:00:00 2001 From: Ambient Code Bot Date: Wed, 15 Apr 2026 21:34:48 +0000 Subject: [PATCH 5/5] fix: address round 3 review feedback from @raballew - Fix clean_filename() bug: strip query params before Path().name to handle unencoded slashes in signatures (e.g. ?Signature=abc/def/ghi) - Fix credential leak: log clean filename instead of full src_path in _transfer_bg_thread to avoid exposing Signature/Key-Pair-Id in logs - Fix credential persistence: store clean_filename in metadata JSON instead of full src_path with query params - Extract path_with_query() helper to replace inline comments in both operator_for_path and flash() bearer token branch - Fix _get_decompression_command docstring (returns '' not 'cat') - Use named params in test capture_perform to avoid brittle positional arg indexing - Add clean_filename regression test for slashes in query params - Add ridesx test for clean_filename with signed URLs Co-Authored-By: Claude Opus 4.6 --- .../jumpstarter_driver_flashers/client.py | 22 ++++++------ .../client_test.py | 14 ++++---- .../jumpstarter_driver_opendal/client.py | 34 +++++++++++-------- .../jumpstarter_driver_opendal/driver_test.py | 3 ++ .../jumpstarter_driver_ridesx/client_test.py | 14 ++++++++ 5 files changed, 55 insertions(+), 32 deletions(-) diff --git a/python/packages/jumpstarter-driver-flashers/jumpstarter_driver_flashers/client.py b/python/packages/jumpstarter-driver-flashers/jumpstarter_driver_flashers/client.py index 7db97d24e..f465566bf 100644 --- a/python/packages/jumpstarter-driver-flashers/jumpstarter_driver_flashers/client.py +++ b/python/packages/jumpstarter-driver-flashers/jumpstarter_driver_flashers/client.py @@ -18,7 +18,13 @@ import pexpect import requests from jumpstarter_driver_composite.client import CompositeClient -from jumpstarter_driver_opendal.client import FlasherClient, OpendalClient, clean_filename, operator_for_path +from jumpstarter_driver_opendal.client import ( + FlasherClient, + OpendalClient, + clean_filename, + operator_for_path, + path_with_query, +) from jumpstarter_driver_opendal.common import PathBuf from jumpstarter_driver_pyserial.client import Console from opendal import Metadata, Operator @@ -167,11 +173,7 @@ def flash( # noqa: C901 "http", root="/", endpoint=f"{parsed.scheme}://{parsed.netloc}", token=bearer_token ) operator_scheme = "http" - # Preserve query parameters so that signed URLs - # (e.g. CloudFront with ?Expires=...&Signature=...) work correctly. - path = parsed.path - if parsed.query: - path = f"{path}?{parsed.query}" + path = path_with_query(parsed) else: path, operator, operator_scheme = operator_for_path(path) image_url = self.http.get_url() + "/" + self._filename(path) @@ -970,9 +972,9 @@ def _transfer_bg_thread( original_url: Original URL for HTTP fallback headers: HTTP headers for requests """ - self.logger.info(f"Writing image to storage in the background: {src_path}") + filename = self._filename(src_path) + self.logger.info(f"Writing image to storage in the background: {filename}") try: - filename = self._filename(src_path) if src_operator_scheme == "fs": file_hash = self._sha256_file(src_operator, src_path) @@ -1023,7 +1025,7 @@ def _create_metadata_and_json( ) -> tuple[Metadata | None, str]: """Create a metadata json string from a metadata object""" metadata = None - metadata_dict = {"path": str(src_path)} + metadata_dict = {"path": clean_filename(src_path)} try: metadata = src_operator.stat(src_path) @@ -1641,7 +1643,7 @@ def _get_decompression_command(filename_or_url) -> str: filename_or_url (str): Name of the file or URL to check Returns: - str: Decompression command ('zcat', 'xzcat', or 'cat' for uncompressed) + str: Decompression command ('zcat |', 'xzcat |', or '' for uncompressed) """ filename = clean_filename(filename_or_url).lower() if filename.endswith((".gz", ".gzip")): diff --git a/python/packages/jumpstarter-driver-flashers/jumpstarter_driver_flashers/client_test.py b/python/packages/jumpstarter-driver-flashers/jumpstarter_driver_flashers/client_test.py index 5b9547c69..cef2434a4 100644 --- a/python/packages/jumpstarter-driver-flashers/jumpstarter_driver_flashers/client_test.py +++ b/python/packages/jumpstarter-driver-flashers/jumpstarter_driver_flashers/client_test.py @@ -494,9 +494,9 @@ def get_url(self): captured = {} - def capture_perform(*args): - captured["image_url"] = args[3] - captured["should_download_to_httpd"] = args[4] + def capture_perform(partition, block_device, path, image_url, should_download_to_httpd, *rest): + captured["image_url"] = image_url + captured["should_download_to_httpd"] = should_download_to_httpd client._perform_flash_operation = capture_perform @@ -540,10 +540,10 @@ def get_host(self): captured = {} - def capture_perform(*args): - captured["path"] = args[2] - captured["image_url"] = args[3] - captured["should_download_to_httpd"] = args[4] + def capture_perform(partition, block_device, path, image_url, should_download_to_httpd, *rest): + captured["path"] = path + captured["image_url"] = image_url + captured["should_download_to_httpd"] = should_download_to_httpd client._perform_flash_operation = capture_perform # Mock the background transfer thread to prevent it from actually running diff --git a/python/packages/jumpstarter-driver-opendal/jumpstarter_driver_opendal/client.py b/python/packages/jumpstarter-driver-opendal/jumpstarter_driver_opendal/client.py index 89024463e..dea62551f 100644 --- a/python/packages/jumpstarter-driver-opendal/jumpstarter_driver_opendal/client.py +++ b/python/packages/jumpstarter-driver-opendal/jumpstarter_driver_opendal/client.py @@ -47,35 +47,39 @@ async def aclose(self): def clean_filename(path: PathBuf) -> str: """Extract a clean filename from a path or URL, stripping query parameters. - This handles paths returned by operator_for_path() which may contain + Handles paths returned by operator_for_path() which may contain query parameters for signed URLs (e.g. /path/to/image.raw.xz?Expires=...&Signature=...). """ path_str = str(path) if path_str.startswith(("http://", "https://")): return urlparse(path_str).path.split("/")[-1] - name = Path(path_str).name - if "?" in name: - name = name.split("?", 1)[0] - return name + if "?" in path_str: + path_str = path_str.split("?", 1)[0] + return Path(path_str).name + + +def path_with_query(parsed_url) -> str: + """Reconstruct path preserving query parameters for signed URL support.""" + if parsed_url.query: + return f"{parsed_url.path}?{parsed_url.query}" + return parsed_url.path def operator_for_path(path: PathBuf) -> tuple[PathBuf, Operator, str]: - """Create an operator for the given path + """Create an operator for the given path. + + For HTTP URLs, query parameters are preserved in the returned path so that + signed URLs (e.g. CloudFront with Expires/Signature/Key-Pair-Id) work correctly. + Return a tuple of: - - the path + - the path (str for HTTP, Path for filesystem) - the operator for the given path - - the scheme of the operator. + - the scheme of the operator """ if type(path) is str and path.startswith(("http://", "https://")): parsed_url = urlparse(path) operator = Operator("http", root="/", endpoint=f"{parsed_url.scheme}://{parsed_url.netloc}") - # Preserve query parameters in the path so that signed URLs - # (e.g. CloudFront URLs with ?Expires=...&Signature=...&Key-Pair-Id=...) - # are fetched correctly by the OpenDAL HTTP operator. - op_path = parsed_url.path - if parsed_url.query: - op_path = f"{op_path}?{parsed_url.query}" - return op_path, operator, "http" + return path_with_query(parsed_url), operator, "http" else: return Path(path).resolve(), Operator("fs", root="/"), "fs" diff --git a/python/packages/jumpstarter-driver-opendal/jumpstarter_driver_opendal/driver_test.py b/python/packages/jumpstarter-driver-opendal/jumpstarter_driver_opendal/driver_test.py index 41432ceda..bf13ac753 100644 --- a/python/packages/jumpstarter-driver-opendal/jumpstarter_driver_opendal/driver_test.py +++ b/python/packages/jumpstarter-driver-opendal/jumpstarter_driver_opendal/driver_test.py @@ -355,6 +355,9 @@ def test_clean_filename(): assert clean_filename("/path/to/image.raw.gz?token=abc") == "image.raw.gz" assert clean_filename("/path/to/image.raw.gzip?token=abc") == "image.raw.gzip" + # Edge case: query params with unencoded slashes (e.g. base64 signatures) + assert clean_filename("/images/image.raw.xz?Expires=123&Signature=abc/def/ghi") == "image.raw.xz" + def test_operator_for_path_preserves_query_params(): """Test that operator_for_path preserves query parameters for HTTP URLs""" diff --git a/python/packages/jumpstarter-driver-ridesx/jumpstarter_driver_ridesx/client_test.py b/python/packages/jumpstarter-driver-ridesx/jumpstarter_driver_ridesx/client_test.py index 125f0b103..349f5150f 100644 --- a/python/packages/jumpstarter-driver-ridesx/jumpstarter_driver_ridesx/client_test.py +++ b/python/packages/jumpstarter-driver-ridesx/jumpstarter_driver_ridesx/client_test.py @@ -161,3 +161,17 @@ def test_flash_no_target_no_partition_spec(ridesx_client): """Non-OCI path without colon or target should give a generic helpful error""" with pytest.raises(click.ClickException, match="requires a target partition"): ridesx_client.flash("/path/to/boot.img") + + +def test_upload_file_if_needed_strips_query_params(ridesx_client): + """Verify _upload_file_if_needed produces a clean filename for signed URLs""" + from jumpstarter_driver_opendal.client import clean_filename + + # Simulate the path_buf that would come from operator_for_path with a signed URL + path_with_query = "/images/image.raw.xz?Expires=123&Signature=abc/def&Key-Pair-Id=xyz" + result = clean_filename(path_with_query) + assert result == "image.raw.xz" + + # Also verify the direct path case + result = clean_filename("/images/image.raw.xz") + assert result == "image.raw.xz"