From 25b8f23116ab86b908dd1186b412fa8c6347b62d Mon Sep 17 00:00:00 2001 From: Martin Gaughran Date: Thu, 8 Jan 2026 19:05:26 +0000 Subject: [PATCH 1/3] Use AnyUrl for URL fields This likely needs further refinement, but it should catch most accidental errors. The issue with using a more restrictive regex is that it will not provide a clear description of any error. --- src/deploy_tools/app_builder.py | 2 +- src/deploy_tools/models/apptainer_app.py | 5 +++-- src/deploy_tools/models/binary_app.py | 4 ++-- src/deploy_tools/models/save_and_load.py | 4 +++- src/deploy_tools/models/schemas/deployment.json | 4 ++++ src/deploy_tools/models/schemas/module.json | 4 ++++ src/deploy_tools/models/schemas/release.json | 4 ++++ 7 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/deploy_tools/app_builder.py b/src/deploy_tools/app_builder.py index d50361f..d639194 100644 --- a/src/deploy_tools/app_builder.py +++ b/src/deploy_tools/app_builder.py @@ -120,7 +120,7 @@ def _create_binary_file(self, app: BinaryApp, module: Module) -> None: ) binary_path = binary_folder / app.name binary_path.parent.mkdir(parents=True, exist_ok=True) - urlretrieve(app.url, binary_path) + urlretrieve(str(app.url), binary_path) match app.hash_type: case HashType.SHA256: diff --git a/src/deploy_tools/models/apptainer_app.py b/src/deploy_tools/models/apptainer_app.py index 3f43da3..07ca0f1 100644 --- a/src/deploy_tools/models/apptainer_app.py +++ b/src/deploy_tools/models/apptainer_app.py @@ -1,6 +1,6 @@ from typing import Annotated, Literal -from pydantic import Field, StringConstraints +from pydantic import AnyUrl, Field, StringConstraints, UrlConstraints from .app import ENTRYPOINT_NAME_REGEX from .parent import ParentModel @@ -63,7 +63,8 @@ class Entrypoint(ParentModel): class ContainerImage(ParentModel): path: Annotated[ - str, + AnyUrl, + UrlConstraints(allowed_schemes=["docker", "shub", "oras", "https"]), Field( description="Image URL excluding the version/tag. Must be a valid URL as " "described here: " diff --git a/src/deploy_tools/models/binary_app.py b/src/deploy_tools/models/binary_app.py index 01f2fb1..a97152c 100644 --- a/src/deploy_tools/models/binary_app.py +++ b/src/deploy_tools/models/binary_app.py @@ -1,7 +1,7 @@ from enum import StrEnum from typing import Annotated, Literal -from pydantic import Field, StringConstraints +from pydantic import AnyUrl, Field, StringConstraints from .app import ENTRYPOINT_NAME_REGEX from .parent import ParentModel @@ -32,7 +32,7 @@ class BinaryApp(ParentModel): StringConstraints(pattern=ENTRYPOINT_NAME_REGEX), Field(description="Name of executable to use after loading the Module"), ] - url: Annotated[str, Field(description="URL to download the binary from")] + url: Annotated[AnyUrl, Field(description="URL to download the binary from")] hash: Annotated[str, Field(description="Hash to verify binary integrity")] = "" hash_type: Annotated[ HashType, Field(description="Type of hash used to check the binary") diff --git a/src/deploy_tools/models/save_and_load.py b/src/deploy_tools/models/save_and_load.py index 81ff084..add0762 100644 --- a/src/deploy_tools/models/save_and_load.py +++ b/src/deploy_tools/models/save_and_load.py @@ -1,3 +1,4 @@ +import json from collections import defaultdict from pathlib import Path from typing import BinaryIO, TextIO @@ -27,7 +28,8 @@ def save_as_yaml( output_path.parent.mkdir(exist_ok=True, parents=True) with open(output_path, "w") as f: - yaml.safe_dump(obj.model_dump(), f) + load_json = json.loads(obj.model_dump_json()) + yaml.safe_dump(load_json, f) def load_from_yaml[T: BaseModel](model: type[T], input_stream: TextIO | BinaryIO) -> T: diff --git a/src/deploy_tools/models/schemas/deployment.json b/src/deploy_tools/models/schemas/deployment.json index 6ba71b2..a3ad58c 100644 --- a/src/deploy_tools/models/schemas/deployment.json +++ b/src/deploy_tools/models/schemas/deployment.json @@ -59,6 +59,8 @@ }, "url": { "description": "URL to download the binary from", + "format": "uri", + "minLength": 1, "title": "Url", "type": "string" }, @@ -87,6 +89,8 @@ "properties": { "path": { "description": "Image URL excluding the version/tag. Must be a valid URL as described here: https://apptainer.org/docs/user/main/cli/apptainer_pull.html#synopsis", + "format": "uri", + "minLength": 1, "title": "Path", "type": "string" }, diff --git a/src/deploy_tools/models/schemas/module.json b/src/deploy_tools/models/schemas/module.json index 5bdbb73..b837fa8 100644 --- a/src/deploy_tools/models/schemas/module.json +++ b/src/deploy_tools/models/schemas/module.json @@ -59,6 +59,8 @@ }, "url": { "description": "URL to download the binary from", + "format": "uri", + "minLength": 1, "title": "Url", "type": "string" }, @@ -87,6 +89,8 @@ "properties": { "path": { "description": "Image URL excluding the version/tag. Must be a valid URL as described here: https://apptainer.org/docs/user/main/cli/apptainer_pull.html#synopsis", + "format": "uri", + "minLength": 1, "title": "Path", "type": "string" }, diff --git a/src/deploy_tools/models/schemas/release.json b/src/deploy_tools/models/schemas/release.json index 5bdbb73..b837fa8 100644 --- a/src/deploy_tools/models/schemas/release.json +++ b/src/deploy_tools/models/schemas/release.json @@ -59,6 +59,8 @@ }, "url": { "description": "URL to download the binary from", + "format": "uri", + "minLength": 1, "title": "Url", "type": "string" }, @@ -87,6 +89,8 @@ "properties": { "path": { "description": "Image URL excluding the version/tag. Must be a valid URL as described here: https://apptainer.org/docs/user/main/cli/apptainer_pull.html#synopsis", + "format": "uri", + "minLength": 1, "title": "Path", "type": "string" }, From de6ee34313c7204d939a830b49a5d747e4a20360 Mon Sep 17 00:00:00 2001 From: Martin Gaughran Date: Thu, 8 Jan 2026 19:32:37 +0000 Subject: [PATCH 2/3] Add bash syntax checking Note that this does not ensure that the bash script will actually run successfully. There may be additional errors e.g. by using an unavailable function or tool. --- src/deploy_tools/validate.py | 51 ++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/src/deploy_tools/validate.py b/src/deploy_tools/validate.py index 96f5e7b..3bd7ead 100644 --- a/src/deploy_tools/validate.py +++ b/src/deploy_tools/validate.py @@ -1,4 +1,5 @@ import logging +import subprocess from pathlib import Path from tempfile import TemporaryDirectory @@ -21,6 +22,9 @@ logger = logging.getLogger(__name__) +SCRIPT_INDICATOR_PHRASE = "shell script" + + class ValidationError(Exception): pass @@ -52,6 +56,7 @@ def validate_and_test_configuration( if test_build: logger.info("Performing test build") build(deployment_changes, layout) + check_built_scripts(deployment_changes, layout) logger.info("Printing updates") print_updates(snapshot_default_versions, deployment_changes) @@ -229,3 +234,49 @@ def _get_all_default_versions( final_defaults[name] = sorted_versions[-1] return final_defaults + + +def is_shell_script(file: Path) -> bool: + """Determine whether specified file is a shell script. + + This uses the output of the Linux 'file' command, which is dependent on the + shebang line as well as the OS. Both are controlled in our deployment. + """ + result = subprocess.run( + ["file", f"{file.absolute()}"], capture_output=True, text=True + ) + return SCRIPT_INDICATOR_PHRASE in result.stdout + + +def check_bash_syntax(file: Path) -> None: + """Check bash syntax is valid for the given file. + + Since failing this validation will prevent a deploy-tools job from continuing, we + don't want to use more stringent tests as with e.g. shellcheck. + + This is also unable to check that all required functions and tools are available, so + some typos are likely to pass. + """ + result = subprocess.run( + ["bash", "-n", f"{file.absolute()}"], + capture_output=True, + text=True, + ) + if result.stderr: + raise ValidationError( + f"Output script {file.absolute()} is invalid with errors:\n{result.stderr}" + ) + + +def check_built_scripts(changes: DeploymentChanges, layout: Layout) -> None: + release_changes = changes.release_changes + releases = release_changes.to_add + release_changes.to_update + build_layout = layout.build_layout + + for release in releases: + name = release.module.name + version = release.module.version + + for entrypoint in build_layout.get_entrypoints_folder(name, version).glob("*"): + if is_shell_script(entrypoint): + check_bash_syntax(entrypoint) From 5133fdca1165677bc55fa8b920c947d376ce1c8f Mon Sep 17 00:00:00 2001 From: Martin Gaughran Date: Tue, 13 Jan 2026 13:27:51 +0000 Subject: [PATCH 3/3] Add comment to explain how we can make use of Pydantic types with YAML --- src/deploy_tools/models/save_and_load.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/deploy_tools/models/save_and_load.py b/src/deploy_tools/models/save_and_load.py index add0762..93251c5 100644 --- a/src/deploy_tools/models/save_and_load.py +++ b/src/deploy_tools/models/save_and_load.py @@ -28,6 +28,8 @@ def save_as_yaml( output_path.parent.mkdir(exist_ok=True, parents=True) with open(output_path, "w") as f: + # Use model_dump_json() and then yaml.safe_dump() to allow us to use Pydantic + # types such as AnyURL with YAML without needing a custom serializer load_json = json.loads(obj.model_dump_json()) yaml.safe_dump(load_json, f)