nextstrain · pre-commit-ci · Mar 30, 2026 · Mar 30, 2026
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,11 +1,11 @@
 exclude: '\.(tsv|fasta|gb)$|^shared/vendored/|^phylogenetic/rules/config.smk'
 repos:
   - repo: https://github.com/snakemake/snakefmt
-    rev: v0.11.1
+    rev: v1.0.0
     hooks:
       - id: snakefmt
   - repo: https://github.com/rhysd/actionlint
-    rev: v1.7.7
+    rev: v1.7.12
     hooks:
       - id: actionlint
         entry: env SHELLCHECK_OPTS='--exclude=SC2027' actionlint
@@ -16,11 +16,11 @@ repos:
   #       additional_dependencies:
   #         - tomli
   - repo: https://github.com/google/yamlfmt
-    rev: v0.17.2
+    rev: v0.21.0
     hooks:
       - id: yamlfmt
   - repo: https://github.com/pappasam/toml-sort
-    rev: v0.24.2
+    rev: v0.24.4
     hooks:
       - id: toml-sort-fix
   - repo: https://github.com/pre-commit/pre-commit-hooks

diff --git a/ingest/build-configs/nextstrain-automation/nextstrain_automation.smk b/ingest/build-configs/nextstrain-automation/nextstrain_automation.smk
@@ -63,12 +63,12 @@ rule custom_subset_metadata:
         metadata="data/all_metadata_added.tsv",
     output:
         subset_metadata="data/subset_metadata.tsv",
-    params:
-        metadata_fields=",".join(config["curate"]["metadata_columns"]),
-    benchmark:
-        "benchmarks/subset_metadata.txt"
     log:
         "logs/subset_metadata.txt",
+    benchmark:
+        "benchmarks/subset_metadata.txt"
+    params:
+        metadata_fields=",".join(config["curate"]["metadata_columns"]),
     shell:
         r"""
         exec &> >(tee {log:q})

diff --git a/ingest/build-configs/nextstrain-automation/trigger_rebuild.smk b/ingest/build-configs/nextstrain-automation/trigger_rebuild.smk
@@ -9,8 +9,8 @@ to expected upload flag files.
 
 rule trigger_build:
     """
-    Triggering monekypox builds via repository action type `rebuild`.
-    """
+Triggering monkeypox builds via repository action type `rebuild`.
+"""
     input:
         metadata_upload="data/upload/s3/metadata_with_restricted.tsv.zst.done",
         fasta_upload="data/upload/s3/sequences_with_restricted.fasta.zst.done",

diff --git a/ingest/rules/curate.smk b/ingest/rules/curate.smk
@@ -28,10 +28,10 @@ rule generate_continent:
         script="scripts/generate_continent.py",
     output:
         ndjson="results/ppx_flat_continent.ndjson.zst",
-    benchmark:
-        "benchmarks/generate_continent.txt"
     log:
         "logs/generate_continent.txt",
+    benchmark:
+        "benchmarks/generate_continent.txt"
     shell:
         r"""
         exec &> >(tee {log:q})
@@ -54,10 +54,10 @@ rule curate:
         metadata="data/all_metadata.tsv",
         sequences="results/sequences.fasta",
         # ndjson="results/curated.ndjson.zst",
-    benchmark:
-        "benchmarks/curate.txt"
     log:
         "logs/curate.txt",
+    benchmark:
+        "benchmarks/curate.txt"
     params:
         field_map=format_field_map(config["curate"]["field_map"]),
         strain_regex=config["curate"]["strain_regex"],
@@ -113,12 +113,12 @@ rule subset_metadata:
         metadata="data/all_metadata.tsv",
     output:
         subset_metadata="data/subset_metadata.tsv",
-    params:
-        metadata_fields=",".join(config["curate"]["metadata_columns"]),
-    benchmark:
-        "benchmarks/subset_metadata.txt"
     log:
         "logs/subset_metadata.txt",
+    benchmark:
+        "benchmarks/subset_metadata.txt"
+    params:
+        metadata_fields=",".join(config["curate"]["metadata_columns"]),
     shell:
         r"""
         exec &> >(tee {log:q})

diff --git a/ingest/rules/fetch_from_ppx.smk b/ingest/rules/fetch_from_ppx.smk
@@ -17,12 +17,12 @@ rule fetch_ppx_data:
     output:
         ppx_ndjson="results/ppx.ndjson.zst",
         ppx_headers="results/ppx.headers.txt",
+    log:
+        "logs/fetch_ppx_data.txt",
     benchmark:
         "benchmarks/fetch_ppx_data.txt"
     params:
         ppx_api_url="https://backend.pathoplexus.org/mpox/get-released-data?compression=zstd",
-    log:
-        "logs/fetch_ppx_data.txt",
     shell:
         r"""
         exec &> >(tee {log:q})
@@ -52,10 +52,10 @@ rule flatten_ppx_data:
         ppx_ndjson="results/ppx.ndjson.zst",
     output:
         ppx_flat="results/ppx_flat.ndjson.zst",
-    benchmark:
-        "benchmarks/flatten_ppx_data.txt"
     log:
         "logs/flatten_ppx_data.txt",
+    benchmark:
+        "benchmarks/flatten_ppx_data.txt"
     shell:
         r"""
         exec &> >(tee {log:q})

diff --git a/ingest/rules/nextclade.smk b/ingest/rules/nextclade.smk
@@ -4,12 +4,12 @@ import sys
 rule get_nextclade_dataset:
     output:
         "data/mpxv.zip",
-    params:
-        dataset_name="MPXV",
     log:
         "logs/get_nextclade_dataset.txt",
     benchmark:
         "benchmarks/get_nextclade_dataset.txt"
+    params:
+        dataset_name="MPXV",
     shell:
         r"""
         exec &> >(tee {log:q})
@@ -28,15 +28,15 @@ rule run_nextclade:
         nextclade="results/nextclade.tsv",
         alignment="results/alignment.fasta",
         translations="results/translations.zip",
-    params:
-        # The lambda is used to deactivate automatic wildcard expansion.
-        # https://github.com/snakemake/snakemake/blob/384d0066c512b0429719085f2cf886fdb97fd80a/snakemake/rules.py#L997-L1000
-        translations=lambda w: "results/translations/{cds}.fasta",
-    threads: workflow.cores
     log:
         "logs/run_nextclade.txt",
     benchmark:
         "benchmarks/run_nextclade.txt"
+    threads: workflow.cores
+    params:
+        # The lambda is used to deactivate automatic wildcard expansion.
+        # https://github.com/snakemake/snakemake/blob/384d0066c512b0429719085f2cf886fdb97fd80a/snakemake/rules.py#L997-L1000
+        translations=lambda w: "results/translations/{cds}.fasta",
     shell:
         r"""
         exec &> >(tee {log:q})
@@ -71,16 +71,16 @@ rule nextclade_metadata:
         nextclade="results/nextclade.tsv",
     output:
         nextclade_metadata="results/nextclade_metadata.tsv",
+    log:
+        "logs/nextclade_metadata.txt",
+    benchmark:
+        "benchmarks/nextclade_metadata.txt"
     params:
         nextclade_id_field=config["nextclade"]["id_field"],
         nextclade_field_map=[
             f"{old}={new}" for old, new in config["nextclade"]["field_map"].items()
         ],
         nextclade_fields=",".join(config["nextclade"]["field_map"].keys()),
-    log:
-        "logs/nextclade_metadata.txt",
-    benchmark:
-        "benchmarks/nextclade_metadata.txt"
     shell:
         r"""
         exec &> >(tee {log:q})
@@ -100,13 +100,13 @@ rule join_metadata_and_nextclade:
         nextclade_metadata="results/nextclade_metadata.tsv",
     output:
         metadata="results/metadata.tsv",
-    params:
-        metadata_id_field=config["curate"]["id_field"],
-        nextclade_id_field=config["nextclade"]["id_field"],
     log:
         "logs/join_metadata_and_nextclade.txt",
     benchmark:
         "benchmarks/join_metadata_and_nextclade.txt"
+    params:
+        metadata_id_field=config["curate"]["id_field"],
+        nextclade_id_field=config["nextclade"]["id_field"],
     shell:
         r"""
         exec &> >(tee {log:q})

diff --git a/nextclade/Snakefile b/nextclade/Snakefile
@@ -157,10 +157,10 @@ rule premask:
 
 rule deduplicate:
     """
-    Remove identical sequences (even if they have differing Ns)
-    Keep those sequences with fewer Ns
-    Focus for Nextclade is on diversity, not on representativeness
-    """
+Remove identical sequences (even if they have differing Ns)
+Keep those sequences with fewer Ns
+Focus for Nextclade is on diversity, not on representativeness
+"""
     input:
         sequences="results/premasked.fasta",
     output:
@@ -215,7 +215,6 @@ rule reformat_ambiguous:
 
         df = pd.read_csv(input.metadata, sep="\t", keep_default_na=False)
 
-
         def replace_ambiguous_date(date):
             if date == "":
                 return "XXXX-XX-XX"
@@ -226,7 +225,6 @@ rule reformat_ambiguous:
                 rest.append("XX")
             return "-".join([year] + rest)
 
-
         df["date"] = df["date"].apply(replace_ambiguous_date)
 
         df.to_csv(output.metadata, sep="\t", index=False)
@@ -396,10 +394,10 @@ rule mask:
 
 rule deduplicate_2:
     """
-    Remove identical sequences (even if they have differing Ns)
-    Keep those sequences with fewer Ns
-    Focus for Nextclade is on diversity, not on representativeness
-    """
+Remove identical sequences (even if they have differing Ns)
+Keep those sequences with fewer Ns
+Focus for Nextclade is on diversity, not on representativeness
+"""
     input:
         sequences="results/{build_name}/masked_with_dups.fasta",
     output:

diff --git a/phylogenetic/Snakefile b/phylogenetic/Snakefile
@@ -68,8 +68,8 @@ if "custom_rules" in config:
 
 rule clean:
     """
-    Removing directories: {params}
-    """
+Removing directories: {params}
+"""
     params:
         build_dir,
         auspice_dir,
@@ -79,8 +79,8 @@ rule clean:
 
 rule cleanall:
     """
-    Removing directories: {params}
-    """
+Removing directories: {params}
+"""
     params:
         build_dir,
         auspice_dir,

diff --git a/phylogenetic/build-configs/chores/chores.smk b/phylogenetic/build-configs/chores/chores.smk
@@ -4,11 +4,11 @@
 rule update_example_data:
     """This updates the files under example_data/ based on latest available data from data.nextstrain.org.
 
-    The subset of data is generated by an augur filter call which:
-    - sets the subsampling size to 50
-    - includes the root (defined in config but hardcoded here)
-    - ensures all clades and lineages are accounted for using --group-by
-    """
+The subset of data is generated by an augur filter call which:
+- sets the subsampling size to 50
+- includes the root (defined in config but hardcoded here)
+- ensures all clades and lineages are accounted for using --group-by
+"""
     input:
         sequences="results/sequences.fasta",
         metadata="results/metadata.tsv",

diff --git a/phylogenetic/rules/annotate_phylogeny.smk b/phylogenetic/rules/annotate_phylogeny.smk
@@ -22,24 +22,24 @@ OUTPUTS:
 
 rule ancestral:
     """
-    Reconstructing ancestral sequences and mutations
-    """
+Reconstructing ancestral sequences and mutations
+"""
     input:
         tree=build_dir + "/{build_name}/tree.nwk",
         alignment=build_dir + "/{build_name}/masked.fasta",
     output:
         node_data=build_dir + "/{build_name}/nt_muts.json",
+    log:
+        "logs/{build_name}/ancestral.txt",
+    benchmark:
+        "benchmarks/{build_name}/ancestral.txt"
     params:
         inference="joint",
         root_sequence=lambda w: (
             ("--root-sequence " + config["ancestral_root_seq"])
             if config.get("ancestral_root_seq")
             else ""
         ),
-    log:
-        "logs/{build_name}/ancestral.txt",
-    benchmark:
-        "benchmarks/{build_name}/ancestral.txt"
     shell:
         r"""
         exec &> >(tee {log:q})
@@ -55,8 +55,8 @@ rule ancestral:
 
 rule translate:
     """
-    Translating amino acid sequences
-    """
+Translating amino acid sequences
+"""
     input:
         tree=build_dir + "/{build_name}/tree.nwk",
         node_data=build_dir + "/{build_name}/nt_muts.json",
@@ -81,22 +81,23 @@ rule translate:
 
 rule traits:
     """
-    Inferring ancestral traits for {params.columns!s}
-      - increase uncertainty of reconstruction by {params.sampling_bias_correction} to partially account for sampling bias
-    """
+Inferring ancestral traits for {params.columns!s}
+  - increase uncertainty of reconstruction by {params.sampling_bias_correction} to partially account for sampling bias
+
+"""
     input:
         tree=build_dir + "/{build_name}/tree.nwk",
         metadata=build_dir + "/{build_name}/metadata.tsv",
     output:
         node_data=build_dir + "/{build_name}/traits.json",
-    params:
-        columns=config["traits"]["columns"],
-        sampling_bias_correction=config["traits"]["sampling_bias_correction"],
-        strain_id=config["strain_id_field"],
     log:
         "logs/{build_name}/traits.txt",
     benchmark:
         "benchmarks/{build_name}/traits.txt"
+    params:
+        columns=config["traits"]["columns"],
+        sampling_bias_correction=config["traits"]["sampling_bias_correction"],
+        strain_id=config["strain_id_field"],
     shell:
         r"""
         exec &> >(tee {log:q})
@@ -114,8 +115,8 @@ rule traits:
 
 rule clades:
     """
-    Adding internal clade labels
-    """
+Adding internal clade labels
+"""
     input:
         tree=build_dir + "/{build_name}/tree.nwk",
         aa_muts=build_dir + "/{build_name}/aa_muts.json",
@@ -182,18 +183,18 @@ rule mutation_context:
 
 rule recency:
     """
-    Use metadata on submission date to construct submission recency field
-    """
+Use metadata on submission date to construct submission recency field
+"""
     input:
         metadata=build_dir + "/{build_name}/metadata.tsv",
     output:
         node_data=build_dir + "/{build_name}/recency.json",
-    params:
-        strain_id=config["strain_id_field"],
     log:
         "logs/{build_name}/recency.txt",
     benchmark:
         "benchmarks/{build_name}/recency.txt"
+    params:
+        strain_id=config["strain_id_field"],
     shell:
         r"""
         exec &> >(tee {log:q})