Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
exclude: '\.(tsv|fasta|gb)$|^shared/vendored/|^phylogenetic/rules/config.smk'
repos:
- repo: https://github.com/snakemake/snakefmt
rev: v0.11.1
rev: v1.0.0
hooks:
- id: snakefmt
- repo: https://github.com/rhysd/actionlint
rev: v1.7.7
rev: v1.7.12
hooks:
- id: actionlint
entry: env SHELLCHECK_OPTS='--exclude=SC2027' actionlint
Expand All @@ -16,11 +16,11 @@ repos:
# additional_dependencies:
# - tomli
- repo: https://github.com/google/yamlfmt
rev: v0.17.2
rev: v0.21.0
hooks:
- id: yamlfmt
- repo: https://github.com/pappasam/toml-sort
rev: v0.24.2
rev: v0.24.4
hooks:
- id: toml-sort-fix
- repo: https://github.com/pre-commit/pre-commit-hooks
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,12 @@ rule custom_subset_metadata:
metadata="data/all_metadata_added.tsv",
output:
subset_metadata="data/subset_metadata.tsv",
params:
metadata_fields=",".join(config["curate"]["metadata_columns"]),
benchmark:
"benchmarks/subset_metadata.txt"
log:
"logs/subset_metadata.txt",
benchmark:
"benchmarks/subset_metadata.txt"
params:
metadata_fields=",".join(config["curate"]["metadata_columns"]),
shell:
r"""
exec &> >(tee {log:q})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ to expected upload flag files.

rule trigger_build:
"""
Triggering monkeypox builds via repository action type `rebuild`.
"""
Triggering monkeypox builds via repository action type `rebuild`.
"""
input:
metadata_upload="data/upload/s3/metadata_with_restricted.tsv.zst.done",
fasta_upload="data/upload/s3/sequences_with_restricted.fasta.zst.done",
Expand Down
16 changes: 8 additions & 8 deletions ingest/rules/curate.smk
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ rule generate_continent:
script="scripts/generate_continent.py",
output:
ndjson="results/ppx_flat_continent.ndjson.zst",
benchmark:
"benchmarks/generate_continent.txt"
log:
"logs/generate_continent.txt",
benchmark:
"benchmarks/generate_continent.txt"
shell:
r"""
exec &> >(tee {log:q})
Expand All @@ -54,10 +54,10 @@ rule curate:
metadata="data/all_metadata.tsv",
sequences="results/sequences.fasta",
# ndjson="results/curated.ndjson.zst",
benchmark:
"benchmarks/curate.txt"
log:
"logs/curate.txt",
benchmark:
"benchmarks/curate.txt"
params:
field_map=format_field_map(config["curate"]["field_map"]),
strain_regex=config["curate"]["strain_regex"],
Expand Down Expand Up @@ -113,12 +113,12 @@ rule subset_metadata:
metadata="data/all_metadata.tsv",
output:
subset_metadata="data/subset_metadata.tsv",
params:
metadata_fields=",".join(config["curate"]["metadata_columns"]),
benchmark:
"benchmarks/subset_metadata.txt"
log:
"logs/subset_metadata.txt",
benchmark:
"benchmarks/subset_metadata.txt"
params:
metadata_fields=",".join(config["curate"]["metadata_columns"]),
shell:
r"""
exec &> >(tee {log:q})
Expand Down
8 changes: 4 additions & 4 deletions ingest/rules/fetch_from_ppx.smk
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@ rule fetch_ppx_data:
output:
ppx_ndjson="results/ppx.ndjson.zst",
ppx_headers="results/ppx.headers.txt",
log:
"logs/fetch_ppx_data.txt",
benchmark:
"benchmarks/fetch_ppx_data.txt"
params:
ppx_api_url="https://backend.pathoplexus.org/mpox/get-released-data?compression=zstd",
log:
"logs/fetch_ppx_data.txt",
shell:
r"""
exec &> >(tee {log:q})
Expand Down Expand Up @@ -52,10 +52,10 @@ rule flatten_ppx_data:
ppx_ndjson="results/ppx.ndjson.zst",
output:
ppx_flat="results/ppx_flat.ndjson.zst",
benchmark:
"benchmarks/flatten_ppx_data.txt"
log:
"logs/flatten_ppx_data.txt",
benchmark:
"benchmarks/flatten_ppx_data.txt"
shell:
r"""
exec &> >(tee {log:q})
Expand Down
28 changes: 14 additions & 14 deletions ingest/rules/nextclade.smk
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@ import sys
rule get_nextclade_dataset:
output:
"data/mpxv.zip",
params:
dataset_name="MPXV",
log:
"logs/get_nextclade_dataset.txt",
benchmark:
"benchmarks/get_nextclade_dataset.txt"
params:
dataset_name="MPXV",
shell:
r"""
exec &> >(tee {log:q})
Expand All @@ -28,15 +28,15 @@ rule run_nextclade:
nextclade="results/nextclade.tsv",
alignment="results/alignment.fasta",
translations="results/translations.zip",
params:
# The lambda is used to deactivate automatic wildcard expansion.
# https://github.com/snakemake/snakemake/blob/384d0066c512b0429719085f2cf886fdb97fd80a/snakemake/rules.py#L997-L1000
translations=lambda w: "results/translations/{cds}.fasta",
threads: workflow.cores
log:
"logs/run_nextclade.txt",
benchmark:
"benchmarks/run_nextclade.txt"
threads: workflow.cores
params:
# The lambda is used to deactivate automatic wildcard expansion.
# https://github.com/snakemake/snakemake/blob/384d0066c512b0429719085f2cf886fdb97fd80a/snakemake/rules.py#L997-L1000
translations=lambda w: "results/translations/{cds}.fasta",
shell:
r"""
exec &> >(tee {log:q})
Expand Down Expand Up @@ -71,16 +71,16 @@ rule nextclade_metadata:
nextclade="results/nextclade.tsv",
output:
nextclade_metadata="results/nextclade_metadata.tsv",
log:
"logs/nextclade_metadata.txt",
benchmark:
"benchmarks/nextclade_metadata.txt"
params:
nextclade_id_field=config["nextclade"]["id_field"],
nextclade_field_map=[
f"{old}={new}" for old, new in config["nextclade"]["field_map"].items()
],
nextclade_fields=",".join(config["nextclade"]["field_map"].keys()),
log:
"logs/nextclade_metadata.txt",
benchmark:
"benchmarks/nextclade_metadata.txt"
shell:
r"""
exec &> >(tee {log:q})
Expand All @@ -100,13 +100,13 @@ rule join_metadata_and_nextclade:
nextclade_metadata="results/nextclade_metadata.tsv",
output:
metadata="results/metadata.tsv",
params:
metadata_id_field=config["curate"]["id_field"],
nextclade_id_field=config["nextclade"]["id_field"],
log:
"logs/join_metadata_and_nextclade.txt",
benchmark:
"benchmarks/join_metadata_and_nextclade.txt"
params:
metadata_id_field=config["curate"]["id_field"],
nextclade_id_field=config["nextclade"]["id_field"],
shell:
r"""
exec &> >(tee {log:q})
Expand Down
18 changes: 8 additions & 10 deletions nextclade/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -157,10 +157,10 @@ rule premask:

rule deduplicate:
"""
Remove identical sequences (even if they have differing Ns)
Keep those sequences with fewer Ns
Focus for Nextclade is on diversity, not on representativeness
"""
Remove identical sequences (even if they have differing Ns)
Keep those sequences with fewer Ns
Focus for Nextclade is on diversity, not on representativeness
"""
input:
sequences="results/premasked.fasta",
output:
Expand Down Expand Up @@ -215,7 +215,6 @@ rule reformat_ambiguous:

df = pd.read_csv(input.metadata, sep="\t", keep_default_na=False)


def replace_ambiguous_date(date):
if date == "":
return "XXXX-XX-XX"
Expand All @@ -226,7 +225,6 @@ rule reformat_ambiguous:
rest.append("XX")
return "-".join([year] + rest)


df["date"] = df["date"].apply(replace_ambiguous_date)

df.to_csv(output.metadata, sep="\t", index=False)
Expand Down Expand Up @@ -396,10 +394,10 @@ rule mask:

rule deduplicate_2:
"""
Remove identical sequences (even if they have differing Ns)
Keep those sequences with fewer Ns
Focus for Nextclade is on diversity, not on representativeness
"""
Remove identical sequences (even if they have differing Ns)
Keep those sequences with fewer Ns
Focus for Nextclade is on diversity, not on representativeness
"""
input:
sequences="results/{build_name}/masked_with_dups.fasta",
output:
Expand Down
8 changes: 4 additions & 4 deletions phylogenetic/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ if "custom_rules" in config:

rule clean:
"""
Removing directories: {params}
"""
Removing directories: {params}
"""
params:
build_dir,
auspice_dir,
Expand All @@ -79,8 +79,8 @@ rule clean:

rule cleanall:
"""
Removing directories: {params}
"""
Removing directories: {params}
"""
params:
build_dir,
auspice_dir,
Expand Down
10 changes: 5 additions & 5 deletions phylogenetic/build-configs/chores/chores.smk
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
rule update_example_data:
"""This updates the files under example_data/ based on latest available data from data.nextstrain.org.

The subset of data is generated by an augur filter call which:
- sets the subsampling size to 50
- includes the root (defined in config but hardcoded here)
- ensures all clades and lineages are accounted for using --group-by
"""
The subset of data is generated by an augur filter call which:
- sets the subsampling size to 50
- includes the root (defined in config but hardcoded here)
- ensures all clades and lineages are accounted for using --group-by
"""
input:
sequences="results/sequences.fasta",
metadata="results/metadata.tsv",
Expand Down
43 changes: 22 additions & 21 deletions phylogenetic/rules/annotate_phylogeny.smk
Original file line number Diff line number Diff line change
Expand Up @@ -22,24 +22,24 @@ OUTPUTS:

rule ancestral:
"""
Reconstructing ancestral sequences and mutations
"""
Reconstructing ancestral sequences and mutations
"""
input:
tree=build_dir + "/{build_name}/tree.nwk",
alignment=build_dir + "/{build_name}/masked.fasta",
output:
node_data=build_dir + "/{build_name}/nt_muts.json",
log:
"logs/{build_name}/ancestral.txt",
benchmark:
"benchmarks/{build_name}/ancestral.txt"
params:
inference="joint",
root_sequence=lambda w: (
("--root-sequence " + config["ancestral_root_seq"])
if config.get("ancestral_root_seq")
else ""
),
log:
"logs/{build_name}/ancestral.txt",
benchmark:
"benchmarks/{build_name}/ancestral.txt"
shell:
r"""
exec &> >(tee {log:q})
Expand All @@ -55,8 +55,8 @@ rule ancestral:

rule translate:
"""
Translating amino acid sequences
"""
Translating amino acid sequences
"""
input:
tree=build_dir + "/{build_name}/tree.nwk",
node_data=build_dir + "/{build_name}/nt_muts.json",
Expand All @@ -81,22 +81,23 @@ rule translate:

rule traits:
"""
Inferring ancestral traits for {params.columns!s}
- increase uncertainty of reconstruction by {params.sampling_bias_correction} to partially account for sampling bias
"""
Inferring ancestral traits for {params.columns!s}
- increase uncertainty of reconstruction by {params.sampling_bias_correction} to partially account for sampling bias

"""
input:
tree=build_dir + "/{build_name}/tree.nwk",
metadata=build_dir + "/{build_name}/metadata.tsv",
output:
node_data=build_dir + "/{build_name}/traits.json",
params:
columns=config["traits"]["columns"],
sampling_bias_correction=config["traits"]["sampling_bias_correction"],
strain_id=config["strain_id_field"],
log:
"logs/{build_name}/traits.txt",
benchmark:
"benchmarks/{build_name}/traits.txt"
params:
columns=config["traits"]["columns"],
sampling_bias_correction=config["traits"]["sampling_bias_correction"],
strain_id=config["strain_id_field"],
shell:
r"""
exec &> >(tee {log:q})
Expand All @@ -114,8 +115,8 @@ rule traits:

rule clades:
"""
Adding internal clade labels
"""
Adding internal clade labels
"""
input:
tree=build_dir + "/{build_name}/tree.nwk",
aa_muts=build_dir + "/{build_name}/aa_muts.json",
Expand Down Expand Up @@ -182,18 +183,18 @@ rule mutation_context:

rule recency:
"""
Use metadata on submission date to construct submission recency field
"""
Use metadata on submission date to construct submission recency field
"""
input:
metadata=build_dir + "/{build_name}/metadata.tsv",
output:
node_data=build_dir + "/{build_name}/recency.json",
params:
strain_id=config["strain_id_field"],
log:
"logs/{build_name}/recency.txt",
benchmark:
"benchmarks/{build_name}/recency.txt"
params:
strain_id=config["strain_id_field"],
shell:
r"""
exec &> >(tee {log:q})
Expand Down
Loading