From 494b15137302d1b9f04c33643728f6e84ca2f27c Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Wed, 25 Feb 2026 11:16:50 -0500 Subject: [PATCH 1/4] sprocket format overwrite the repo --- data_structures/flag_filter.wdl | 9 +- data_structures/read_group.wdl | 22 ++- tools/arriba.wdl | 73 +++++--- tools/bwa.wdl | 68 +++---- tools/deeptools.wdl | 3 +- tools/fastp.wdl | 36 ++-- tools/fastqc.wdl | 1 - tools/fq.wdl | 19 +- tools/gatk4.wdl | 94 +++++----- tools/htseq.wdl | 16 +- tools/kraken2.wdl | 54 ++---- tools/librarian.wdl | 11 +- tools/md5sum.wdl | 3 +- tools/mosdepth.wdl | 1 - tools/ngsderive.wdl | 8 +- tools/picard.wdl | 38 ++-- tools/qualimap.wdl | 14 +- tools/sambamba.wdl | 11 +- tools/samtools.wdl | 87 +++------ tools/star.wdl | 90 ++++----- tools/util.wdl | 34 ++-- workflows/chipseq/chipseq-standard.wdl | 53 ++++-- workflows/dnaseq/dnaseq-core.wdl | 47 ++--- workflows/dnaseq/dnaseq-standard-fastq.wdl | 17 +- workflows/dnaseq/dnaseq-standard.wdl | 13 +- workflows/general/alignment-post.wdl | 32 +++- workflows/general/bam-to-fastqs.wdl | 26 +-- workflows/general/samtools-merge.wdl | 31 ++-- workflows/methylation/methylation-cohort.wdl | 81 ++++----- .../methylation/methylation-preprocess.wdl | 14 +- .../methylation/methylation-standard.wdl | 55 +++--- workflows/qc/markdups-post.wdl | 6 +- workflows/qc/quality-check-standard.wdl | 172 +++++++++++------- workflows/reference/bwa-db-build.wdl | 4 +- workflows/reference/gatk-reference.wdl | 20 +- workflows/reference/qc-reference.wdl | 25 ++- workflows/reference/star-db-build.wdl | 6 +- workflows/rnaseq/rnaseq-core.wdl | 47 ++--- workflows/rnaseq/rnaseq-standard-fastq.wdl | 18 +- workflows/rnaseq/rnaseq-standard.wdl | 5 +- workflows/rnaseq/rnaseq-variant-calling.wdl | 12 +- 41 files changed, 650 insertions(+), 726 deletions(-) diff --git a/data_structures/flag_filter.wdl b/data_structures/flag_filter.wdl index 3f1f0826e..7a27ea213 100644 --- a/data_structures/flag_filter.wdl +++ b/data_structures/flag_filter.wdl @@ -58,7 +58,6 @@ ## In short, those are all flags corresponding to the quality of the read ## and them being `true` may indicate that the read is of low quality and ## should be excluded. - version 1.1 struct FlagFilter { @@ -127,15 +126,15 @@ workflow validate_flag_filter { } call validate_string_is_12bit_int as validate_include_if_any { input: - number = flags.include_if_any + number = flags.include_if_any, } call validate_string_is_12bit_int as validate_include_if_all { input: - number = flags.include_if_all + number = flags.include_if_all, } call validate_string_is_12bit_int as validate_exclude_if_any { input: - number = flags.exclude_if_any + number = flags.exclude_if_any, } call validate_string_is_12bit_int as validate_exclude_if_all { input: - number = flags.exclude_if_all + number = flags.exclude_if_all, } } diff --git a/data_structures/read_group.wdl b/data_structures/read_group.wdl index c37306baa..0be2fb22b 100644 --- a/data_structures/read_group.wdl +++ b/data_structures/read_group.wdl @@ -40,7 +40,6 @@ ## } ## } ## ``` - version 1.1 #@ except: SnakeCase @@ -99,8 +98,7 @@ workflow read_group_to_string { } output { - String validated_read_group - = inner_read_group_to_string.stringified_read_group + String validated_read_group = inner_read_group_to_string.stringified_read_group } } @@ -109,7 +107,7 @@ task get_read_groups { description: "Gets read group information from a BAM file and writes it out as JSON which is converted to a WDL struct." warning: "This task will uppercase any lowercase `PL` values it finds, as is required by the [SAM specification](https://samtools.github.io/hts-specs/SAMv1.pdf)." outputs: { - read_groups: "An array of `ReadGroup` structs containing read group information." + read_groups: "An array of `ReadGroup` structs containing read group information.", } } @@ -167,8 +165,18 @@ task validate_read_group { String sample_pattern = "sample.?" String restrictive_pattern = "\\ " # Disallow spaces Array[String] platforms = [ - "CAPILLARY", "DNBSEQ", "ELEMENT", "HELICOS", "ILLUMINA", "IONTORRENT", "LS454", - "ONT", "PACBIO", "SINGULAR", "SOLID", "ULTIMA", + "CAPILLARY", + "DNBSEQ", + "ELEMENT", + "HELICOS", + "ILLUMINA", + "IONTORRENT", + "LS454", + "ONT", + "PACBIO", + "SINGULAR", + "SOLID", + "ULTIMA", ] command <<< @@ -366,7 +374,7 @@ task inner_read_group_to_string { description: "Converts a `ReadGroup` struct to a `String` **without any validation**." warning: "Please use the `read_group_to_string` workflow, which has validation of the `ReadGroup` contents." outputs: { - stringified_read_group: "Input `ReadGroup` as a string" + stringified_read_group: "Input `ReadGroup` as a string", } } diff --git a/tools/arriba.wdl b/tools/arriba.wdl index 05f00f0b8..a2e010885 100644 --- a/tools/arriba.wdl +++ b/tools/arriba.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://arriba.readthedocs.io/en/latest/) - version 1.1 task arriba { @@ -138,14 +137,40 @@ task arriba { File? protein_domains File? wgs_svs Array[String] interesting_contigs = [ - "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", - "15", "16", "17", "18", "19", "20", "21", "22", "X", "Y", "AC_*", "NC_*", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "X", + "Y", + "AC_*", + "NC_*", + ] + Array[String] viral_contigs = [ + "AC_*", + "NC_*", ] - Array[String] viral_contigs = ["AC_*", "NC_*"] Array[String] disable_filters = [] #@ except: LineWidth - String feature_name - = "gene_name=gene_name|gene_id,gene_id=gene_id,transcript_id=transcript_id,feature_exon=exon,feature_CDS=CDS" + String feature_name = "gene_name=gene_name|gene_id,gene_id=gene_id,transcript_id=transcript_id,feature_exon=exon,feature_CDS=CDS" String prefix = basename(bam, ".bam") + ".fusions" String strandedness = "auto" Boolean mark_duplicates = true @@ -176,10 +201,8 @@ task arriba { } Int bam_size_gb = ceil(size(bam, "GB")) - Int disk_size_gb = bam_size_gb - + ceil(size(gtf, "GB")) - + ceil(size(reference_fasta_gz, "GB")) - + modify_disk_size_gb + Int disk_size_gb = bam_size_gb + ceil(size(gtf, "GB")) + ceil(size(reference_fasta_gz, + "GB")) + modify_disk_size_gb Int memory_gb = bam_size_gb + modify_memory_gb command <<< @@ -198,21 +221,13 @@ task arriba { ~{"-d '" + wgs_svs + "'"} \ -D ~{max_genomic_breakpoint_distance} \ -s "~{strandedness}" \ - ~{( - if length(interesting_contigs) > 0 - then "-i " + sep(",", quote(interesting_contigs)) - else "" - )} \ - ~{( - if length(viral_contigs) > 0 - then "-v " + sep(",", quote(viral_contigs)) - else "" - )} \ - ~{( - if length(disable_filters) > 0 - then "-f " + sep(",", quote(disable_filters)) - else "" - )} \ + ~{(if length(interesting_contigs) > 0 then "-i " + sep(",", quote( + interesting_contigs + )) else "")} \ + ~{(if length(viral_contigs) > 0 then "-v " + sep(",", quote(viral_contigs)) + else "")} \ + ~{(if length(disable_filters) > 0 then "-f " + sep(",", quote(disable_filters) + ) else "")} \ -E ~{max_e_value} \ -S ~{min_supporting_reads} \ -m ~{max_mismappers} \ @@ -255,7 +270,7 @@ task arriba_tsv_to_vcf { meta { description: "Convert Arriba TSV format fusions to VCF format." outputs: { - fusions_vcf: "Output file of fusions in VCF format" + fusions_vcf: "Output file of fusions in VCF format", } } @@ -274,9 +289,7 @@ task arriba_tsv_to_vcf { } Int input_size_gb = ceil(size(fusions, "GB")) - Int disk_size_gb = ceil(input_size_gb) - + (ceil(size(reference_fasta, "GB")) * 3) - + modify_disk_size_gb + Int disk_size_gb = ceil(input_size_gb) + (ceil(size(reference_fasta, "GB")) * 3) + modify_disk_size_gb command <<< set -euo pipefail @@ -356,7 +369,7 @@ task arriba_annotate_exon_numbers { meta { description: "Annotate fusions with exon numbers." outputs: { - fusion_tsv: "TSV file with fusions annotated with exon numbers" + fusion_tsv: "TSV file with fusions annotated with exon numbers", } } diff --git a/tools/bwa.wdl b/tools/bwa.wdl index 1d65d6df4..89b814018 100644 --- a/tools/bwa.wdl +++ b/tools/bwa.wdl @@ -1,12 +1,11 @@ ## [Homepage](https://github.com/lh3/bwa) - version 1.1 task bwa_aln { meta { description: "Maps Single-End FASTQ files to BAM format using bwa aln" outputs: { - bam: "Aligned BAM format file" + bam: "Aligned BAM format file", } } @@ -36,11 +35,9 @@ task bwa_aln { File fastq File bwa_db_tar_gz String read_group - String prefix = sub( - basename(fastq), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(fastq), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 - ) + ) Boolean use_all_cores = false Int ncpu = 2 Int modify_disk_size_gb = 0 @@ -50,9 +47,8 @@ task bwa_aln { Float input_fastq_size = size(fastq, "GB") Float reference_size = size(bwa_db_tar_gz, "GB") - Int disk_size_gb = ( - ceil((input_fastq_size + reference_size) * 2) + 10 + modify_disk_size_gb - ) + Int disk_size_gb = (ceil((input_fastq_size + reference_size) * 2) + 10 + modify_disk_size_gb + ) command <<< set -euo pipefail @@ -98,7 +94,7 @@ task bwa_aln_pe { meta { description: "Maps Paired-End FASTQ files to BAM format using bwa aln" outputs: { - bam: "Aligned BAM format file" + bam: "Aligned BAM format file", } } @@ -132,11 +128,8 @@ task bwa_aln_pe { File read_two_fastq_gz File bwa_db_tar_gz String read_group - String prefix = sub( - basename(read_one_fastq_gz), - "([_\\.][rR][12])?(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", - "" - ) + String prefix = sub(basename(read_one_fastq_gz), "([_\\.][rR][12])?(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", + "") Boolean use_all_cores = false Int ncpu = 4 Int modify_disk_size_gb = 0 @@ -144,13 +137,11 @@ task bwa_aln_pe { String output_bam = prefix + ".bam" - Float input_fastq_size = ( - size(read_one_fastq_gz, "GB") + size(read_two_fastq_gz, "GB") - ) + Float input_fastq_size = (size(read_one_fastq_gz, "GB") + size(read_two_fastq_gz, "GB" + )) Float reference_size = size(bwa_db_tar_gz, "GB") - Int disk_size_gb = ( - ceil((input_fastq_size + reference_size) * 2) + 5 + modify_disk_size_gb - ) + Int disk_size_gb = (ceil((input_fastq_size + reference_size) * 2) + 5 + modify_disk_size_gb + ) command <<< set -euo pipefail @@ -202,7 +193,7 @@ task bwa_mem { meta { description: "Maps FASTQ files to BAM format using bwa mem" outputs: { - bam: "Aligned BAM format file" + bam: "Aligned BAM format file", } } @@ -230,11 +221,8 @@ task bwa_mem { File bwa_db_tar_gz String read_group File? read_two_fastq_gz - String prefix = sub( - basename(read_one_fastq_gz), - "([_\\.][rR][12])?(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", - "" - ) + String prefix = sub(basename(read_one_fastq_gz), "([_\\.][rR][12])?(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", + "") Boolean use_all_cores = false Int ncpu = 4 Int modify_disk_size_gb = 0 @@ -242,12 +230,10 @@ task bwa_mem { String output_bam = prefix + ".bam" - Float input_fastq_size = size(read_one_fastq_gz, "GB") - + size(read_two_fastq_gz, "GB") + Float input_fastq_size = size(read_one_fastq_gz, "GB") + size(read_two_fastq_gz, "GB") Float reference_size = size(bwa_db_tar_gz, "GB") - Int disk_size_gb = ( - ceil((input_fastq_size + reference_size) * 2) + 10 + modify_disk_size_gb - ) + Int disk_size_gb = (ceil((input_fastq_size + reference_size) * 2) + 10 + modify_disk_size_gb + ) command <<< set -euo pipefail @@ -271,21 +257,17 @@ task bwa_mem { -R "~{read_group}" \ bwa_db/"$PREFIX" \ "~{basename(read_one_fastq_gz)}" \ - ~{( - if defined(read_two_fastq_gz) - then "'" + basename(select_first([read_two_fastq_gz])) + "'" - else "" - )} \ + ~{(if defined(read_two_fastq_gz) then "'" + basename(select_first([ + read_two_fastq_gz, + ])) + "'" else "")} \ | samtools view --no-PG --threads "$samtools_cores" -hb - \ > "~{output_bam}" rm -r bwa_db rm "~{basename(read_one_fastq_gz)}" - ~{( - if defined(read_two_fastq_gz) - then "rm '" + basename(select_first([read_two_fastq_gz])) + "'" - else "" - )} + ~{(if defined(read_two_fastq_gz) then "rm '" + basename(select_first([ + read_two_fastq_gz, + ])) + "'" else "")} >>> output { @@ -305,7 +287,7 @@ task build_bwa_db { meta { description: "Creates a BWA index and returns it as a compressed tar archive" outputs: { - bwa_db_tar_gz: "Tarballed bwa reference files" + bwa_db_tar_gz: "Tarballed bwa reference files", } } diff --git a/tools/deeptools.wdl b/tools/deeptools.wdl index f3327b1fe..197491901 100755 --- a/tools/deeptools.wdl +++ b/tools/deeptools.wdl @@ -1,12 +1,11 @@ ## [Homepage](https://deeptools.readthedocs.io/en/develop/index.html) - version 1.1 task bam_coverage { meta { description: "Generates a BigWig coverage track using bamCoverage from DeepTools" outputs: { - bigwig: "BigWig format coverage file" + bigwig: "BigWig format coverage file", } } diff --git a/tools/fastp.wdl b/tools/fastp.wdl index 42b78e34e..99d5ebceb 100644 --- a/tools/fastp.wdl +++ b/tools/fastp.wdl @@ -95,11 +95,9 @@ task fastp { input { File read_one_fastq File? read_two_fastq - String prefix = sub( - basename(read_one_fastq), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(read_one_fastq), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 - ) + ".trimmed" + ) + ".trimmed" Boolean output_fastq = true Boolean deduplicate = false Boolean disable_duplicate_eval = false @@ -148,9 +146,9 @@ task fastp { Float input_size = size(read_one_fastq, "GB") + size(read_two_fastq, "GB") Int disk_size_gb = ceil(input_size) * 2 + 10 + modify_disk_size_gb - command <<< + command <<< set -euo pipefail - + n_cores=~{ncpu} if ~{use_all_cores}; then n_cores=$(nproc) @@ -159,20 +157,10 @@ task fastp { fastp \ -i "~{read_one_fastq}" \ ~{"-I '" + read_two_fastq + "'"} \ - ~{( - if output_fastq - then "-o '" + ( - if defined(read_two_fastq) - then "~{prefix}.R1.fastq.gz" - else "~{prefix}.fastq.gz" - ) + "'" - else "" - )} \ - ~{( - if (defined(read_two_fastq) && output_fastq) - then "-O '" + prefix + ".R2.fastq.gz'" - else "" - )} \ + ~{(if output_fastq then "-o '" + (if defined(read_two_fastq) then "~{prefix}.R1.fastq.gz" + else "~{prefix}.fastq.gz") + "'" else "")} \ + ~{(if (defined(read_two_fastq) && output_fastq) then "-O '" + prefix + ".R2.fastq.gz'" + else "")} \ --reads_to_process ~{first_n_reads} \ ~{if deduplicate then "--dedup" else ""} \ --dup_calc_accuracy ~{duplicate_accuracy} \ @@ -219,11 +207,9 @@ task fastp { runtime { cpu: ncpu - memory: ( - if disable_duplicate_eval - then "4 GB" - else dup_acc_to_mem[duplicate_accuracy] - ) + memory: (if disable_duplicate_eval then "4 GB" else dup_acc_to_mem[ + duplicate_accuracy + ]) disks: "~{disk_size_gb} GB" container: "quay.io/biocontainers/fastp:1.0.1--heae3180_0" maxRetries: 1 diff --git a/tools/fastqc.wdl b/tools/fastqc.wdl index 531be2b68..feddc237b 100755 --- a/tools/fastqc.wdl +++ b/tools/fastqc.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) - version 1.1 task fastqc { diff --git a/tools/fq.wdl b/tools/fq.wdl index c92495d8c..dd45affa5 100755 --- a/tools/fq.wdl +++ b/tools/fq.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://github.com/stjude-rust-labs/fq) - version 1.1 task fqlint { @@ -67,9 +66,7 @@ task fqlint { Float read1_size = size(read_one_fastq, "GB") Float read2_size = size(read_two_fastq, "GB") - Int memory_gb = ( - ceil((read1_size + read2_size) * 0.25) + 1 + modify_memory_gb - ) + Int memory_gb = (ceil((read1_size + read2_size) * 0.25) + 1 + modify_memory_gb) Int disk_size_gb = ceil((read1_size + read2_size) * 2) + modify_disk_size_gb @@ -123,11 +120,9 @@ task subsample { input { File read_one_fastq File? read_two_fastq - String prefix = sub( - basename(read_one_fastq), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(read_one_fastq), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 - ) + ) Float probability = 1.0 Int record_count = -1 Int modify_disk_size_gb = 0 @@ -138,11 +133,9 @@ task subsample { Int disk_size_gb = ceil((read1_size + read2_size) * 2) + modify_disk_size_gb - String probability_arg = ( - if (probability < 1.0 && probability > 0) - then "-p ~{probability}" - else "" - ) + String probability_arg = (if (probability < 1.0 && probability > 0) then "-p ~{ + probability + }" else "") String record_count_arg = if (record_count > 0) then "-n ~{record_count}" else "" String r1_dst = prefix + ".R1.subsampled.fastq.gz" diff --git a/tools/gatk4.wdl b/tools/gatk4.wdl index c0b9085e3..011c988e4 100644 --- a/tools/gatk4.wdl +++ b/tools/gatk4.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://software.broadinstitute.org/gatk) - version 1.1 task split_n_cigar_reads { @@ -13,7 +12,7 @@ task split_n_cigar_reads { } } - parameter_meta { + parameter_meta { bam: "Input BAM format file to with unsplit reads containing Ns in their CIGAR strings." bam_index: "BAM index file corresponding to the input BAM" fasta: "Reference genome in FASTA format. Must be uncompressed." @@ -37,23 +36,21 @@ task split_n_cigar_reads { Int ncpu = 8 } - Int disk_size_gb = ceil(size(bam, "GB") + 1) * 3 - + ceil(size(fasta, "GB")) - + modify_disk_size_gb + Int disk_size_gb = ceil(size(bam, "GB") + 1) * 3 + ceil(size(fasta, "GB")) + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) command <<< - set -euo pipefail - - gatk \ - --java-options "-Xms4000m -Xmx~{java_heap_size}g" \ - SplitNCigarReads \ - -R "~{fasta}" \ - -I "~{bam}" \ - -O "~{prefix}.bam" \ - -OBM true - # GATK is unreasonable and uses the plain ".bai" suffix. - mv "~{prefix}.bai" "~{prefix}.bam.bai" + set -euo pipefail + + gatk \ + --java-options "-Xms4000m -Xmx~{java_heap_size}g" \ + SplitNCigarReads \ + -R "~{fasta}" \ + -I "~{bam}" \ + -O "~{prefix}.bam" \ + -OBM true + # GATK is unreasonable and uses the plain ".bai" suffix. + mv "~{prefix}.bai" "~{prefix}.bam.bai" >>> output { @@ -76,11 +73,11 @@ task base_recalibrator { description: "Generates recalibration report for base quality score recalibration." external_help: "https://gatk.broadinstitute.org/hc/en-us/articles/360036897372-BaseRecalibratorSpark-BETA" outputs: { - recalibration_report: "Recalibration report file" + recalibration_report: "Recalibration report file", } } - parameter_meta { + parameter_meta { bam: "Input BAM format file on which to recabilbrate base quality scores" bam_index: "BAM index file corresponding to the input BAM" fasta: "Reference genome in FASTA format" @@ -114,26 +111,22 @@ task base_recalibrator { Int memory_gb = 25 Int modify_disk_size_gb = 0 Int ncpu = 4 - } + } - Int disk_size_gb = ceil(size(bam, "GB") + 1) * 3 - + ceil(size(fasta, "GB")) - + modify_disk_size_gb + Int disk_size_gb = ceil(size(bam, "GB") + 1) * 3 + ceil(size(fasta, "GB")) + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) #@ except: LineWidth command <<< # shellcheck disable=SC2102 gatk \ - --java-options "-XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xms4000m -Xmx~{java_heap_size}g" \ + --java-options "-XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xms4000m -Xmx~{ + java_heap_size + }g" \ BaseRecalibratorSpark \ -R "~{fasta}" \ -I "~{bam}" \ - ~{( - if use_original_quality_scores - then "--use-original-qualities" - else "" - )} \ + ~{(if use_original_quality_scores then "--use-original-qualities" else "")} \ -O "~{outfile_name}" \ --known-sites "~{dbSNP_vcf}" \ ~{sep(" ", prefix("--known-sites ", squote(known_indels_sites_vcfs)))} \ @@ -163,7 +156,7 @@ task apply_bqsr { } } - parameter_meta { + parameter_meta { bam: "Input BAM format file on which to apply base quality score recalibration" bam_index: "BAM index file corresponding to the input BAM" recalibration_report: "Recalibration report file" @@ -194,11 +187,13 @@ task apply_bqsr { # shellcheck disable=SC2102 gatk \ - --java-options "-XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xms3000m -Xmx~{java_heap_size}g" \ + --java-options "-XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xms3000m -Xmx~{ + java_heap_size + }g" \ ApplyBQSRSpark \ --spark-master local[~{ncpu}] \ -I "~{bam}" \ - ~{if use_original_quality_scores then "--use-original-qualities" else "" } \ + ~{if use_original_quality_scores then "--use-original-qualities" else ""} \ -O "~{prefix}.bqsr.bam" \ --bqsr-recal-file "~{recalibration_report}" >>> @@ -227,7 +222,7 @@ task haplotype_caller { } } - parameter_meta { + parameter_meta { bam: "Input BAM format file on which to call variants" bam_index: "BAM index file corresponding to the input BAM" interval_list: { @@ -269,10 +264,7 @@ task haplotype_caller { Int ncpu = 4 } - Int disk_size_gb = ceil(size(bam, "GB") * 2) - + 30 - + ceil(size(fasta, "GB")) - + modify_disk_size_gb + Int disk_size_gb = ceil(size(bam, "GB") * 2) + 30 + ceil(size(fasta, "GB")) + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) #@ except: LineWidth @@ -313,7 +305,7 @@ task variant_filtration { } } - parameter_meta { + parameter_meta { vcf: "Input VCF format file to filter" vcf_index: "VCF index file corresponding to the input VCF" fasta: "Reference genome in FASTA format" @@ -340,8 +332,14 @@ task variant_filtration { File fasta File fasta_index File dict - Array[String] filter_names = ["FS", "QD"] - Array[String] filter_expressions = ["FS > 30.0", "QD < 2.0"] + Array[String] filter_names = [ + "FS", + "QD", + ] + Array[String] filter_expressions = [ + "FS > 30.0", + "QD < 2.0", + ] String prefix = basename(vcf, ".vcf.gz") Int cluster = 3 Int window = 35 @@ -377,7 +375,7 @@ task variant_filtration { } task mark_duplicates_spark { - meta { + meta { description: "Marks duplicate reads in the input BAM file using GATK's Spark implementation of Picard's MarkDuplicates." external_help: "https://gatk.broadinstitute.org/hc/en-us/articles/13832682540699-MarkDuplicatesSpark" outputs: { @@ -427,7 +425,7 @@ task mark_duplicates_spark { group: "Common", } optical_distance: { - description: "Maximum distance between read coordinates to consider them optical duplicates. If `0`, then optical duplicate marking is disabled.", + description: "Maximum distance between read coordinates to consider them optical duplicates. If `0`, then optical duplicate marking is disabled.", help: "Suggested settings of 100 for unpatterned versions of the Illumina platform (e.g. HiSeq) or 2500 for patterned flowcell models (e.g. NovaSeq). Calculation of distance depends on coordinate data embedded in the read names, typically produced by the Illumina sequencing machines.", warning: "Optical duplicate detection will not work on non-standard names without modifying `read_name_regex`.", } @@ -452,13 +450,8 @@ task mark_duplicates_spark { Float bam_size = size(bam, "GB") Int memory_gb = min(ceil(bam_size + 15), 50) + modify_memory_gb - Int disk_size_gb = ( - ( - if create_bam - then ceil((bam_size * 2) + 10) - else ceil(bam_size + 10) - ) + modify_disk_size_gb - ) + Int disk_size_gb = ((if create_bam then ceil((bam_size * 2) + 10) else ceil(bam_size + 10 + )) + modify_disk_size_gb) Int java_heap_size = ceil(memory_gb * 0.9) @@ -474,9 +467,8 @@ task mark_duplicates_spark { --create-output-bam-index ~{create_bam} \ --read-validation-stringency "~{validation_stringency}" \ --duplicate-scoring-strategy "~{duplicate_scoring_strategy}" \ - --read-name-regex '~{ - if (optical_distance > 0) then read_name_regex else "null" - }' \ + --read-name-regex '~{if (optical_distance > 0) then read_name_regex else "null" + }' \ --duplicate-tagging-policy "~{tagging_policy}" \ --optical-duplicate-pixel-distance ~{optical_distance} \ --spark-master local[~{ncpu}] diff --git a/tools/htseq.wdl b/tools/htseq.wdl index 5afba83a0..80dfe9a7b 100755 --- a/tools/htseq.wdl +++ b/tools/htseq.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://github.com/htseq/htseq) - version 1.1 task count { @@ -9,7 +8,7 @@ task count { feature_counts: { description: "A two column TSV file. First column is feature names and second column is counts.", help: "Presence of a header is determined by the `include_custom_header` parameter.", - } + }, } } @@ -98,9 +97,7 @@ task count { Int memory_gb = (if pos_sorted then ceil(bam_size) + 4 else 4) + modify_memory_gb - Int disk_size_gb = ceil( - (bam_size + gtf_size) * if pos_sorted then 4 else 1 - ) + 10 + modify_disk_size_gb + Int disk_size_gb = ceil((bam_size + gtf_size) * if pos_sorted then 4 else 1) + 10 + modify_disk_size_gb command <<< set -euo pipefail @@ -122,11 +119,8 @@ task count { -i "~{idattr}" \ --nonunique ~{if nonunique then "all" else "none"} \ --secondary-alignments ~{if secondary_alignments then "score" else "ignore"} \ - --supplementary-alignments ~{( - if supplementary_alignments - then "score" - else "ignore" - )} \ + --supplementary-alignments ~{(if supplementary_alignments then "score" else "ignore" + )} \ "~{bam}" \ "~{gtf}" \ >> "~{outfile_name}" @@ -148,7 +142,7 @@ task calc_tpm { meta { description: "Given a feature counts file and a feature lengths file, calculate Transcripts Per Million (TPM)" outputs: { - tpm_file: "Transcripts Per Million (TPM) file. A two column headered TSV file." + tpm_file: "Transcripts Per Million (TPM) file. A two column headered TSV file.", } } diff --git a/tools/kraken2.wdl b/tools/kraken2.wdl index 57fc6bd42..2bcf1cc6f 100644 --- a/tools/kraken2.wdl +++ b/tools/kraken2.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://github.com/DerrickWood/kraken2) - version 1.1 task download_taxonomy { @@ -9,7 +8,7 @@ task download_taxonomy { taxonomy: { description: "The NCBI taxonomy, which is needed by the `build_db` task.", warning: "This output is not human-readable or meant for anything other than building a Kraken2 database.", - } + }, } } @@ -58,7 +57,7 @@ task download_library { library: { description: "A library of reference genomes, which is needed by the `build_db` task.", warning: "This output is not human-readable or meant for anything other than building a Kraken2 database.", - } + }, } } @@ -97,14 +96,8 @@ task download_library { String db_name = "kraken2_" + library_name + "_library" #@ except: ExpressionSpacing - Int disk_size_gb = ( - ( - if library_name == "bacteria" then 300 - else if library_name == "nr" then 600 - else if library_name == "nt" then 2500 - else 25 - ) + modify_disk_size_gb - ) + Int disk_size_gb = ((if library_name == "bacteria" then 300 else if library_name == "nr" + then 600 else if library_name == "nt" then 2500 else 25) + modify_disk_size_gb) command <<< set -euo pipefail @@ -140,7 +133,7 @@ task create_library_from_fastas { custom_library: { description: "Kraken2 compatible library, which is needed by the `build_db` task.", warning: "This output is not human-readable or meant for anything other than building a Kraken2 database.", - } + }, } } @@ -200,7 +193,7 @@ task build_db { meta { description: "Builds a custom Kraken2 database" outputs: { - built_db: "A complete Kraken2 database" + built_db: "A complete Kraken2 database", } } @@ -249,13 +242,9 @@ task build_db { Float tarballs_size = size(tarballs, "GB") Int disk_size_gb = ceil(tarballs_size * 6) + 10 + modify_disk_size_gb - Int memory_gb = ( - ( - if (max_db_size_gb > 0) - then ceil(max_db_size_gb * 1.2) - else ceil(tarballs_size * 2) - ) + modify_memory_gb - ) + Int memory_gb = ((if (max_db_size_gb > 0) then ceil(max_db_size_gb * 1.2) else ceil( + tarballs_size * 2 + )) + modify_memory_gb) String max_db_size_bytes = "~{max_db_size_gb}000000000" @@ -281,11 +270,8 @@ task build_db { --kmer-len ~{kmer_len} \ --minimizer-len ~{minimizer_len} \ --minimizer-spaces ~{minimizer_spaces} \ - ~{( - if (max_db_size_gb > 0) - then "--max-db-size '" + max_db_size_bytes + "'" - else "" - )} \ + ~{(if (max_db_size_gb > 0) then "--max-db-size '" + max_db_size_bytes + "'" + else "")} \ --threads "$n_cores" \ --db "~{db_name}" @@ -359,11 +345,9 @@ task kraken { File read_two_fastq_gz #@ except: InputName File db - String prefix = sub( - basename(read_one_fastq_gz), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(read_one_fastq_gz), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 - ) + ) Boolean store_sequences = false Boolean use_names = true Boolean use_all_cores = false @@ -376,14 +360,10 @@ task kraken { Float db_size = size(db, "GB") Float read1_size = size(read_one_fastq_gz, "GB") Float read2_size = size(read_two_fastq_gz, "GB") - Int disk_size_gb_calculation = ( - ceil((db_size * 2) + read1_size + read2_size) + 10 + modify_disk_size_gb - ) - Int disk_size_gb = ( - if store_sequences - then disk_size_gb_calculation + ceil(read1_size + read2_size) - else disk_size_gb_calculation - ) + Int disk_size_gb_calculation = (ceil((db_size * 2) + read1_size + read2_size) + 10 + modify_disk_size_gb + ) + Int disk_size_gb = (if store_sequences then disk_size_gb_calculation + ceil(read1_size + + read2_size) else disk_size_gb_calculation) Int memory_gb = ceil(db_size * 2) + modify_memory_gb diff --git a/tools/librarian.wdl b/tools/librarian.wdl index 4d1d43755..97dd8892f 100644 --- a/tools/librarian.wdl +++ b/tools/librarian.wdl @@ -1,5 +1,4 @@ ## # librarian - version 1.1 task librarian { @@ -24,18 +23,14 @@ task librarian { input { File read_one_fastq - String prefix = sub( - basename(read_one_fastq), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(read_one_fastq), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 - ) + ".librarian" + ) + ".librarian" Int modify_disk_size_gb = 0 } Float read1_size = size(read_one_fastq, "GB") - Int disk_size_gb = ( - ceil(read1_size) + 10 + modify_disk_size_gb - ) + Int disk_size_gb = (ceil(read1_size) + 10 + modify_disk_size_gb) command <<< set -euo pipefail diff --git a/tools/md5sum.wdl b/tools/md5sum.wdl index af67e781f..aeb8d7780 100755 --- a/tools/md5sum.wdl +++ b/tools/md5sum.wdl @@ -1,12 +1,11 @@ ## [Homepage](https://github.com/coreutils/coreutils) - version 1.1 task compute_checksum { meta { description: "Generates an MD5 checksum for the input file" outputs: { - md5sum: "STDOUT of the `md5sum` command that has been redirected to a file" + md5sum: "STDOUT of the `md5sum` command that has been redirected to a file", } } diff --git a/tools/mosdepth.wdl b/tools/mosdepth.wdl index 746d5e67f..7a074133e 100644 --- a/tools/mosdepth.wdl +++ b/tools/mosdepth.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://github.com/brentp/mosdepth) - version 1.1 task coverage { diff --git a/tools/ngsderive.wdl b/tools/ngsderive.wdl index 0cab3af85..87238c5a4 100644 --- a/tools/ngsderive.wdl +++ b/tools/ngsderive.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://github.com/stjudecloud/ngsderive) - version 1.1 task strandedness { @@ -398,13 +397,8 @@ task endedness { } Float bam_size = size(bam, "GB") - Int memory_gb = ( - if calc_rpt - then ( - ceil(bam_size * 2.5) + 4 + modify_memory_gb + Int memory_gb = (if calc_rpt then (ceil(bam_size * 2.5) + 4 + modify_memory_gb) else 4 ) - else 4 - ) Int disk_size_gb = ceil(bam_size) + 10 + modify_disk_size_gb command <<< diff --git a/tools/picard.wdl b/tools/picard.wdl index fb3d49aad..7d64ff148 100755 --- a/tools/picard.wdl +++ b/tools/picard.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://broadinstitute.github.io/picard/) - version 1.1 task mark_duplicates { @@ -84,13 +83,8 @@ task mark_duplicates { Float bam_size = size(bam, "GB") Int memory_gb = min(ceil(bam_size + 12), 50) + modify_memory_gb - Int disk_size_gb = ( - ( - if create_bam - then ceil((bam_size * 2) + 10) - else ceil(bam_size + 10) - ) + modify_disk_size_gb - ) + Int disk_size_gb = ((if create_bam then ceil((bam_size * 2) + 10) else ceil(bam_size + 10 + )) + modify_disk_size_gb) Int java_heap_size = ceil(memory_gb * 0.9) @@ -105,9 +99,8 @@ task mark_duplicates { --CREATE_MD5_FILE ~{create_bam} \ --VALIDATION_STRINGENCY "~{validation_stringency}" \ --DUPLICATE_SCORING_STRATEGY "~{duplicate_scoring_strategy}" \ - --READ_NAME_REGEX '~{ - if (optical_distance > 0) then read_name_regex else "null" - }' \ + --READ_NAME_REGEX '~{if (optical_distance > 0) then read_name_regex else "null" + }' \ --TAGGING_POLICY "~{tagging_policy}" \ --CLEAR_DT ~{clear_dt} \ --REMOVE_DUPLICATES ~{remove_duplicates} \ @@ -196,11 +189,8 @@ task validate_bam { String outfile = if summary_mode then outfile_name else outfile_name + ".gz" String mode_arg = if (summary_mode) then "--MODE SUMMARY" else "" - String stringency_arg = ( - if (index_validation_stringency_less_exhaustive) - then "--INDEX_VALIDATION_STRINGENCY LESS_EXHAUSTIVE" - else "" - ) + String stringency_arg = (if (index_validation_stringency_less_exhaustive) then "--INDEX_VALIDATION_STRINGENCY LESS_EXHAUSTIVE" + else "") Float bam_size = size(bam, "GB") Int disk_size_gb = ceil(bam_size) + 10 + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) @@ -420,7 +410,7 @@ task merge_sam_files { File merged_bam_md5 = outfile_name + ".md5" } - runtime{ + runtime { cpu: if threading then 2 else 1 memory: "~{memory_gb} GB" disks: "~{disk_size_gb} GB" @@ -506,7 +496,7 @@ task collect_wgs_metrics { wgs_metrics: { description: "Output report of `picard CollectWgsMetrics`", external_help: "https://broadinstitute.github.io/picard/picard-metric-definitions.html#CollectWgsMetrics.WgsMetrics", - } + }, } } @@ -853,11 +843,7 @@ task bam_to_fastq { picard -Xmx~{java_heap_size}g SamToFastq INPUT="~{bam}" \ FASTQ="~{prefix}.R1.fastq" \ - ~{( - if paired - then "SECOND_END_FASTQ='" + prefix + ".R2.fastq'" - else "" - )} \ + ~{(if paired then "SECOND_END_FASTQ='" + prefix + ".R2.fastq'" else "")} \ RE_REVERSE=true \ VALIDATION_STRINGENCY=SILENT @@ -870,7 +856,7 @@ task bam_to_fastq { File? read_two_fastq_gz = "~{prefix}.R2.fastq.gz" } - runtime{ + runtime { memory: "~{memory_gb} GB" disks: "~{disk_size_gb} GB" container: "quay.io/biocontainers/picard:3.1.1--hdfd78af_0" @@ -934,7 +920,7 @@ task scatter_interval_list { } } - parameter_meta { + parameter_meta { interval_list: "Input interval list to split" scatter_count: "Number of interval lists to create" subdivision_mode: { @@ -1001,7 +987,7 @@ task create_sequence_dictionary { description: "Creates a sequence dictionary for the input FASTA file using Picard" external_help: "https://gatk.broadinstitute.org/hc/en-us/articles/13832748622491-CreateSequenceDictionary-Picard-" outputs: { - dictionary: "Sequence dictionary produced by `picard CreateSequenceDictionary`." + dictionary: "Sequence dictionary produced by `picard CreateSequenceDictionary`.", } } diff --git a/tools/qualimap.wdl b/tools/qualimap.wdl index deb67571a..92067aa34 100755 --- a/tools/qualimap.wdl +++ b/tools/qualimap.wdl @@ -1,5 +1,4 @@ ## [Homepage](http://qualimap.bioinfo.cipf.es/) - version 1.1 task rnaseq { @@ -50,13 +49,9 @@ task rnaseq { # Qualimap has an inefficient name sorting algorithm and will # use an excessive amount of storage. - Int disk_size_gb = ( - ( - if name_sorted - then ceil(bam_size + gtf_size + 15) - else ceil(((bam_size + gtf_size) * 12) + 10) - ) + modify_disk_size_gb - ) + Int disk_size_gb = ((if name_sorted then ceil(bam_size + gtf_size + 15) else ceil((( + bam_size + gtf_size + ) * 12) + 10)) + modify_disk_size_gb) command <<< set -euo pipefail @@ -81,8 +76,7 @@ task rnaseq { output { File raw_summary = "~{prefix}/rnaseq_qc_results.txt" - File raw_coverage - = "~{prefix}/raw_data_qualimapReport/coverage_profile_along_genes_(total).txt" + File raw_coverage = "~{prefix}/raw_data_qualimapReport/coverage_profile_along_genes_(total).txt" File results = out_tar_gz } diff --git a/tools/sambamba.wdl b/tools/sambamba.wdl index b75b87815..d0e688e9c 100644 --- a/tools/sambamba.wdl +++ b/tools/sambamba.wdl @@ -1,12 +1,11 @@ ## [Homepage](https://lomereiter.github.io/sambamba/) - version 1.1 task index { meta { description: "Creates a `.bai` BAM index for the input BAM" outputs: { - bam_index: "A `.bai` BAM index associated with the input BAM. Filename will be `basename(bam) + '.bai'`." + bam_index: "A `.bai` BAM index associated with the input BAM. Filename will be `basename(bam) + '.bai'`.", } } @@ -63,7 +62,7 @@ task merge { meta { description: "Merges multiple sorted BAMs into a single BAM" outputs: { - merged_bam: "The BAM resulting from merging all the input BAMs" + merged_bam: "The BAM resulting from merging all the input BAMs", } } @@ -123,7 +122,7 @@ task sort { meta { description: "Sorts the input BAM file" outputs: { - sorted_bam: "The input BAM after it has been sorted according to `sort_order`" + sorted_bam: "The input BAM after it has been sorted according to `sort_order`", } } @@ -234,7 +233,7 @@ task flagstat { meta { description: "Produces a report containing statistics about the alignments based on the bit flags set in the BAM" outputs: { - flagstat_report: "`sambamba flagstat` STDOUT redirected to a file" + flagstat_report: "`sambamba flagstat` STDOUT redirected to a file", } } @@ -275,7 +274,7 @@ task flagstat { >>> output { - File flagstat_report = outfile_name + File flagstat_report = outfile_name } runtime { diff --git a/tools/samtools.wdl b/tools/samtools.wdl index 05d75f414..4a1259de8 100755 --- a/tools/samtools.wdl +++ b/tools/samtools.wdl @@ -1,5 +1,4 @@ ## [Homepage](http://samtools.sourceforge.net/) - version 1.1 import "../data_structures/flag_filter.wdl" @@ -38,7 +37,7 @@ task split { meta { description: "Runs Samtools split on the input BAM file. This splits the BAM by read group into one or more output files." outputs: { - split_bams: "The split BAM files. The extensions will contain read group IDs, and will end in `.bam`." + split_bams: "The split BAM files. The extensions will contain read group IDs, and will end in `.bam`.", } } @@ -131,12 +130,12 @@ task split { rm first_read.sam done fi - + exit $EXITCODE >>> output { - Array[File] split_bams = glob("*.bam") + Array[File] split_bams = glob("*.bam") } runtime { @@ -152,7 +151,7 @@ task flagstat { meta { description: "Produces a `samtools flagstat` report containing statistics about the alignments based on the bit flags set in the BAM" outputs: { - flagstat_report: "`samtools flagstat` STDOUT redirected to a file" + flagstat_report: "`samtools flagstat` STDOUT redirected to a file", } } @@ -195,7 +194,7 @@ task flagstat { >>> output { - File flagstat_report = outfile_name + File flagstat_report = outfile_name } runtime { @@ -210,7 +209,7 @@ task index { meta { description: "Creates a `.bai` BAM index for the input BAM" outputs: { - bam_index: "A `.bai` BAM index associated with the input BAM. Filename will be `basename(bam) + '.bai'`." + bam_index: "A `.bai` BAM index associated with the input BAM. Filename will be `basename(bam) + '.bai'`.", } } @@ -397,7 +396,6 @@ task subsample { fi rm first_read.sam fi - >>> output { @@ -419,7 +417,7 @@ task filter { description: "Filters a BAM based on its bitwise flag value." help: "This task is a wrapper around `samtools view`. This task will fail if there are no reads in the output BAM. This can happen either because the input BAM was empty or because the supplied `bitwise_filter` was too strict. If you want to down-sample a BAM, use the `subsample` task instead." outputs: { - filtered_bam: "BAM file that has been filtered based on the input flags" + filtered_bam: "BAM file that has been filtered based on the input flags", } } @@ -505,7 +503,7 @@ task merge { meta { description: "Merges multiple sorted BAMs into a single BAM" outputs: { - merged_bam: "The BAM resulting from merging all the input BAMs" + merged_bam: "The BAM resulting from merging all the input BAMs", } } @@ -613,7 +611,7 @@ task addreplacerg { meta { description: "Adds or replaces read group tags" outputs: { - tagged_bam: "The transformed input BAM after read group modifications have been applied" + tagged_bam: "The transformed input BAM after read group modifications have been applied", } } @@ -700,7 +698,7 @@ task collate { meta { description: "Runs `samtools collate` on the input BAM file. Shuffles and groups reads together by their names." outputs: { - collated_bam: "A collated BAM (reads sharing a name next to each other, no other guarantee of sort order)" + collated_bam: "A collated BAM (reads sharing a name next to each other, no other guarantee of sort order)", } } @@ -854,16 +852,10 @@ task bam_to_fastq { } Float bam_size = size(bam, "GB") - Int memory_gb = ( - if (collated || !paired_end) - then 4 - else (ceil(bam_size * 0.4) + 4) - ) + modify_memory_gb - Int disk_size_gb = ceil(bam_size * ( - if (retain_collated_bam && !collated && paired_end) - then 5 - else 2 - )) + 10 + modify_disk_size_gb + Int memory_gb = (if (collated || !paired_end) then 4 else (ceil(bam_size * 0.4) + 4)) + + modify_memory_gb + Int disk_size_gb = ceil(bam_size * (if (retain_collated_bam && !collated && paired_end + ) then 5 else 2)) + 10 + modify_disk_size_gb command <<< set -euo pipefail @@ -883,11 +875,8 @@ task bam_to_fastq { ~{if fast_mode then "-f" else ""} \ -O \ "~{bam}" \ - | tee ~{( - if retain_collated_bam - then "\"" + prefix + ".collated.bam\"" - else "" - )} \ + | tee ~{(if retain_collated_bam then "\"" + prefix + ".collated.bam\"" + else "")} \ > bam_pipe \ & else @@ -900,35 +889,15 @@ task bam_to_fastq { -F "~{bitwise_filter.exclude_if_any}" \ --rf "~{bitwise_filter.include_if_any}" \ -G "~{bitwise_filter.exclude_if_all}" \ - ~{( - if append_read_number - then "-N" - else "-n" - )} \ - -1 ~{( - if paired_end - then "\"" + prefix + ".R1.fastq.gz\"" - else "\"" + prefix + ".fastq.gz\"" - )} \ - -2 ~{( - if paired_end - then "\"" + prefix + ".R2.fastq.gz\"" - else "\"" + prefix + ".fastq.gz\"" - )} \ - ~{( - if paired_end - then ( - if output_singletons - then "-s \"" + prefix + ".singleton.fastq.gz\"" - else "-s junk.singleton.fastq.gz" - ) - else "" - )} \ - -0 ~{( - if paired_end - then "junk.unknown_bit_setting.fastq.gz" - else "\"" + prefix + ".fastq.gz\"" - )} \ + ~{(if append_read_number then "-N" else "-n")} \ + -1 ~{(if paired_end then "\"" + prefix + ".R1.fastq.gz\"" else "\"" + prefix + ".fastq.gz\"" + )} \ + -2 ~{(if paired_end then "\"" + prefix + ".R2.fastq.gz\"" else "\"" + prefix + ".fastq.gz\"" + )} \ + ~{(if paired_end then (if output_singletons then "-s \"" + prefix + ".singleton.fastq.gz\"" + else "-s junk.singleton.fastq.gz") else "")} \ + -0 ~{(if paired_end then "junk.unknown_bit_setting.fastq.gz" else "\"" + prefix + + ".fastq.gz\"")} \ bam_pipe rm bam_pipe @@ -971,7 +940,7 @@ task fixmate { description: "Runs `samtools fixmate` on the input BAM file. This fills in mate coordinates and insert size fields among other tags and fields." warning: "This task assumes a name-sorted or name-collated input BAM. If you have a position-sorted BAM, please use the `position_sorted_fixmate` task." outputs: { - fixmate_bam: "The BAM resulting from running `samtools fixmate` on the input BAM" + fixmate_bam: "The BAM resulting from running `samtools fixmate` on the input BAM", } } @@ -1070,7 +1039,7 @@ task position_sorted_fixmate { warning: "If you already have a collated BAM, please use the `fixmate` task." help: "`fixmate` fills in mate coordinates and insert size fields among other tags and fields. This task collates the input BAM, runs `fixmate`, and then resorts the output into a position-sorted BAM." outputs: { - fixmate_bam: "BAM file with mate information added" + fixmate_bam: "BAM file with mate information added", } } @@ -1314,7 +1283,7 @@ task faidx { meta { description: "Creates a `.fai` FASTA index for the input FASTA" outputs: { - fasta_index: "A `.fai` FASTA index associated with the input FASTA. Filename will be `basename(fasta) + '.fai'`." + fasta_index: "A `.fai` FASTA index associated with the input FASTA. Filename will be `basename(fasta) + '.fai'`.", } } diff --git a/tools/star.wdl b/tools/star.wdl index 920e34a19..2c8096017 100755 --- a/tools/star.wdl +++ b/tools/star.wdl @@ -1,12 +1,11 @@ ## [Homepage](https://github.com/alexdobin/STAR) - version 1.1 task build_star_db { meta { description: "Runs STAR's build command to generate a STAR format reference for alignment" outputs: { - star_db: "A gzipped TAR file containing the STAR reference files. Suitable as the `star_db_tar_gz` input to the `alignment` task." + star_db: "A gzipped TAR file containing the STAR reference files. Suitable as the `star_db_tar_gz` input to the `alignment` task.", } } @@ -86,9 +85,8 @@ task build_star_db { Float reference_fasta_size = size(reference_fasta, "GB") Float gtf_size = size(gtf, "GB") - Int disk_size_gb = ( - ceil((reference_fasta_size + gtf_size) * 3) + 10 + modify_disk_size_gb - ) + Int disk_size_gb = (ceil((reference_fasta_size + gtf_size) * 3) + 10 + modify_disk_size_gb + ) # Leave 2GB as system overhead String memory_limit_bytes = "~{memory_gb - 2}000000000" @@ -558,7 +556,11 @@ task alignment { Array[File] read_one_fastqs_gz Array[String] read_groups Array[File]? read_two_fastqs_gz - Array[Int] out_sj_filter_intron_max_vs_read_n = [50000, 100000, 200000] + Array[Int] out_sj_filter_intron_max_vs_read_n = [ + 50000, + 100000, + 200000, + ] SpliceJunctionMotifs out_sj_filter_overhang_min = SpliceJunctionMotifs { noncanonical_motifs: 30, GT_AG_and_CT_AC_motif: 12, @@ -595,11 +597,9 @@ task alignment { Pair[Int, Int] clip_3p_n_bases = (0, 0) Pair[Int, Int] clip_3p_after_adapter_n_bases = (0, 0) Pair[Int, Int] clip_5p_n_bases = (0, 0) - String prefix = sub( - basename(read_one_fastqs_gz[0]), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(read_one_fastqs_gz[0]), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 - ) + ) String read_name_separator = "/" String clip_adapter_type = "Hamming" String out_sam_strand_field = "intronMotif" @@ -699,16 +699,16 @@ task alignment { Int modify_disk_size_gb = 0 } - Array[File] read_twos = select_first([read_two_fastqs_gz, []]) + Array[File] read_twos = select_first([ + read_two_fastqs_gz, + [], + ]) Float read_one_fastqs_size = size(read_one_fastqs_gz, "GB") Float read_two_fastqs_size = size(read_twos, "GB") Float star_db_tar_gz_size = size(star_db_tar_gz, "GB") - Int disk_size_gb = ( - ( - ceil(read_one_fastqs_size + read_two_fastqs_size + star_db_tar_gz_size) * 3 - ) + 10 + modify_disk_size_gb - ) + Int disk_size_gb = ((ceil(read_one_fastqs_size + read_two_fastqs_size + star_db_tar_gz_size + ) * 3) + 10 + modify_disk_size_gb) command <<< set -euo pipefail @@ -733,9 +733,9 @@ task alignment { --outFileNamePrefix "~{prefix + "."}" \ --twopassMode "~{twopass_mode}" \ --outSAMattrRGline ~{sep(" , ", read_groups)} \ - --outSJfilterIntronMaxVsReadN ~{ - sep(" ", quote(out_sj_filter_intron_max_vs_read_n)) - } \ + --outSJfilterIntronMaxVsReadN ~{sep(" ", quote( + out_sj_filter_intron_max_vs_read_n + ))} \ --outSJfilterOverhangMin ~{sep(" ", quote([ out_sj_filter_overhang_min.noncanonical_motifs, out_sj_filter_overhang_min.GT_AG_and_CT_AC_motif, @@ -766,36 +766,18 @@ task alignment { align_sj_stitch_mismatch_n_max.GC_AG_and_CT_GC_motif, align_sj_stitch_mismatch_n_max.AT_AC_and_GT_AT_motif, ]))} \ - --clip3pAdapterSeq "~{clip_3p_adapter_seq.left}" ~{( - if (length(read_twos) != 0) - then "'" + clip_3p_adapter_seq.right + "'" - else "" - )} \ - --clip3pAdapterMMp ~{clip_3p_adapter_mmp.left} ~{( - if (length(read_twos) != 0) - then clip_3p_adapter_mmp.right - else None - )} \ - --alignEndsProtrude ~{align_ends_protrude.left} "~{( - if (length(read_twos) != 0) - then align_ends_protrude.right - else None - )}" \ - --clip3pNbases ~{clip_3p_n_bases.left} ~{( - if (length(read_twos) != 0) - then clip_3p_n_bases.right - else None - )} \ - --clip3pAfterAdapterNbases ~{clip_3p_after_adapter_n_bases.left} ~{( - if (length(read_twos) != 0) - then clip_3p_after_adapter_n_bases.right - else None - )} \ - --clip5pNbases ~{clip_5p_n_bases.left} ~{( - if (length(read_twos) != 0) - then clip_5p_n_bases.right - else None - )} \ + --clip3pAdapterSeq "~{clip_3p_adapter_seq.left}" ~{(if (length(read_twos) != 0 + ) then "'" + clip_3p_adapter_seq.right + "'" else "")} \ + --clip3pAdapterMMp ~{clip_3p_adapter_mmp.left} ~{(if (length(read_twos) != 0) + then clip_3p_adapter_mmp.right else None)} \ + --alignEndsProtrude ~{align_ends_protrude.left} "~{(if (length(read_twos) != 0 + ) then align_ends_protrude.right else None)}" \ + --clip3pNbases ~{clip_3p_n_bases.left} ~{(if (length(read_twos) != 0) then clip_3p_n_bases.right + else None)} \ + --clip3pAfterAdapterNbases ~{clip_3p_after_adapter_n_bases.left} ~{(if (length( + read_twos) != 0) then clip_3p_after_adapter_n_bases.right else None)} \ + --clip5pNbases ~{clip_5p_n_bases.left} ~{(if (length(read_twos) != 0) then clip_5p_n_bases.right + else None)} \ --readNameSeparator "~{read_name_separator}" \ --clipAdapterType "~{clip_adapter_type}" \ --outSAMstrandField "~{out_sam_strand_field}" \ @@ -803,13 +785,9 @@ task alignment { --outSAMunmapped "~{out_sam_unmapped}" \ --outSAMorder "~{out_sam_order}" \ --outSAMreadID "~{out_sam_read_id}" \ - --outSAMtlen ~{( - if (out_sam_tlen == "left_plus") - then "1" - else ( - if (out_sam_tlen == "left_any") then "2" else "error" - ) - )} \ + --outSAMtlen ~{(if (out_sam_tlen == "left_plus") then "1" else (if ( + out_sam_tlen == "left_any" + ) then "2" else "error"))} \ --outFilterType "~{out_filter_type}" \ --outFilterIntronMotifs "~{out_filter_intron_motifs}" \ --outFilterIntronStrands "~{out_filter_intron_strands}" \ diff --git a/tools/util.wdl b/tools/util.wdl index f339cb603..06ad3527d 100644 --- a/tools/util.wdl +++ b/tools/util.wdl @@ -1,12 +1,11 @@ ## # Utilities - version 1.1 task download { meta { description: "Uses wget to download a file from a remote URL to the local filesystem" outputs: { - downloaded_file: "File downloaded from provided URL" + downloaded_file: "File downloaded from provided URL", } } @@ -53,7 +52,7 @@ task split_string { description: "Split a string into an array of strings based on a delimiter" warning: "This implementation will result in a runtime error if the provided string has any embedded single quotes (`'`)!" outputs: { - split_strings: "Split string as an array" + split_strings: "Split string as an array", } } @@ -90,7 +89,7 @@ task calc_feature_lengths { description: "Calculate feature lengths from a GTF file using the non-overlapping exonic length algorithm" help: "The non-overlapping exonic length algorithm can be implemented as the sum of each base covered by at least one exon; where each base is given a value of 1 regardless of how many exons overlap it." outputs: { - feature_lengths: "A two column headered TSV file with feature names in the first column and feature lengths (as integers) in the second column" + feature_lengths: "A two column headered TSV file with feature names in the first column and feature lengths (as integers) in the second column", } } @@ -166,7 +165,7 @@ task add_to_bam_header { meta { description: "Adds another line of text to the bottom of a BAM header" outputs: { - reheadered_bam: "The BAM after its header has been modified" + reheadered_bam: "The BAM after its header has been modified", } } @@ -215,7 +214,7 @@ task unpack_tarball { meta { description: "Accepts a `.tar.gz` archive and converts it into a flat array of files. Any directory structure of the archive is ignored." outputs: { - tarball_contents: "An array of files found in the input tarball" + tarball_contents: "An array of files found in the input tarball", } } @@ -314,7 +313,7 @@ task global_phred_scores { meta { description: "Calculates statistics about PHRED scores of the input BAM" outputs: { - phred_scores: "Headered TSV file containing PHRED score statistics" + phred_scores: "Headered TSV file containing PHRED score statistics", } } @@ -384,16 +383,17 @@ task check_fastq_and_rg_concordance { Array[String]? read_two_names } - Array[String] read_twos = select_first([read_two_names, []]) + Array[String] read_twos = select_first([ + read_two_names, + [], + ]) command <<< python3 /scripts/util/check_FQs_and_RGs.py \ --read-one-fastqs "~{sep(",", read_one_names)}" \ - ~{( - if length(read_twos) > 0 - then "--read-two-fastqs \"" + sep(",", squote(read_twos)) + "\"" - else "" - )} \ + ~{(if length(read_twos) > 0 then "--read-two-fastqs \"" + sep(",", squote( + read_twos + )) + "\"" else "")} \ --read-groups "~{sep(",", read_groups)}" >>> @@ -407,7 +407,7 @@ task split_fastq { meta { description: "Splits a FASTQ into multiple files based on the number of reads per file" outputs: { - fastqs: "Array of FASTQ files, each containing a subset of the input FASTQ" + fastqs: "Array of FASTQ files, each containing a subset of the input FASTQ", } } @@ -427,11 +427,7 @@ task split_fastq { input { File fastq - String prefix = sub( - basename(fastq), - "(fastq|fq)\\.gz$", - "" - ) + String prefix = sub(basename(fastq), "(fastq|fq)\\.gz$", "") Int reads_per_file = 10000000 Int modify_disk_size_gb = 0 Int ncpu = 2 diff --git a/workflows/chipseq/chipseq-standard.wdl b/workflows/chipseq/chipseq-standard.wdl index d9f9290df..aa30759fe 100755 --- a/workflows/chipseq/chipseq-standard.wdl +++ b/workflows/chipseq/chipseq-standard.wdl @@ -9,11 +9,14 @@ import "../../tools/samtools.wdl" import "../../tools/util.wdl" import "../general/bam-to-fastqs.wdl" as b2fq #@ except: LineWidth -import "https://raw.githubusercontent.com/stjude/seaseq/2.3/workflows/workflows/mapping.wdl" as seaseq_map +import "https://raw.githubusercontent.com/stjude/seaseq/2.3/workflows/workflows/mapping.wdl" + as seaseq_map #@ except: LineWidth -import "https://raw.githubusercontent.com/stjude/seaseq/3.0/workflows/tasks/samtools.wdl" as seaseq_samtools +import "https://raw.githubusercontent.com/stjude/seaseq/3.0/workflows/tasks/samtools.wdl" + as seaseq_samtools #@ except: LineWidth -import "https://raw.githubusercontent.com/stjude/seaseq/3.0/workflows/tasks/seaseq_util.wdl" as seaseq_util +import "https://raw.githubusercontent.com/stjude/seaseq/3.0/workflows/tasks/seaseq_util.wdl" + as seaseq_util workflow chipseq_standard_experimental { meta { @@ -67,7 +70,10 @@ workflow chipseq_standard_experimental { use_all_cores, } } - File selected_bam = select_first([subsample.sampled_bam, bam]) + File selected_bam = select_first([ + subsample.sampled_bam, + bam, + ]) call read_group.get_read_groups after validate_input_bam { input: bam = selected_bam, @@ -79,7 +85,7 @@ workflow chipseq_standard_experimental { use_all_cores, } - scatter (pair in zip(bam_to_fastqs.read1s, get_read_groups.read_groups)){ + scatter (pair in zip(bam_to_fastqs.read1s, get_read_groups.read_groups)) { if (enable_read_trimming) { call fp.fastp as trim { input: read_one_fastq = pair.left, @@ -93,10 +99,13 @@ workflow chipseq_standard_experimental { } } - File chosen_fastq = select_first([trim.single_end_reads_fastq_gz, pair.left]) + File chosen_fastq = select_first([ + trim.single_end_reads_fastq_gz, + pair.left, + ]) call seaseq_util.basicfastqstats as basic_stats { input: - fastqfile = chosen_fastq + fastqfile = chosen_fastq, } call seaseq_map.mapping as bowtie_single_end_mapping { input: fastqfile = chosen_fastq, @@ -104,13 +113,11 @@ workflow chipseq_standard_experimental { metricsfile = basic_stats.metrics_out, blacklist = excludelist, } - File chosen_bam = select_first( - [ - bowtie_single_end_mapping.bklist_bam, - bowtie_single_end_mapping.mkdup_bam, - bowtie_single_end_mapping.sorted_bam, - ] - ) + File chosen_bam = select_first([ + bowtie_single_end_mapping.bklist_bam, + bowtie_single_end_mapping.mkdup_bam, + bowtie_single_end_mapping.sorted_bam, + ]) call read_group.read_group_to_string { input: read_group = pair.right, @@ -127,7 +134,7 @@ workflow chipseq_standard_experimental { } Array[File] aligned_bams = addreplacerg.tagged_bam - scatter(aligned_bam in aligned_bams){ + scatter (aligned_bam in aligned_bams) { call picard.clean_sam as picard_clean { input: bam = aligned_bam, } @@ -147,7 +154,9 @@ workflow chipseq_standard_experimental { use_all_cores, } #@ except: UnusedCall - call picard.validate_bam { input: bam = markdup.mkdupbam } + call picard.validate_bam { input: + bam = markdup.mkdupbam, + } call md5sum.compute_checksum { input: file = markdup.mkdupbam, @@ -164,9 +173,13 @@ workflow chipseq_standard_experimental { File bam_checksum = compute_checksum.md5sum File bam_index = samtools_index.bam_index File bigwig = deeptools_bam_coverage.bigwig - Array[File] fastp_reports = select_all(flatten([fastp.report, trim.report])) - Array[File] fastp_jsons = select_all(flatten( - [fastp.report_json, trim.report_json] - )) + Array[File] fastp_reports = select_all(flatten([ + fastp.report, + trim.report, + ])) + Array[File] fastp_jsons = select_all(flatten([ + fastp.report_json, + trim.report_json, + ])) } } diff --git a/workflows/dnaseq/dnaseq-core.wdl b/workflows/dnaseq/dnaseq-core.wdl index 62b027db2..21a1d3990 100644 --- a/workflows/dnaseq/dnaseq-core.wdl +++ b/workflows/dnaseq/dnaseq-core.wdl @@ -1,5 +1,4 @@ ## **WARNING:** this workflow is experimental! Use at your own risk! - version 1.1 import "../../tools/bwa.wdl" @@ -69,10 +68,7 @@ workflow dnaseq_core_experimental { read_groups, } - scatter (tuple in zip( - zip(read_one_fastqs_gz, read_two_fastqs_gz), - read_groups - )) { + scatter (tuple in zip(zip(read_one_fastqs_gz, read_two_fastqs_gz), read_groups)) { if (enable_read_trimming) { call fp.fastp as trim after validate { input: read_one_fastq = tuple.left.left, @@ -87,8 +83,14 @@ workflow dnaseq_core_experimental { output_fastq = enable_read_trimming, } } - File chosen_r1_fastq = select_first([trim.read_one_fastq_gz, tuple.left.left]) - File chosen_r2_fastq = select_first([trim.read_two_fastq_gz, tuple.left.right]) + File chosen_r1_fastq = select_first([ + trim.read_one_fastq_gz, + tuple.left.left, + ]) + File chosen_r2_fastq = select_first([ + trim.read_two_fastq_gz, + tuple.left.right, + ]) call util.split_fastq as read_ones after validate { input: fastq = chosen_r1_fastq, @@ -105,11 +107,8 @@ workflow dnaseq_core_experimental { read_one_fastq_gz = t.left, read_two_fastq_gz = t.right, bwa_db_tar_gz = bwa_db, - prefix = sub(sub( - basename(t.left), - "(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", - "" - ), "\\.([rR][12])\\.", "."), + prefix = sub(sub(basename(t.left), "(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", + ""), "\\.([rR][12])\\.", "."), read_group = tuple.right, use_all_cores, } @@ -119,17 +118,17 @@ workflow dnaseq_core_experimental { read_one_fastq_gz = t.left, read_two_fastq_gz = t.right, bwa_db_tar_gz = bwa_db, - prefix = sub(sub( - basename(t.left), - "(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", - "" - ), "\\.([rR][12])\\.", "."), + prefix = sub(sub(basename(t.left), "(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", + ""), "\\.([rR][12])\\.", "."), read_group = tuple.right, use_all_cores, } } call picard.sort as sort { input: - bam = select_first([bwa_mem.bam, bwa_aln_pe.bam]) + bam = select_first([ + bwa_mem.bam, + bwa_aln_pe.bam, + ]), } } } @@ -146,9 +145,13 @@ workflow dnaseq_core_experimental { output { File harmonized_bam = merge.merged_bam File harmonized_bam_index = index.bam_index - Array[File] fastp_reports = select_all(flatten([fastp.report, trim.report])) - Array[File] fastp_jsons = select_all(flatten( - [fastp.report_json, trim.report_json] - )) + Array[File] fastp_reports = select_all(flatten([ + fastp.report, + trim.report, + ])) + Array[File] fastp_jsons = select_all(flatten([ + fastp.report_json, + trim.report_json, + ])) } } diff --git a/workflows/dnaseq/dnaseq-standard-fastq.wdl b/workflows/dnaseq/dnaseq-standard-fastq.wdl index c0542c19d..aa9b29890 100644 --- a/workflows/dnaseq/dnaseq-standard-fastq.wdl +++ b/workflows/dnaseq/dnaseq-standard-fastq.wdl @@ -1,5 +1,4 @@ ## **WARNING:** this workflow is experimental! Use at your own risk! - version 1.1 import "../../data_structures/read_group.wdl" @@ -54,11 +53,9 @@ workflow dnaseq_standard_fastq_experimental { Array[File] read_one_fastqs_gz Array[File] read_two_fastqs_gz Array[ReadGroup] read_groups - String prefix = sub( - basename(read_one_fastqs_gz[0]), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(read_one_fastqs_gz[0]), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 - ) + ) String aligner = "mem" Boolean enable_read_trimming = false Boolean validate_input = true @@ -101,12 +98,10 @@ workflow dnaseq_standard_fastq_experimental { subsample.subsampled_read1, read_one_fastqs_gz, ]) - Array[File] selected_read_two_fastqs = select_all( - select_first([ - subsample.subsampled_read2, - read_two_fastqs_gz, - ]) - ) + Array[File] selected_read_two_fastqs = select_all(select_first([ + subsample.subsampled_read2, + read_two_fastqs_gz, + ])) call dnaseq_core_wf.dnaseq_core_experimental after fqlint { input: read_one_fastqs_gz = selected_read_one_fastqs, diff --git a/workflows/dnaseq/dnaseq-standard.wdl b/workflows/dnaseq/dnaseq-standard.wdl index 3d4ff774a..270f6bd2d 100644 --- a/workflows/dnaseq/dnaseq-standard.wdl +++ b/workflows/dnaseq/dnaseq-standard.wdl @@ -1,5 +1,4 @@ ## **WARNING:** this workflow is experimental! Use at your own risk! - version 1.1 import "../../data_structures/read_group.wdl" @@ -55,7 +54,7 @@ workflow dnaseq_standard_experimental { } call parse_input { input: - aligner + aligner, } if (validate_input) { @@ -71,7 +70,10 @@ workflow dnaseq_standard_experimental { use_all_cores, } } - File selected_bam = select_first([subsample.sampled_bam, bam]) + File selected_bam = select_first([ + subsample.sampled_bam, + bam, + ]) call read_group.get_read_groups after parse_input { input: bam = selected_bam, @@ -95,7 +97,10 @@ workflow dnaseq_standard_experimental { SM: sample_override, } } - ReadGroup selected_rg = select_first([overriden_rg, rg]) + ReadGroup selected_rg = select_first([ + overriden_rg, + rg, + ]) call read_group.read_group_to_string { input: read_group = selected_rg, format_as_sam_record = true, diff --git a/workflows/general/alignment-post.wdl b/workflows/general/alignment-post.wdl index 53c18d64a..9caa5344c 100644 --- a/workflows/general/alignment-post.wdl +++ b/workflows/general/alignment-post.wdl @@ -4,7 +4,8 @@ import "../../tools/md5sum.wdl" import "../../tools/picard.wdl" import "../../tools/samtools.wdl" #@ except: LineWidth -import "https://raw.githubusercontent.com/stjude/XenoCP/4.0.0-alpha/wdl/workflows/xenocp.wdl" as xenocp_wf +import "https://raw.githubusercontent.com/stjude/XenoCP/4.0.0-alpha/wdl/workflows/xenocp.wdl" + as xenocp_wf workflow alignment_post { meta { @@ -12,7 +13,7 @@ workflow alignment_post { outputs: { processed_bam: "Input BAM after being transformed by standard processing", bam_index: "BAI index associated with `processed_bam`", - bam_checksum: "STDOUT of the `md5sum` command run on the input BAM that has been redirected to a file", + bam_checksum: "STDOUT of the `md5sum` command run on the input BAM that has been redirected to a file", validate_report: "Validation report produced by `picard ValidateSamFile`. Validation warnings and errors are logged.", } allowNestedInputs: true @@ -46,7 +47,9 @@ workflow alignment_post { Boolean use_all_cores = false } - call picard.sort as picard_sort { input: bam } + call picard.sort as picard_sort { input: + bam, + } if (cleanse_xenograft) { call samtools.index as pre_xenocp_index { input: @@ -57,14 +60,23 @@ workflow alignment_post { call xenocp_wf.xenocp { input: input_bam = picard_sort.sorted_bam, input_bai = pre_xenocp_index.bam_index, - reference_tar_gz = select_first([contaminant_db, ""]), - aligner = select_first([xenocp_aligner, "undefined"]), + reference_tar_gz = select_first([ + contaminant_db, + "", + ]), + aligner = select_first([ + xenocp_aligner, + "undefined", + ]), skip_duplicate_marking = true, } } if (mark_duplicates) { call picard.mark_duplicates as picard_markdup { input: - bam = select_first([xenocp.bam, picard_sort.sorted_bam]), + bam = select_first([ + xenocp.bam, + picard_sort.sorted_bam, + ]), } } @@ -79,9 +91,13 @@ workflow alignment_post { use_all_cores, } File aligned_bam_index = samtools_index.bam_index - call picard.validate_bam { input: bam = aligned_bam } + call picard.validate_bam { input: + bam = aligned_bam, + } - call md5sum.compute_checksum { input: file = aligned_bam } + call md5sum.compute_checksum { input: + file = aligned_bam, + } output { File processed_bam = aligned_bam diff --git a/workflows/general/bam-to-fastqs.wdl b/workflows/general/bam-to-fastqs.wdl index 409ac0133..d70a3588e 100644 --- a/workflows/general/bam-to-fastqs.wdl +++ b/workflows/general/bam-to-fastqs.wdl @@ -27,7 +27,9 @@ workflow bam_to_fastqs { Boolean use_all_cores = false } - call samtools.quickcheck { input: bam } + call samtools.quickcheck { input: + bam, + } call samtools.split after quickcheck { input: bam, @@ -42,11 +44,13 @@ workflow bam_to_fastqs { } if (paired_end) { - scatter (reads in - zip(bam_to_fastq.read_one_fastq_gz, bam_to_fastq.read_two_fastq_gz) - ) { + scatter (reads in zip(bam_to_fastq.read_one_fastq_gz, bam_to_fastq.read_two_fastq_gz + )) { call fq.fqlint { input: - read_one_fastq = select_first([reads.left, "undefined"]), + read_one_fastq = select_first([ + reads.left, + "undefined", + ]), read_two_fastq = reads.right, } } @@ -54,17 +58,17 @@ workflow bam_to_fastqs { if (!paired_end) { scatter (fq in bam_to_fastq.single_end_reads_fastq_gz) { call fq.fqlint as se_fqlint { input: - read_one_fastq = select_first([fq, "undefined"]), + read_one_fastq = select_first([ + fq, + "undefined", + ]), } } } output { - Array[File] read1s = ( - if paired_end - then select_all(bam_to_fastq.read_one_fastq_gz) - else select_all(bam_to_fastq.single_end_reads_fastq_gz) - ) + Array[File] read1s = (if paired_end then select_all(bam_to_fastq.read_one_fastq_gz + ) else select_all(bam_to_fastq.single_end_reads_fastq_gz)) Array[File?] read2s = bam_to_fastq.read_two_fastq_gz } } diff --git a/workflows/general/samtools-merge.wdl b/workflows/general/samtools-merge.wdl index a34585c99..a48f6e1d1 100644 --- a/workflows/general/samtools-merge.wdl +++ b/workflows/general/samtools-merge.wdl @@ -1,21 +1,20 @@ ## **WARNING:** this workflow is experimental! Use at your own risk! - version 1.1 import "../../tools/samtools.wdl" workflow samtools_merge { - meta{ + meta { name: "Merge BAMs" description: "Runs `samtools merge`, with optional iteration to avoid maximum command line argument length" category: "Utility" outputs: { - merged_bam: "The BAM resulting from merging all the input BAMs" + merged_bam: "The BAM resulting from merging all the input BAMs", } allowNestedInputs: true } - parameter_meta{ + parameter_meta { bams: "BAMs to merge into a final BAM" prefix: "Prefix for output BAM." use_all_cores: "Use all cores? Recommended for cloud environments." @@ -31,22 +30,19 @@ workflow samtools_merge { Int bam_length = length(bams) - if (bam_length > max_length){ + if (bam_length > max_length) { # Find the number of merges required - scatter (merge_num in range((bam_length / max_length) + 1)){ + scatter (merge_num in range((bam_length / max_length) + 1)) { # Get the sublist of bams - scatter (bam_num in range(max_length)){ - Int num = ( - if merge_num > 0 - then bam_num + (merge_num * max_length) - else bam_num - ) - if (num < bam_length){ + scatter (bam_num in range(max_length)) { + Int num = (if merge_num > 0 then bam_num + (merge_num * max_length) else bam_num + ) + if (num < bam_length) { File bam_list = bams[num] } } } - scatter (list in bam_list){ + scatter (list in bam_list) { call samtools.merge as inner_merge { input: bams = select_all(list), prefix, @@ -65,7 +61,7 @@ workflow samtools_merge { } } - if (bam_length < max_length){ + if (bam_length < max_length) { call samtools.merge as basic_merge { input: bams, prefix, @@ -76,6 +72,9 @@ workflow samtools_merge { } output { - File merged_bam = select_first([final_merge.merged_bam, basic_merge.merged_bam]) + File merged_bam = select_first([ + final_merge.merged_bam, + basic_merge.merged_bam, + ]) } } diff --git a/workflows/methylation/methylation-cohort.wdl b/workflows/methylation/methylation-cohort.wdl index 16e343e4c..9486cc450 100644 --- a/workflows/methylation/methylation-cohort.wdl +++ b/workflows/methylation/methylation-cohort.wdl @@ -38,21 +38,18 @@ workflow methylation_cohort { Int beta_length = length(unfiltered_normalized_beta) Int pval_length = length(p_values) - if (beta_length > max_length){ - scatter (merge_num in range((beta_length / max_length) + 1)){ + if (beta_length > max_length) { + scatter (merge_num in range((beta_length / max_length) + 1)) { # Get the sublist of beta files - scatter (beta_num in range(max_length)){ - Int num = ( - if merge_num > 0 - then beta_num + (merge_num * max_length) - else beta_num - ) - if (num < beta_length){ + scatter (beta_num in range(max_length)) { + Int num = (if merge_num > 0 then beta_num + (merge_num * max_length) else beta_num + ) + if (num < beta_length) { File bam_list = unfiltered_normalized_beta[num] } } } - scatter (iter_index in range(length(bam_list))){ + scatter (iter_index in range(length(bam_list))) { call combine_data as inner_merge { input: files_to_combine = select_all(bam_list[iter_index]), combined_file_name = "~{iter_index}.combined.csv", @@ -64,22 +61,19 @@ workflow methylation_cohort { combined_file_name = "combined_beta.csv", } - if (pval_length > 0 && !skip_pvalue_check){ + if (pval_length > 0 && !skip_pvalue_check) { # If p-values are provided, merge those as well - scatter (merge_num in range((pval_length / max_length) + 1)){ + scatter (merge_num in range((pval_length / max_length) + 1)) { # Get the sublist of p-value files - scatter (pval_num in range(max_length)){ - Int num_p = ( - if merge_num > 0 - then pval_num + (merge_num * max_length) - else pval_num - ) - if (num_p < pval_length){ + scatter (pval_num in range(max_length)) { + Int num_p = (if merge_num > 0 then pval_num + (merge_num * max_length) + else pval_num) + if (num_p < pval_length) { File pval_list = p_values[num_p] } } } - scatter (iter_index in range(length(pval_list))){ + scatter (iter_index in range(length(pval_list))) { call combine_data as inner_merge_pvals { input: files_to_combine = select_all(pval_list[iter_index]), combined_file_name = "~{iter_index}.pvals.combined.csv", @@ -93,12 +87,12 @@ workflow methylation_cohort { } } - if (beta_length <= max_length){ + if (beta_length <= max_length) { call combine_data as simple_merge { input: files_to_combine = unfiltered_normalized_beta, combined_file_name = "combined_beta.csv", } - if (pval_length > 0 && !skip_pvalue_check){ + if (pval_length > 0 && !skip_pvalue_check) { call combine_data as simple_merge_pval { input: files_to_combine = p_values, combined_file_name = "combined_pvals.csv", @@ -106,23 +100,16 @@ workflow methylation_cohort { } } - File? pval_file = ( - if (pval_length > 0 && !skip_pvalue_check) - then select_first( - [ - final_merge_pvals.combined_file, - simple_merge_pval.combined_file, - ]) - else None - ) + File? pval_file = (if (pval_length > 0 && !skip_pvalue_check) then select_first([ + final_merge_pvals.combined_file, + simple_merge_pval.combined_file, + ]) else None) call filter_probes { input: - beta_values = select_first( - [ - final_merge.combined_file, - simple_merge.combined_file, - ] - ), + beta_values = select_first([ + final_merge.combined_file, + simple_merge.combined_file, + ]), p_values = pval_file, num_probes, additional_probes_to_exclude = select_all([ @@ -140,12 +127,10 @@ workflow methylation_cohort { } output { - File combined_beta = select_first( - [ - final_merge.combined_file, - simple_merge.combined_file, - ] - ) + File combined_beta = select_first([ + final_merge.combined_file, + simple_merge.combined_file, + ]) File filtered_beta = filter_probes.filtered_beta_values File filtered_probeset = filter_probes.filtered_probes File umap_embedding = generate_umap.umap @@ -159,7 +144,7 @@ task combine_data { meta { description: "Combine data from multiple CSV files by column" outputs: { - combined_file: "Combined CSV file" + combined_file: "Combined CSV file", } } @@ -183,9 +168,7 @@ task combine_data { Int modify_memory_gb = 0 } - Int memory_gb = ceil(size(files_to_combine, "GB") * - if simple_merge then 2 else 1) - + modify_memory_gb + Int memory_gb = ceil(size(files_to_combine, "GB") * if simple_merge then 2 else 1) + modify_memory_gb + 2 Int disk_size_gb = ceil(size(files_to_combine, "GB") * 2) + 2 @@ -273,7 +256,7 @@ task generate_umap { meta { description: "Generate UMAP embedding" outputs: { - umap: "UMAP embedding for all samples" + umap: "UMAP embedding for all samples", } } @@ -312,7 +295,7 @@ task plot_umap { meta { description: "Plot UMAP embedding" outputs: { - umap_plot: "UMAP plot for all samples" + umap_plot: "UMAP plot for all samples", } } diff --git a/workflows/methylation/methylation-preprocess.wdl b/workflows/methylation/methylation-preprocess.wdl index 4274b3942..76fdafe25 100644 --- a/workflows/methylation/methylation-preprocess.wdl +++ b/workflows/methylation/methylation-preprocess.wdl @@ -53,10 +53,8 @@ task process_raw_idats { >>> output { - File beta_swan_norm_unfiltered - = out_base + ".beta_swan_norm_unfiltered.csv" - File beta_swan_norm_unfiltered_genomic - = out_base + ".beta_swan_norm_unfiltered.genomic.csv" + File beta_swan_norm_unfiltered = out_base + ".beta_swan_norm_unfiltered.csv" + File beta_swan_norm_unfiltered_genomic = out_base + ".beta_swan_norm_unfiltered.genomic.csv" File annotation = out_base + ".annotation.csv" File beta_unnorm = out_base + ".beta.csv" File cn_values = out_base + ".cn_values.csv" @@ -81,13 +79,15 @@ task list_sex_probes { meta { description: "List probes that map to the sex chromosomes" outputs: { - probe_list: "List of probe names that map to the sex chromosomes" + probe_list: "List of probe names that map to the sex chromosomes", } } - parameter_meta {} + parameter_meta { + } - input {} + input { + } command <<< set -euo pipefail diff --git a/workflows/methylation/methylation-standard.wdl b/workflows/methylation/methylation-standard.wdl index a3c9dfbdc..3aaf1ab34 100644 --- a/workflows/methylation/methylation-standard.wdl +++ b/workflows/methylation/methylation-standard.wdl @@ -38,15 +38,15 @@ workflow methylation { scatter (pair in zip(green_idats, red_idats)) { call preprocess.process_raw_idats { input: - idats = pair + idats = pair, } } - call preprocess.list_sex_probes {} + call preprocess.list_sex_probes { + } call cohort.methylation_cohort { input: - unfiltered_normalized_beta = - process_raw_idats.beta_swan_norm_unfiltered_genomic, + unfiltered_normalized_beta = process_raw_idats.beta_swan_norm_unfiltered_genomic, p_values = process_raw_idats.probe_pvalues, sex_probe_list = list_sex_probes.probe_list, additional_probes_to_exclude, @@ -56,21 +56,18 @@ workflow methylation { Int probelist_length = length(probe_files) Int max_length = 100 - if (probelist_length > max_length){ - scatter (merge_num in range((probelist_length / max_length) + 1)){ + if (probelist_length > max_length) { + scatter (merge_num in range((probelist_length / max_length) + 1)) { # Get the sublist of probe files - scatter (probe_num in range(max_length)){ - Int num = ( - if merge_num > 0 - then probe_num + (merge_num * max_length) - else probe_num - ) - if (num < probelist_length){ + scatter (probe_num in range(max_length)) { + Int num = (if merge_num > 0 then probe_num + (merge_num * max_length) else probe_num + ) + if (num < probelist_length) { File probe_file_batches = probe_files[num] } } } - scatter (iter_index in range(length(probe_file_batches))){ + scatter (iter_index in range(length(probe_file_batches))) { call concat_and_uniq { input: files_to_combine = select_all(probe_file_batches[iter_index]), output_file_name = "probes_with_snps_part_~{iter_index}.tab", @@ -79,13 +76,13 @@ workflow methylation { call concat_and_uniq as final_cat { input: files_to_combine = flatten([ - concat_and_uniq.combined_file + concat_and_uniq.combined_file, ]), output_file_name = "probes_with_snps.tab", } } - if (probelist_length <= max_length){ + if (probelist_length <= max_length) { call concat_and_uniq as simple_merge { input: files_to_combine = probe_files, output_file_name = "probes_with_snps.tab", @@ -95,21 +92,18 @@ workflow methylation { Array[File] non_genomic_probe_list = process_raw_idats.non_genomic_probes Int non_genomic_probelist_length = length(non_genomic_probe_list) - if (non_genomic_probelist_length > max_length){ - scatter (merge_num in range((non_genomic_probelist_length / max_length) + 1)){ + if (non_genomic_probelist_length > max_length) { + scatter (merge_num in range((non_genomic_probelist_length / max_length) + 1)) { # Get the sublist of probe files - scatter (probe_num in range(max_length)){ - Int num_ng = ( - if merge_num > 0 - then probe_num + (merge_num * max_length) - else probe_num - ) - if (num_ng < non_genomic_probelist_length){ + scatter (probe_num in range(max_length)) { + Int num_ng = (if merge_num > 0 then probe_num + (merge_num * max_length) + else probe_num) + if (num_ng < non_genomic_probelist_length) { File non_genomic_probe_batches = non_genomic_probe_list[num_ng] } } } - scatter (iter_index in range(length(non_genomic_probe_batches))){ + scatter (iter_index in range(length(non_genomic_probe_batches))) { call concat_and_uniq as non_genomic_concat { input: files_to_combine = select_all(non_genomic_probe_batches[iter_index]), output_file_name = "non_genomic_probes_part_~{iter_index}.tab", @@ -118,13 +112,13 @@ workflow methylation { call concat_and_uniq as final_cat_non_genomic { input: files_to_combine = flatten([ - non_genomic_concat.combined_file + non_genomic_concat.combined_file, ]), output_file_name = "non_genomic_probes.tab", } } - if (non_genomic_probelist_length <= max_length){ + if (non_genomic_probelist_length <= max_length) { call concat_and_uniq as simple_merge_non_genomic { input: files_to_combine = non_genomic_probe_list, output_file_name = "non_genomic_probes.tab", @@ -132,8 +126,7 @@ workflow methylation { } output { - Array[File] beta_swan_norm_unfiltered_genomic = - process_raw_idats.beta_swan_norm_unfiltered_genomic + Array[File] beta_swan_norm_unfiltered_genomic = process_raw_idats.beta_swan_norm_unfiltered_genomic File combined_beta = methylation_cohort.combined_beta File filtered_beta = methylation_cohort.filtered_beta File filtered_probeset = methylation_cohort.filtered_probeset @@ -157,7 +150,7 @@ task concat_and_uniq { meta { description: "Concatenate multiple files and retain unique lines" outputs: { - combined_file: "File containing unique lines from all input files" + combined_file: "File containing unique lines from all input files", } } diff --git a/workflows/qc/markdups-post.wdl b/workflows/qc/markdups-post.wdl index 70771d8e8..2e0420fa5 100644 --- a/workflows/qc/markdups-post.wdl +++ b/workflows/qc/markdups-post.wdl @@ -5,7 +5,6 @@ ## whether a read is a duplicate or not. ## But the tasks called below produce different results depending on whether the ## input BAM has been duplicate marked or not. - version 1.1 import "../../tools/mosdepth.wdl" @@ -61,7 +60,7 @@ workflow markdups_post { bam_index = markdups_bam_index, prefix = prefix + "." + "whole_genome", } - scatter(coverage_pair in zip(coverage_beds, coverage_labels)) { + scatter (coverage_pair in zip(coverage_beds, coverage_labels)) { call mosdepth.coverage as regions_coverage { input: bam = markdups_bam, bam_index = markdups_bam_index, @@ -72,8 +71,7 @@ workflow markdups_post { output { File insert_size_metrics = collect_insert_size_metrics.insert_size_metrics - File insert_size_metrics_pdf - = collect_insert_size_metrics.insert_size_metrics_pdf + File insert_size_metrics_pdf = collect_insert_size_metrics.insert_size_metrics_pdf File flagstat_report = flagstat.flagstat_report File mosdepth_global_summary = wg_coverage.summary File mosdepth_global_dist = wg_coverage.global_dist diff --git a/workflows/qc/quality-check-standard.wdl b/workflows/qc/quality-check-standard.wdl index 5115d3f90..a35f0b867 100644 --- a/workflows/qc/quality-check-standard.wdl +++ b/workflows/qc/quality-check-standard.wdl @@ -126,8 +126,7 @@ workflow quality_check_standard { File kraken_db File? gtf #@ except: LineWidth - File multiqc_config - = "https://raw.githubusercontent.com/stjudecloud/workflows/main/workflows/qc/multiqc_config/multiqc_config.yaml" + File multiqc_config = "https://raw.githubusercontent.com/stjudecloud/workflows/main/workflows/qc/multiqc_config/multiqc_config.yaml" Array[File] extra_multiqc_inputs = [] Array[File] coverage_beds = [] Array[String] coverage_labels = [] @@ -164,20 +163,24 @@ workflow quality_check_standard { coverage_labels, } call flag_filter.validate_flag_filter as kraken_filter_validator { input: - flags = standard_filter + flags = standard_filter, } if (run_comparative_kraken) { - call flag_filter.validate_flag_filter - as comparative_kraken_filter_validator - { input: - flags = comparative_filter + call flag_filter.validate_flag_filter as comparative_kraken_filter_validator { input: + flags = comparative_filter, } } - call md5sum.compute_checksum after parse_input { input: file = bam } + call md5sum.compute_checksum after parse_input { input: + file = bam, + } - call samtools.quickcheck after parse_input { input: bam } - call util.compression_integrity after parse_input { input: bgzipped_file = bam } + call samtools.quickcheck after parse_input { input: + bam, + } + call util.compression_integrity after parse_input { input: + bgzipped_file = bam, + } if (subsample_n_reads > 0) { call samtools.subsample after quickcheck { input: @@ -188,7 +191,10 @@ workflow quality_check_standard { } if (defined(subsample.sampled_bam)) { call samtools.index as subsample_index { input: - bam = select_first([subsample.sampled_bam, "undefined"]), + bam = select_first([ + subsample.sampled_bam, + "undefined", + ]), use_all_cores, } } @@ -203,11 +209,8 @@ workflow quality_check_standard { subsample_index.bam_index, bam_index, ]) - String post_subsample_prefix = ( - if (defined(subsample.sampled_bam)) - then prefix + ".subsampled" - else prefix - ) + String post_subsample_prefix = (if (defined(subsample.sampled_bam)) then prefix + ".subsampled" + else prefix) call picard.validate_bam after quickcheck { input: bam = post_subsample_bam, @@ -235,7 +238,9 @@ workflow quality_check_standard { outfile_name = post_subsample_prefix + ".readlength.tsv", } call ngsderive.encoding after quickcheck { input: - ngs_files = [post_subsample_bam], + ngs_files = [ + post_subsample_bam, + ], outfile_name = post_subsample_prefix + ".encoding.tsv", num_reads = -1, } @@ -249,9 +254,7 @@ workflow quality_check_standard { prefix = post_subsample_prefix, } - call samtools.bam_to_fastq after quickcheck - after kraken_filter_validator - { input: + call samtools.bam_to_fastq after quickcheck after kraken_filter_validator { input: bam = post_subsample_bam, bitwise_filter = standard_filter, prefix = post_subsample_prefix, @@ -267,14 +270,24 @@ workflow quality_check_standard { } call fq.fqlint { input: - read_one_fastq = select_first([bam_to_fastq.read_one_fastq_gz, "undefined"]), - read_two_fastq = select_first([bam_to_fastq.read_two_fastq_gz, "undefined"]), + read_one_fastq = select_first([ + bam_to_fastq.read_one_fastq_gz, + "undefined", + ]), + read_two_fastq = select_first([ + bam_to_fastq.read_two_fastq_gz, + "undefined", + ]), } call kraken2.kraken after fqlint { input: - read_one_fastq_gz - = select_first([bam_to_fastq.read_one_fastq_gz, "undefined"]), - read_two_fastq_gz - = select_first([bam_to_fastq.read_two_fastq_gz, "undefined"]), + read_one_fastq_gz = select_first([ + bam_to_fastq.read_one_fastq_gz, + "undefined", + ]), + read_two_fastq_gz = select_first([ + bam_to_fastq.read_two_fastq_gz, + "undefined", + ]), db = kraken_db, store_sequences = store_kraken_sequences, prefix = post_subsample_prefix, @@ -282,23 +295,29 @@ workflow quality_check_standard { } if (run_fastp) { call fp.fastp after fqlint { input: - read_one_fastq - = select_first([bam_to_fastq.read_one_fastq_gz, "undefined"]), - read_two_fastq - = select_first([bam_to_fastq.read_two_fastq_gz, "undefined"]), + read_one_fastq = select_first([ + bam_to_fastq.read_one_fastq_gz, + "undefined", + ]), + read_two_fastq = select_first([ + bam_to_fastq.read_two_fastq_gz, + "undefined", + ]), output_fastq = false, } } if (run_librarian) { call libraran_tasks.librarian after fqlint { input: - read_one_fastq = select_first([bam_to_fastq.read_one_fastq_gz, "undefined"]), + read_one_fastq = select_first([ + bam_to_fastq.read_one_fastq_gz, + "undefined", + ]), } } if (run_comparative_kraken) { - call samtools.bam_to_fastq as alt_filtered_fastq after quickcheck - after comparative_kraken_filter_validator - { input: + call samtools.bam_to_fastq as alt_filtered_fastq after quickcheck after comparative_kraken_filter_validator { + input: bam = post_subsample_bam, bitwise_filter = comparative_filter, prefix = post_subsample_prefix + ".alt_filtered", @@ -315,16 +334,24 @@ workflow quality_check_standard { use_all_cores, } call fq.fqlint as alt_filtered_fqlint { input: - read_one_fastq - = select_first([alt_filtered_fastq.read_one_fastq_gz, "undefined"]), - read_two_fastq - = select_first([alt_filtered_fastq.read_two_fastq_gz, "undefined"]), + read_one_fastq = select_first([ + alt_filtered_fastq.read_one_fastq_gz, + "undefined", + ]), + read_two_fastq = select_first([ + alt_filtered_fastq.read_two_fastq_gz, + "undefined", + ]), } call kraken2.kraken as comparative_kraken after alt_filtered_fqlint { input: - read_one_fastq_gz - = select_first([alt_filtered_fastq.read_one_fastq_gz, "undefined"]), - read_two_fastq_gz - = select_first([alt_filtered_fastq.read_two_fastq_gz, "undefined"]), + read_one_fastq_gz = select_first([ + alt_filtered_fastq.read_one_fastq_gz, + "undefined", + ]), + read_two_fastq_gz = select_first([ + alt_filtered_fastq.read_two_fastq_gz, + "undefined", + ]), db = kraken_db, store_sequences = store_kraken_sequences, prefix = post_subsample_prefix + ".alt_filtered", @@ -337,8 +364,8 @@ workflow quality_check_standard { bam_index = post_subsample_bam_index, prefix = post_subsample_prefix + ".whole_genome", } - scatter(coverage_pair in zip(coverage_beds, parse_input.labels)) { - call mosdepth.coverage as regions_coverage after quickcheck { input: + scatter (coverage_pair in zip(coverage_beds, parse_input.labels)) { + call mosdepth.coverage as regions_coverage after quickcheck { input: bam = post_subsample_bam, bam_index = post_subsample_bam_index, coverage_bed = coverage_pair.left, @@ -350,19 +377,31 @@ workflow quality_check_standard { call ngsderive.junction_annotation after quickcheck { input: bam = post_subsample_bam, bam_index = post_subsample_bam_index, - gene_model = select_first([gtf, "undefined"]), + gene_model = select_first([ + gtf, + "undefined", + ]), prefix = post_subsample_prefix, } call ngsderive.strandedness after quickcheck { input: bam = post_subsample_bam, bam_index = post_subsample_bam_index, - gene_model = select_first([gtf, "undefined"]), + gene_model = select_first([ + gtf, + "undefined", + ]), outfile_name = post_subsample_prefix + ".strandedness.tsv", } call qualimap.rnaseq as qualimap_rnaseq { input: - bam = select_first([bam_to_fastq.collated_bam, "undefined"]), + bam = select_first([ + bam_to_fastq.collated_bam, + "undefined", + ]), prefix = post_subsample_prefix + ".qualimap_rnaseq_results", - gtf = select_first([gtf, "undefined"]), + gtf = select_first([ + gtf, + "undefined", + ]), name_sorted = true, paired_end = true, # matches default but prevents user from overriding } @@ -434,17 +473,24 @@ workflow quality_check_standard { ], regions_coverage.summary, select_all(regions_coverage.region_dist), - select_first([markdups_post.mosdepth_region_summary, []]), - select_first([markdups_post.mosdepth_region_dist, []]), - ( - if (mark_duplicates && optical_distance > 0) - then [markdups.mark_duplicates_metrics] - else [] - ), + select_first([ + markdups_post.mosdepth_region_summary, + [], + ]), + select_first([ + markdups_post.mosdepth_region_dist, + [], + ]), + (if (mark_duplicates && optical_distance > 0) then [ + markdups.mark_duplicates_metrics, + ] else []), ])) call multiqc_tasks.multiqc { input: - files = flatten([multiqc_files, extra_multiqc_inputs]), + files = flatten([ + multiqc_files, + extra_multiqc_inputs, + ]), config = multiqc_config, report_name = post_subsample_prefix + ".multiqc", } @@ -483,7 +529,10 @@ workflow quality_check_standard { File? kraken_sequences = kraken.sequences File? comparative_kraken_sequences = comparative_kraken.sequences File? junctions = junction_annotation.junctions - Array[File] intermediate_files = select_first([optional_files, []]) + Array[File] intermediate_files = select_first([ + optional_files, + [], + ]) } } @@ -491,7 +540,7 @@ task parse_input { meta { description: "Parses and validates the `quality_check_standard` workflow's provided inputs" outputs: { - labels: "An array of labels to use on the result coverage files associated with each coverage BED" + labels: "An array of labels to use on the result coverage files associated with each coverage BED", } } @@ -539,11 +588,8 @@ task parse_input { >>> output { - Array[String] labels = ( - if (coverage_beds_len > 0) - then read_lines("labels.txt") - else [] - ) + Array[String] labels = (if (coverage_beds_len > 0) then read_lines("labels.txt") + else []) } runtime { diff --git a/workflows/reference/bwa-db-build.wdl b/workflows/reference/bwa-db-build.wdl index 3aefef4f1..579385f1c 100644 --- a/workflows/reference/bwa-db-build.wdl +++ b/workflows/reference/bwa-db-build.wdl @@ -40,7 +40,7 @@ workflow bwa_db_build { } output { - File reference_fa = reference_download.downloaded_file - File bwa_db_tar_gz = build_bwa_db.bwa_db_tar_gz + File reference_fa = reference_download.downloaded_file + File bwa_db_tar_gz = build_bwa_db.bwa_db_tar_gz } } diff --git a/workflows/reference/gatk-reference.wdl b/workflows/reference/gatk-reference.wdl index 3b4dff835..2cf2e4087 100644 --- a/workflows/reference/gatk-reference.wdl +++ b/workflows/reference/gatk-reference.wdl @@ -88,16 +88,28 @@ workflow gatk_reference { if (defined(dbSNP_vcf_index_url) && defined(dbSNP_vcf_index_name)) { call util.download as dbsnp_index { input: - url = select_first([dbSNP_vcf_index_url, "undefined"]), - outfile_name = select_first([dbSNP_vcf_index_name, "undefined"]), + url = select_first([ + dbSNP_vcf_index_url, + "undefined", + ]), + outfile_name = select_first([ + dbSNP_vcf_index_name, + "undefined", + ]), disk_size_gb = dbSNP_vcf_index_disk_size_gb, } } if (defined(interval_list_url) && defined(interval_list_name)) { call util.download as intervals { input: - url = select_first([interval_list_url, "undefined"]), - outfile_name = select_first([interval_list_name, "undefined"]), + url = select_first([ + interval_list_url, + "undefined", + ]), + outfile_name = select_first([ + interval_list_name, + "undefined", + ]), disk_size_gb = interval_list_disk_size_gb, } } diff --git a/workflows/reference/qc-reference.wdl b/workflows/reference/qc-reference.wdl index 2d64b8901..28f67aebf 100644 --- a/workflows/reference/qc-reference.wdl +++ b/workflows/reference/qc-reference.wdl @@ -121,12 +121,12 @@ workflow qc_reference { } } - if ( - (length(kraken_fastas) > 0) - || (length(kraken_fasta_urls) > 0) - || (length(kraken_libraries) > 0) - ) { - call kraken2.download_taxonomy { input: protein } + if ((length(kraken_fastas) > 0) || (length(kraken_fasta_urls) > 0) || (length( + kraken_libraries + ) > 0)) { + call kraken2.download_taxonomy { input: + protein, + } } scatter (lib in kraken_libraries) { @@ -136,7 +136,10 @@ workflow qc_reference { } } - Array[File] custom_fastas = flatten([kraken_fastas, fastas_download.downloaded_file]) + Array[File] custom_fastas = flatten([ + kraken_fastas, + fastas_download.downloaded_file, + ]) if (length(custom_fastas) > 0) { call kraken2.create_library_from_fastas { input: fastas_gz = custom_fastas, @@ -145,9 +148,13 @@ workflow qc_reference { } Array[File] kraken_tarballs = flatten([ - select_all([download_taxonomy.taxonomy]), + select_all([ + download_taxonomy.taxonomy, + ]), download_library.library, - select_all([create_library_from_fastas.custom_library]), + select_all([ + create_library_from_fastas.custom_library, + ]), ]) if (length(kraken_tarballs) > 0) { call kraken2.build_db as kraken_build_db { input: diff --git a/workflows/reference/star-db-build.wdl b/workflows/reference/star-db-build.wdl index d3a99fbe2..d2d14b2a1 100644 --- a/workflows/reference/star-db-build.wdl +++ b/workflows/reference/star-db-build.wdl @@ -56,8 +56,8 @@ workflow star_db_build { } output { - File reference_fa = reference_download.downloaded_file - File gtf = gtf_download.downloaded_file - File star_db_tar_gz = build_star_db.star_db + File reference_fa = reference_download.downloaded_file + File gtf = gtf_download.downloaded_file + File star_db_tar_gz = build_star_db.star_db } } diff --git a/workflows/rnaseq/rnaseq-core.wdl b/workflows/rnaseq/rnaseq-core.wdl index b5d088317..ef7a8bd10 100644 --- a/workflows/rnaseq/rnaseq-core.wdl +++ b/workflows/rnaseq/rnaseq-core.wdl @@ -144,11 +144,9 @@ workflow rnaseq_core { GC_AG_and_CT_GC_motif: 5, AT_AC_and_GT_AT_motif: 5, } - String prefix = sub( - basename(read_one_fastqs_gz[0]), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(read_one_fastqs_gz[0]), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 - ) + ) String xenocp_aligner = "star" Float align_spliced_mate_map_l_min_over_l_mate = 0.5 Int out_filter_multimap_n_max = 50 @@ -201,16 +199,10 @@ workflow rnaseq_core { } } - Array[File] chosen_r1s = ( - if enable_read_trimming - then select_all(trim.read_one_fastq_gz) - else read_one_fastqs_gz - ) - Array[File] chosen_r2s = ( - if enable_read_trimming - then select_all(trim.read_two_fastq_gz) - else read_two_fastqs_gz - ) + Array[File] chosen_r1s = (if enable_read_trimming then select_all(trim.read_one_fastq_gz + ) else read_one_fastqs_gz) + Array[File] chosen_r2s = (if enable_read_trimming then select_all(trim.read_two_fastq_gz + ) else read_two_fastqs_gz) call star.alignment after validate { input: read_one_fastqs_gz = chosen_r1s, @@ -252,21 +244,16 @@ workflow rnaseq_core { gene_model = gtf, } - String htseq_strandedness = ( - if (provided_strandedness != "") - then htseq_strandedness_mapping[provided_strandedness] - else htseq_strandedness_mapping[ngsderive_strandedness.strandedness_string] - ) + String htseq_strandedness = (if (provided_strandedness != "") then htseq_strandedness_mapping[ + provided_strandedness] else htseq_strandedness_mapping[ngsderive_strandedness.strandedness_string + ]) call htseq.count as htseq_count { input: bam = alignment_post.processed_bam, gtf, strandedness = htseq_strandedness, - prefix = basename(alignment_post.processed_bam, "bam") - + ( - if provided_strandedness == "" - then ngsderive_strandedness.strandedness_string - else provided_strandedness + prefix = basename(alignment_post.processed_bam, "bam") + (if provided_strandedness + == "" then ngsderive_strandedness.strandedness_string else provided_strandedness ), pos_sorted = true, } @@ -280,9 +267,13 @@ workflow rnaseq_core { File feature_counts = htseq_count.feature_counts File inferred_strandedness = ngsderive_strandedness.strandedness_file String inferred_strandedness_string = ngsderive_strandedness.strandedness_string - Array[File] fastp_reports = select_all(flatten([fastp.report, trim.report])) - Array[File] fastp_jsons = select_all(flatten( - [fastp.report_json, trim.report_json] - )) + Array[File] fastp_reports = select_all(flatten([ + fastp.report, + trim.report, + ])) + Array[File] fastp_jsons = select_all(flatten([ + fastp.report_json, + trim.report_json, + ])) } } diff --git a/workflows/rnaseq/rnaseq-standard-fastq.wdl b/workflows/rnaseq/rnaseq-standard-fastq.wdl index dee188b10..eb8e6b9b2 100644 --- a/workflows/rnaseq/rnaseq-standard-fastq.wdl +++ b/workflows/rnaseq/rnaseq-standard-fastq.wdl @@ -73,11 +73,9 @@ workflow rnaseq_standard_fastq { Array[File] read_two_fastqs_gz Array[ReadGroup] read_groups File? contaminant_db - String prefix = sub( - basename(read_one_fastqs_gz[0]), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(read_one_fastqs_gz[0]), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 - ) + ) String xenocp_aligner = "star" String strandedness = "" Boolean enable_read_trimming = false @@ -100,7 +98,7 @@ workflow rnaseq_standard_fastq { } } - if (validate_input){ + if (validate_input) { scatter (reads in zip(read_one_fastqs_gz, read_two_fastqs_gz)) { call fq.fqlint after parse_input { input: read_one_fastq = reads.left, @@ -123,12 +121,10 @@ workflow rnaseq_standard_fastq { subsample.subsampled_read1, read_one_fastqs_gz, ]) - Array[File] selected_read_two_fastqs = select_all( - select_first([ - subsample.subsampled_read2, - read_two_fastqs_gz, - ]) - ) + Array[File] selected_read_two_fastqs = select_all(select_first([ + subsample.subsampled_read2, + read_two_fastqs_gz, + ])) call rnaseq_core_wf.rnaseq_core after fqlint { input: read_one_fastqs_gz = selected_read_one_fastqs, diff --git a/workflows/rnaseq/rnaseq-standard.wdl b/workflows/rnaseq/rnaseq-standard.wdl index edac26733..793446486 100755 --- a/workflows/rnaseq/rnaseq-standard.wdl +++ b/workflows/rnaseq/rnaseq-standard.wdl @@ -92,7 +92,10 @@ workflow rnaseq_standard { use_all_cores, } } - File selected_bam = select_first([subsample.sampled_bam, bam]) + File selected_bam = select_first([ + subsample.sampled_bam, + bam, + ]) call read_group.get_read_groups after validate_input_bam { input: bam = selected_bam, diff --git a/workflows/rnaseq/rnaseq-variant-calling.wdl b/workflows/rnaseq/rnaseq-variant-calling.wdl index 8df2e61a1..ab7a8a445 100644 --- a/workflows/rnaseq/rnaseq-variant-calling.wdl +++ b/workflows/rnaseq/rnaseq-variant-calling.wdl @@ -54,7 +54,7 @@ workflow rnaseq_variant_calling { Int scatter_count = 6 } - if (!bam_is_dup_marked){ + if (!bam_is_dup_marked) { call picard.mark_duplicates { input: bam, create_bam = true, @@ -62,8 +62,14 @@ workflow rnaseq_variant_calling { } call gatk.split_n_cigar_reads { input: - bam = select_first([mark_duplicates.duplicate_marked_bam, bam]), - bam_index = select_first([mark_duplicates.duplicate_marked_bam_index, bam_index]), + bam = select_first([ + mark_duplicates.duplicate_marked_bam, + bam, + ]), + bam_index = select_first([ + mark_duplicates.duplicate_marked_bam_index, + bam_index, + ]), fasta, fasta_index, dict, From cea1fcdf62ba1c7c241059a2d38a6a06d68b8f1a Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 2 Mar 2026 04:36:26 -0500 Subject: [PATCH 2/4] format overwrite from PR branch https://github.com/stjude-rust-labs/sprocket/pull/678 --- tools/arriba.wdl | 2 +- tools/bwa.wdl | 10 +++++----- tools/fastp.wdl | 2 +- tools/fq.wdl | 2 +- tools/gatk4.wdl | 4 ++-- tools/htseq.wdl | 2 +- tools/kraken2.wdl | 7 ++++--- tools/librarian.wdl | 2 +- tools/ngsderive.wdl | 2 +- tools/picard.wdl | 4 ++-- tools/samtools.wdl | 6 +++--- tools/star.wdl | 18 ++++++++++-------- workflows/dnaseq/dnaseq-standard-fastq.wdl | 2 +- workflows/general/bam-to-fastqs.wdl | 4 ++-- workflows/general/samtools-merge.wdl | 2 +- workflows/methylation/methylation-cohort.wdl | 2 +- workflows/methylation/methylation-standard.wdl | 2 +- workflows/rnaseq/rnaseq-core.wdl | 12 ++++++------ workflows/rnaseq/rnaseq-standard-fastq.wdl | 2 +- 19 files changed, 45 insertions(+), 42 deletions(-) diff --git a/tools/arriba.wdl b/tools/arriba.wdl index a2e010885..2ca327c95 100644 --- a/tools/arriba.wdl +++ b/tools/arriba.wdl @@ -227,7 +227,7 @@ task arriba { ~{(if length(viral_contigs) > 0 then "-v " + sep(",", quote(viral_contigs)) else "")} \ ~{(if length(disable_filters) > 0 then "-f " + sep(",", quote(disable_filters) - ) else "")} \ + ) else "")} \ -E ~{max_e_value} \ -S ~{min_supporting_reads} \ -m ~{max_mismappers} \ diff --git a/tools/bwa.wdl b/tools/bwa.wdl index 89b814018..56aafcb5c 100644 --- a/tools/bwa.wdl +++ b/tools/bwa.wdl @@ -37,7 +37,7 @@ task bwa_aln { String read_group String prefix = sub(basename(fastq), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 - ) + ) Boolean use_all_cores = false Int ncpu = 2 Int modify_disk_size_gb = 0 @@ -48,7 +48,7 @@ task bwa_aln { Float input_fastq_size = size(fastq, "GB") Float reference_size = size(bwa_db_tar_gz, "GB") Int disk_size_gb = (ceil((input_fastq_size + reference_size) * 2) + 10 + modify_disk_size_gb - ) + ) command <<< set -euo pipefail @@ -138,10 +138,10 @@ task bwa_aln_pe { String output_bam = prefix + ".bam" Float input_fastq_size = (size(read_one_fastq_gz, "GB") + size(read_two_fastq_gz, "GB" - )) + )) Float reference_size = size(bwa_db_tar_gz, "GB") Int disk_size_gb = (ceil((input_fastq_size + reference_size) * 2) + 5 + modify_disk_size_gb - ) + ) command <<< set -euo pipefail @@ -233,7 +233,7 @@ task bwa_mem { Float input_fastq_size = size(read_one_fastq_gz, "GB") + size(read_two_fastq_gz, "GB") Float reference_size = size(bwa_db_tar_gz, "GB") Int disk_size_gb = (ceil((input_fastq_size + reference_size) * 2) + 10 + modify_disk_size_gb - ) + ) command <<< set -euo pipefail diff --git a/tools/fastp.wdl b/tools/fastp.wdl index 99d5ebceb..c4a131976 100644 --- a/tools/fastp.wdl +++ b/tools/fastp.wdl @@ -97,7 +97,7 @@ task fastp { File? read_two_fastq String prefix = sub(basename(read_one_fastq), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 - ) + ".trimmed" + ) + ".trimmed" Boolean output_fastq = true Boolean deduplicate = false Boolean disable_duplicate_eval = false diff --git a/tools/fq.wdl b/tools/fq.wdl index dd45affa5..38b7e397b 100755 --- a/tools/fq.wdl +++ b/tools/fq.wdl @@ -122,7 +122,7 @@ task subsample { File? read_two_fastq String prefix = sub(basename(read_one_fastq), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 - ) + ) Float probability = 1.0 Int record_count = -1 Int modify_disk_size_gb = 0 diff --git a/tools/gatk4.wdl b/tools/gatk4.wdl index 011c988e4..f0cfe2d30 100644 --- a/tools/gatk4.wdl +++ b/tools/gatk4.wdl @@ -451,7 +451,7 @@ task mark_duplicates_spark { Float bam_size = size(bam, "GB") Int memory_gb = min(ceil(bam_size + 15), 50) + modify_memory_gb Int disk_size_gb = ((if create_bam then ceil((bam_size * 2) + 10) else ceil(bam_size + 10 - )) + modify_disk_size_gb) + )) + modify_disk_size_gb) Int java_heap_size = ceil(memory_gb * 0.9) @@ -468,7 +468,7 @@ task mark_duplicates_spark { --read-validation-stringency "~{validation_stringency}" \ --duplicate-scoring-strategy "~{duplicate_scoring_strategy}" \ --read-name-regex '~{if (optical_distance > 0) then read_name_regex else "null" - }' \ + }' \ --duplicate-tagging-policy "~{tagging_policy}" \ --optical-duplicate-pixel-distance ~{optical_distance} \ --spark-master local[~{ncpu}] diff --git a/tools/htseq.wdl b/tools/htseq.wdl index 80dfe9a7b..bbc8741de 100755 --- a/tools/htseq.wdl +++ b/tools/htseq.wdl @@ -120,7 +120,7 @@ task count { --nonunique ~{if nonunique then "all" else "none"} \ --secondary-alignments ~{if secondary_alignments then "score" else "ignore"} \ --supplementary-alignments ~{(if supplementary_alignments then "score" else "ignore" - )} \ + )} \ "~{bam}" \ "~{gtf}" \ >> "~{outfile_name}" diff --git a/tools/kraken2.wdl b/tools/kraken2.wdl index 2bcf1cc6f..22a665eb9 100644 --- a/tools/kraken2.wdl +++ b/tools/kraken2.wdl @@ -97,7 +97,8 @@ task download_library { #@ except: ExpressionSpacing Int disk_size_gb = ((if library_name == "bacteria" then 300 else if library_name == "nr" - then 600 else if library_name == "nt" then 2500 else 25) + modify_disk_size_gb) + then 600 + else if library_name == "nt" then 2500 else 25) + modify_disk_size_gb) command <<< set -euo pipefail @@ -347,7 +348,7 @@ task kraken { File db String prefix = sub(basename(read_one_fastq_gz), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 - ) + ) Boolean store_sequences = false Boolean use_names = true Boolean use_all_cores = false @@ -361,7 +362,7 @@ task kraken { Float read1_size = size(read_one_fastq_gz, "GB") Float read2_size = size(read_two_fastq_gz, "GB") Int disk_size_gb_calculation = (ceil((db_size * 2) + read1_size + read2_size) + 10 + modify_disk_size_gb - ) + ) Int disk_size_gb = (if store_sequences then disk_size_gb_calculation + ceil(read1_size + read2_size) else disk_size_gb_calculation) diff --git a/tools/librarian.wdl b/tools/librarian.wdl index 97dd8892f..9fe3efac5 100644 --- a/tools/librarian.wdl +++ b/tools/librarian.wdl @@ -25,7 +25,7 @@ task librarian { File read_one_fastq String prefix = sub(basename(read_one_fastq), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 - ) + ".librarian" + ) + ".librarian" Int modify_disk_size_gb = 0 } diff --git a/tools/ngsderive.wdl b/tools/ngsderive.wdl index 87238c5a4..a1b1e2c34 100644 --- a/tools/ngsderive.wdl +++ b/tools/ngsderive.wdl @@ -398,7 +398,7 @@ task endedness { Float bam_size = size(bam, "GB") Int memory_gb = (if calc_rpt then (ceil(bam_size * 2.5) + 4 + modify_memory_gb) else 4 - ) + ) Int disk_size_gb = ceil(bam_size) + 10 + modify_disk_size_gb command <<< diff --git a/tools/picard.wdl b/tools/picard.wdl index 7d64ff148..2127f92d6 100755 --- a/tools/picard.wdl +++ b/tools/picard.wdl @@ -84,7 +84,7 @@ task mark_duplicates { Float bam_size = size(bam, "GB") Int memory_gb = min(ceil(bam_size + 12), 50) + modify_memory_gb Int disk_size_gb = ((if create_bam then ceil((bam_size * 2) + 10) else ceil(bam_size + 10 - )) + modify_disk_size_gb) + )) + modify_disk_size_gb) Int java_heap_size = ceil(memory_gb * 0.9) @@ -100,7 +100,7 @@ task mark_duplicates { --VALIDATION_STRINGENCY "~{validation_stringency}" \ --DUPLICATE_SCORING_STRATEGY "~{duplicate_scoring_strategy}" \ --READ_NAME_REGEX '~{if (optical_distance > 0) then read_name_regex else "null" - }' \ + }' \ --TAGGING_POLICY "~{tagging_policy}" \ --CLEAR_DT ~{clear_dt} \ --REMOVE_DUPLICATES ~{remove_duplicates} \ diff --git a/tools/samtools.wdl b/tools/samtools.wdl index 4a1259de8..16a519d66 100755 --- a/tools/samtools.wdl +++ b/tools/samtools.wdl @@ -855,7 +855,7 @@ task bam_to_fastq { Int memory_gb = (if (collated || !paired_end) then 4 else (ceil(bam_size * 0.4) + 4)) + modify_memory_gb Int disk_size_gb = ceil(bam_size * (if (retain_collated_bam && !collated && paired_end - ) then 5 else 2)) + 10 + modify_disk_size_gb + ) then 5 else 2)) + 10 + modify_disk_size_gb command <<< set -euo pipefail @@ -891,9 +891,9 @@ task bam_to_fastq { -G "~{bitwise_filter.exclude_if_all}" \ ~{(if append_read_number then "-N" else "-n")} \ -1 ~{(if paired_end then "\"" + prefix + ".R1.fastq.gz\"" else "\"" + prefix + ".fastq.gz\"" - )} \ + )} \ -2 ~{(if paired_end then "\"" + prefix + ".R2.fastq.gz\"" else "\"" + prefix + ".fastq.gz\"" - )} \ + )} \ ~{(if paired_end then (if output_singletons then "-s \"" + prefix + ".singleton.fastq.gz\"" else "-s junk.singleton.fastq.gz") else "")} \ -0 ~{(if paired_end then "junk.unknown_bit_setting.fastq.gz" else "\"" + prefix diff --git a/tools/star.wdl b/tools/star.wdl index 2c8096017..28a299175 100755 --- a/tools/star.wdl +++ b/tools/star.wdl @@ -86,7 +86,7 @@ task build_star_db { Float reference_fasta_size = size(reference_fasta, "GB") Float gtf_size = size(gtf, "GB") Int disk_size_gb = (ceil((reference_fasta_size + gtf_size) * 3) + 10 + modify_disk_size_gb - ) + ) # Leave 2GB as system overhead String memory_limit_bytes = "~{memory_gb - 2}000000000" @@ -599,7 +599,7 @@ task alignment { Pair[Int, Int] clip_5p_n_bases = (0, 0) String prefix = sub(basename(read_one_fastqs_gz[0]), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 - ) + ) String read_name_separator = "/" String clip_adapter_type = "Hamming" String out_sam_strand_field = "intronMotif" @@ -708,7 +708,7 @@ task alignment { Float read_two_fastqs_size = size(read_twos, "GB") Float star_db_tar_gz_size = size(star_db_tar_gz, "GB") Int disk_size_gb = ((ceil(read_one_fastqs_size + read_two_fastqs_size + star_db_tar_gz_size - ) * 3) + 10 + modify_disk_size_gb) + ) * 3) + 10 + modify_disk_size_gb) command <<< set -euo pipefail @@ -769,15 +769,17 @@ task alignment { --clip3pAdapterSeq "~{clip_3p_adapter_seq.left}" ~{(if (length(read_twos) != 0 ) then "'" + clip_3p_adapter_seq.right + "'" else "")} \ --clip3pAdapterMMp ~{clip_3p_adapter_mmp.left} ~{(if (length(read_twos) != 0) - then clip_3p_adapter_mmp.right else None)} \ + then clip_3p_adapter_mmp.right + else None)} \ --alignEndsProtrude ~{align_ends_protrude.left} "~{(if (length(read_twos) != 0 - ) then align_ends_protrude.right else None)}" \ + ) then align_ends_protrude.right else None)}" \ --clip3pNbases ~{clip_3p_n_bases.left} ~{(if (length(read_twos) != 0) then clip_3p_n_bases.right - else None)} \ + else None)} \ --clip3pAfterAdapterNbases ~{clip_3p_after_adapter_n_bases.left} ~{(if (length( - read_twos) != 0) then clip_3p_after_adapter_n_bases.right else None)} \ + read_twos + ) != 0) then clip_3p_after_adapter_n_bases.right else None)} \ --clip5pNbases ~{clip_5p_n_bases.left} ~{(if (length(read_twos) != 0) then clip_5p_n_bases.right - else None)} \ + else None)} \ --readNameSeparator "~{read_name_separator}" \ --clipAdapterType "~{clip_adapter_type}" \ --outSAMstrandField "~{out_sam_strand_field}" \ diff --git a/workflows/dnaseq/dnaseq-standard-fastq.wdl b/workflows/dnaseq/dnaseq-standard-fastq.wdl index aa9b29890..fdf48606b 100644 --- a/workflows/dnaseq/dnaseq-standard-fastq.wdl +++ b/workflows/dnaseq/dnaseq-standard-fastq.wdl @@ -55,7 +55,7 @@ workflow dnaseq_standard_fastq_experimental { Array[ReadGroup] read_groups String prefix = sub(basename(read_one_fastqs_gz[0]), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 - ) + ) String aligner = "mem" Boolean enable_read_trimming = false Boolean validate_input = true diff --git a/workflows/general/bam-to-fastqs.wdl b/workflows/general/bam-to-fastqs.wdl index d70a3588e..a80aec153 100644 --- a/workflows/general/bam-to-fastqs.wdl +++ b/workflows/general/bam-to-fastqs.wdl @@ -45,7 +45,7 @@ workflow bam_to_fastqs { if (paired_end) { scatter (reads in zip(bam_to_fastq.read_one_fastq_gz, bam_to_fastq.read_two_fastq_gz - )) { + )) { call fq.fqlint { input: read_one_fastq = select_first([ reads.left, @@ -68,7 +68,7 @@ workflow bam_to_fastqs { output { Array[File] read1s = (if paired_end then select_all(bam_to_fastq.read_one_fastq_gz - ) else select_all(bam_to_fastq.single_end_reads_fastq_gz)) + ) else select_all(bam_to_fastq.single_end_reads_fastq_gz)) Array[File?] read2s = bam_to_fastq.read_two_fastq_gz } } diff --git a/workflows/general/samtools-merge.wdl b/workflows/general/samtools-merge.wdl index a48f6e1d1..c6f7ea81b 100644 --- a/workflows/general/samtools-merge.wdl +++ b/workflows/general/samtools-merge.wdl @@ -36,7 +36,7 @@ workflow samtools_merge { # Get the sublist of bams scatter (bam_num in range(max_length)) { Int num = (if merge_num > 0 then bam_num + (merge_num * max_length) else bam_num - ) + ) if (num < bam_length) { File bam_list = bams[num] } diff --git a/workflows/methylation/methylation-cohort.wdl b/workflows/methylation/methylation-cohort.wdl index 9486cc450..8ca1ddc35 100644 --- a/workflows/methylation/methylation-cohort.wdl +++ b/workflows/methylation/methylation-cohort.wdl @@ -43,7 +43,7 @@ workflow methylation_cohort { # Get the sublist of beta files scatter (beta_num in range(max_length)) { Int num = (if merge_num > 0 then beta_num + (merge_num * max_length) else beta_num - ) + ) if (num < beta_length) { File bam_list = unfiltered_normalized_beta[num] } diff --git a/workflows/methylation/methylation-standard.wdl b/workflows/methylation/methylation-standard.wdl index 3aaf1ab34..6a2c82627 100644 --- a/workflows/methylation/methylation-standard.wdl +++ b/workflows/methylation/methylation-standard.wdl @@ -61,7 +61,7 @@ workflow methylation { # Get the sublist of probe files scatter (probe_num in range(max_length)) { Int num = (if merge_num > 0 then probe_num + (merge_num * max_length) else probe_num - ) + ) if (num < probelist_length) { File probe_file_batches = probe_files[num] } diff --git a/workflows/rnaseq/rnaseq-core.wdl b/workflows/rnaseq/rnaseq-core.wdl index ef7a8bd10..b24e1e2bb 100644 --- a/workflows/rnaseq/rnaseq-core.wdl +++ b/workflows/rnaseq/rnaseq-core.wdl @@ -146,7 +146,7 @@ workflow rnaseq_core { } String prefix = sub(basename(read_one_fastqs_gz[0]), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 - ) + ) String xenocp_aligner = "star" Float align_spliced_mate_map_l_min_over_l_mate = 0.5 Int out_filter_multimap_n_max = 50 @@ -200,9 +200,9 @@ workflow rnaseq_core { } Array[File] chosen_r1s = (if enable_read_trimming then select_all(trim.read_one_fastq_gz - ) else read_one_fastqs_gz) + ) else read_one_fastqs_gz) Array[File] chosen_r2s = (if enable_read_trimming then select_all(trim.read_two_fastq_gz - ) else read_two_fastqs_gz) + ) else read_two_fastqs_gz) call star.alignment after validate { input: read_one_fastqs_gz = chosen_r1s, @@ -245,8 +245,8 @@ workflow rnaseq_core { } String htseq_strandedness = (if (provided_strandedness != "") then htseq_strandedness_mapping[ - provided_strandedness] else htseq_strandedness_mapping[ngsderive_strandedness.strandedness_string - ]) + provided_strandedness + ] else htseq_strandedness_mapping[ngsderive_strandedness.strandedness_string]) call htseq.count as htseq_count { input: bam = alignment_post.processed_bam, @@ -254,7 +254,7 @@ workflow rnaseq_core { strandedness = htseq_strandedness, prefix = basename(alignment_post.processed_bam, "bam") + (if provided_strandedness == "" then ngsderive_strandedness.strandedness_string else provided_strandedness - ), + ), pos_sorted = true, } diff --git a/workflows/rnaseq/rnaseq-standard-fastq.wdl b/workflows/rnaseq/rnaseq-standard-fastq.wdl index eb8e6b9b2..29ff3bfc6 100644 --- a/workflows/rnaseq/rnaseq-standard-fastq.wdl +++ b/workflows/rnaseq/rnaseq-standard-fastq.wdl @@ -75,7 +75,7 @@ workflow rnaseq_standard_fastq { File? contaminant_db String prefix = sub(basename(read_one_fastqs_gz[0]), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 - ) + ) String xenocp_aligner = "star" String strandedness = "" Boolean enable_read_trimming = false From c376d694e435b1499d1076c3b442ff7af85cd6a1 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 2 Mar 2026 10:24:00 -0500 Subject: [PATCH 3/4] multiline if else then clauses --- data_structures/read_group.wdl | 9 +- tools/arriba.wdl | 34 ++- tools/bwa.wdl | 18 +- tools/fastp.wdl | 81 +++++-- tools/fq.wdl | 16 +- tools/gatk4.wdl | 30 ++- tools/htseq.wdl | 34 ++- tools/kraken2.wdl | 72 +++++-- tools/mosdepth.wdl | 5 +- tools/ngsderive.wdl | 29 ++- tools/picard.wdl | 48 +++-- tools/qualimap.wdl | 15 +- tools/sambamba.wdl | 10 +- tools/samtools.wdl | 199 ++++++++++++++---- tools/star.wdl | 40 +++- tools/util.wdl | 12 +- workflows/general/bam-to-fastqs.wdl | 6 +- workflows/general/samtools-merge.wdl | 4 +- workflows/methylation/methylation-cohort.wdl | 32 ++- .../methylation/methylation-standard.wdl | 10 +- workflows/qc/quality-check-standard.wdl | 21 +- workflows/rnaseq/rnaseq-core.wdl | 23 +- 22 files changed, 558 insertions(+), 190 deletions(-) diff --git a/data_structures/read_group.wdl b/data_structures/read_group.wdl index 0be2fb22b..3e49ccd1d 100644 --- a/data_structures/read_group.wdl +++ b/data_structures/read_group.wdl @@ -276,7 +276,10 @@ task validate_read_group { fi fi if [ "$(echo "~{sep(" ", required_fields)}" | grep -Ewc "KS")" -eq 1 ]; then - if [ -z "~{if defined(read_group.KS) then read_group.KS else ""}" ]; then + if [ -z "~{if defined(read_group.KS) + then read_group.KS + else "" + }" ]; then >&2 echo "KS is required" exit_code=1 fi @@ -391,7 +394,9 @@ task inner_read_group_to_string { Boolean format_as_sam_record = false } - String delimiter = if format_as_sam_record then "\\t" else " " + String delimiter = if format_as_sam_record + then "\\t" + else " " command <<< if ~{format_as_sam_record}; then diff --git a/tools/arriba.wdl b/tools/arriba.wdl index 2ca327c95..2b1b49e4a 100644 --- a/tools/arriba.wdl +++ b/tools/arriba.wdl @@ -221,13 +221,18 @@ task arriba { ~{"-d '" + wgs_svs + "'"} \ -D ~{max_genomic_breakpoint_distance} \ -s "~{strandedness}" \ - ~{(if length(interesting_contigs) > 0 then "-i " + sep(",", quote( - interesting_contigs - )) else "")} \ - ~{(if length(viral_contigs) > 0 then "-v " + sep(",", quote(viral_contigs)) - else "")} \ - ~{(if length(disable_filters) > 0 then "-f " + sep(",", quote(disable_filters) - ) else "")} \ + ~{(if length(interesting_contigs) > 0 + then "-i " + sep(",", quote(interesting_contigs)) + else "" + )} \ + ~{(if length(viral_contigs) > 0 + then "-v " + sep(",", quote(viral_contigs)) + else "" + )} \ + ~{(if length(disable_filters) > 0 + then "-f " + sep(",", quote(disable_filters)) + else "" + )} \ -E ~{max_e_value} \ -S ~{min_supporting_reads} \ -m ~{max_mismappers} \ @@ -247,9 +252,18 @@ task arriba { -l ~{max_itd_length} \ -z ~{min_itd_allele_fraction} \ -Z ~{min_itd_supporting_reads} \ - ~{if mark_duplicates then "" else "-u"} \ - ~{if report_additional_columns then "-X" else ""} \ - ~{if fill_gaps then "-I" else ""} + ~{if mark_duplicates + then "" + else "-u" + } \ + ~{if report_additional_columns + then "-X" + else "" + } \ + ~{if fill_gaps + then "-I" + else "" + } >>> output { diff --git a/tools/bwa.wdl b/tools/bwa.wdl index 56aafcb5c..7884c91f1 100644 --- a/tools/bwa.wdl +++ b/tools/bwa.wdl @@ -257,17 +257,23 @@ task bwa_mem { -R "~{read_group}" \ bwa_db/"$PREFIX" \ "~{basename(read_one_fastq_gz)}" \ - ~{(if defined(read_two_fastq_gz) then "'" + basename(select_first([ - read_two_fastq_gz, - ])) + "'" else "")} \ + ~{(if defined(read_two_fastq_gz) + then "'" + basename(select_first([ + read_two_fastq_gz, + ])) + "'" + else "" + )} \ | samtools view --no-PG --threads "$samtools_cores" -hb - \ > "~{output_bam}" rm -r bwa_db rm "~{basename(read_one_fastq_gz)}" - ~{(if defined(read_two_fastq_gz) then "rm '" + basename(select_first([ - read_two_fastq_gz, - ])) + "'" else "")} + ~{(if defined(read_two_fastq_gz) + then "rm '" + basename(select_first([ + read_two_fastq_gz, + ])) + "'" + else "" + )} >>> output { diff --git a/tools/fastp.wdl b/tools/fastp.wdl index c4a131976..64d1ea545 100644 --- a/tools/fastp.wdl +++ b/tools/fastp.wdl @@ -112,7 +112,9 @@ task fastp { Boolean phred64 = false Boolean use_all_cores = false Int first_n_reads = 0 - Int duplicate_accuracy = if deduplicate then 3 else 1 + Int duplicate_accuracy = if deduplicate + then 3 + else 1 Int n_base_limit = 5 Int qualified_quality = 15 Int unqualified_percent = 40 @@ -157,31 +159,71 @@ task fastp { fastp \ -i "~{read_one_fastq}" \ ~{"-I '" + read_two_fastq + "'"} \ - ~{(if output_fastq then "-o '" + (if defined(read_two_fastq) then "~{prefix}.R1.fastq.gz" - else "~{prefix}.fastq.gz") + "'" else "")} \ - ~{(if (defined(read_two_fastq) && output_fastq) then "-O '" + prefix + ".R2.fastq.gz'" - else "")} \ + ~{(if output_fastq + then "-o '" + (if defined(read_two_fastq) + then "~{prefix}.R1.fastq.gz" + else "~{prefix}.fastq.gz" + ) + "'" + else "" + )} \ + ~{(if (defined(read_two_fastq) && output_fastq) + then "-O '" + prefix + ".R2.fastq.gz'" + else "" + )} \ --reads_to_process ~{first_n_reads} \ - ~{if deduplicate then "--dedup" else ""} \ + ~{if deduplicate + then "--dedup" + else "" + } \ --dup_calc_accuracy ~{duplicate_accuracy} \ - ~{if disable_duplicate_eval then "--dont_eval_duplication" else ""} \ - ~{if phred64 then "--phred64" else ""} \ - ~{if disable_quality_filter then "--disable_quality_filtering" else ""} \ + ~{if disable_duplicate_eval + then "--dont_eval_duplication" + else "" + } \ + ~{if phred64 + then "--phred64" + else "" + } \ + ~{if disable_quality_filter + then "--disable_quality_filtering" + else "" + } \ -n ~{n_base_limit} \ -q ~{qualified_quality} \ -u ~{unqualified_percent} \ -e ~{average_quality} \ - ~{if disable_length_filter then "--disable_length_filtering" else ""} \ + ~{if disable_length_filter + then "--disable_length_filtering" + else "" + } \ -l ~{length_required} \ --length_limit ~{length_limit} \ - ~{if enable_complexity_filter then "-y" else ""} \ + ~{if enable_complexity_filter + then "-y" + else "" + } \ -Y ~{complexity_threshold} \ - ~{if enable_overrepresentation_eval then "-p" else ""} \ + ~{if enable_overrepresentation_eval + then "-p" + else "" + } \ -P ~{overrepresentation_sampling} \ - ~{if disable_adapter_trimming then "--disable_adapter_trimming" else ""} \ - ~{if enable_pe_adapter_trimming then "-2" else ""} \ - ~{if allow_gap_overlap_trimming then "--allow_gap_overlap_trimming" else ""} \ - ~{if enable_base_correction then "-c" else ""} \ + ~{if disable_adapter_trimming + then "--disable_adapter_trimming" + else "" + } \ + ~{if enable_pe_adapter_trimming + then "-2" + else "" + } \ + ~{if allow_gap_overlap_trimming + then "--allow_gap_overlap_trimming" + else "" + } \ + ~{if enable_base_correction + then "-c" + else "" + } \ --overlap_len_require ~{overlap_len_require} \ --overlap_diff_limit ~{overlap_diff_limit} \ --overlap_diff_percent_limit ~{overlap_diff_percent_limit} \ @@ -207,9 +249,10 @@ task fastp { runtime { cpu: ncpu - memory: (if disable_duplicate_eval then "4 GB" else dup_acc_to_mem[ - duplicate_accuracy - ]) + memory: (if disable_duplicate_eval + then "4 GB" + else dup_acc_to_mem[duplicate_accuracy] + ) disks: "~{disk_size_gb} GB" container: "quay.io/biocontainers/fastp:1.0.1--heae3180_0" maxRetries: 1 diff --git a/tools/fq.wdl b/tools/fq.wdl index 38b7e397b..e8840b6b4 100755 --- a/tools/fq.wdl +++ b/tools/fq.wdl @@ -75,7 +75,10 @@ task fqlint { ~{sep(" ", prefix("--disable-validator ", squote(disable_validator_codes)))} \ --single-read-validation-level "~{single_read_validation_level}" \ --paired-read-validation-level "~{paired_read_validation_level}" \ - --lint-mode ~{if panic then "panic" else "log"} \ + --lint-mode ~{if panic + then "panic" + else "log" + } \ "~{read_one_fastq}" \ ~{"'" + read_two_fastq + "'"} >>> @@ -133,10 +136,13 @@ task subsample { Int disk_size_gb = ceil((read1_size + read2_size) * 2) + modify_disk_size_gb - String probability_arg = (if (probability < 1.0 && probability > 0) then "-p ~{ - probability - }" else "") - String record_count_arg = if (record_count > 0) then "-n ~{record_count}" else "" + String probability_arg = (if (probability < 1.0 && probability > 0) + then "-p ~{probability}" + else "" + ) + String record_count_arg = if (record_count > 0) + then "-n ~{record_count}" + else "" String r1_dst = prefix + ".R1.subsampled.fastq.gz" String r2_dst = prefix + ".R2.subsampled.fastq.gz" diff --git a/tools/gatk4.wdl b/tools/gatk4.wdl index f0cfe2d30..1ddea3b69 100644 --- a/tools/gatk4.wdl +++ b/tools/gatk4.wdl @@ -126,7 +126,10 @@ task base_recalibrator { BaseRecalibratorSpark \ -R "~{fasta}" \ -I "~{bam}" \ - ~{(if use_original_quality_scores then "--use-original-qualities" else "")} \ + ~{(if use_original_quality_scores + then "--use-original-qualities" + else "" + )} \ -O "~{outfile_name}" \ --known-sites "~{dbSNP_vcf}" \ ~{sep(" ", prefix("--known-sites ", squote(known_indels_sites_vcfs)))} \ @@ -193,7 +196,10 @@ task apply_bqsr { ApplyBQSRSpark \ --spark-master local[~{ncpu}] \ -I "~{bam}" \ - ~{if use_original_quality_scores then "--use-original-qualities" else ""} \ + ~{if use_original_quality_scores + then "--use-original-qualities" + else "" + } \ -O "~{prefix}.bqsr.bam" \ --bqsr-recal-file "~{recalibration_report}" >>> @@ -276,7 +282,10 @@ task haplotype_caller { -I "~{bam}" \ -L "~{interval_list}" \ -O "~{prefix}.vcf.gz" \ - ~{if use_soft_clipped_bases then "" else "--dont-use-soft-clipped-bases"} \ + ~{if use_soft_clipped_bases + then "" + else "--dont-use-soft-clipped-bases" + } \ --standard-min-confidence-threshold-for-calling ~{stand_call_conf} \ --dbsnp "~{dbSNP_vcf}" >>> @@ -450,8 +459,10 @@ task mark_duplicates_spark { Float bam_size = size(bam, "GB") Int memory_gb = min(ceil(bam_size + 15), 50) + modify_memory_gb - Int disk_size_gb = ((if create_bam then ceil((bam_size * 2) + 10) else ceil(bam_size + 10 - )) + modify_disk_size_gb) + Int disk_size_gb = ((if create_bam + then ceil((bam_size * 2) + 10) + else ceil(bam_size + 10) + ) + modify_disk_size_gb) Int java_heap_size = ceil(memory_gb * 0.9) @@ -463,11 +474,16 @@ task mark_duplicates_spark { --java-options "-Xmx~{java_heap_size}g" \ -I "~{bam}" \ -M "~{prefix}.metrics.txt" \ - -O "~{if create_bam then prefix + ".bam" else "/dev/null"}" \ + -O "~{if create_bam + then prefix + ".bam" + else "/dev/null" + }" \ --create-output-bam-index ~{create_bam} \ --read-validation-stringency "~{validation_stringency}" \ --duplicate-scoring-strategy "~{duplicate_scoring_strategy}" \ - --read-name-regex '~{if (optical_distance > 0) then read_name_regex else "null" + --read-name-regex '~{if (optical_distance > 0) + then read_name_regex + else "null" }' \ --duplicate-tagging-policy "~{tagging_policy}" \ --optical-duplicate-pixel-distance ~{optical_distance} \ diff --git a/tools/htseq.wdl b/tools/htseq.wdl index bbc8741de..b9bb1f846 100755 --- a/tools/htseq.wdl +++ b/tools/htseq.wdl @@ -95,9 +95,15 @@ task count { Float bam_size = size(bam, "GB") Float gtf_size = size(gtf, "GB") - Int memory_gb = (if pos_sorted then ceil(bam_size) + 4 else 4) + modify_memory_gb + Int memory_gb = (if pos_sorted + then ceil(bam_size) + 4 + else 4 + ) + modify_memory_gb - Int disk_size_gb = ceil((bam_size + gtf_size) * if pos_sorted then 4 else 1) + 10 + modify_disk_size_gb + Int disk_size_gb = ceil((bam_size + gtf_size) * if pos_sorted + then 4 + else 1 + ) + 10 + modify_disk_size_gb command <<< set -euo pipefail @@ -111,15 +117,26 @@ task count { # 9223372036854776000 == max 64 bit Float htseq-count -f bam \ --max-reads-in-buffer 9223372036854776000 \ - -r ~{if pos_sorted then "pos" else "name"} \ + -r ~{if pos_sorted + then "pos" + else "name" + } \ -s "~{strandedness}" \ -a ~{minaqual} \ -t "~{feature_type}" \ -m "~{mode}" \ -i "~{idattr}" \ - --nonunique ~{if nonunique then "all" else "none"} \ - --secondary-alignments ~{if secondary_alignments then "score" else "ignore"} \ - --supplementary-alignments ~{(if supplementary_alignments then "score" else "ignore" + --nonunique ~{if nonunique + then "all" + else "none" + } \ + --secondary-alignments ~{if secondary_alignments + then "score" + else "ignore" + } \ + --supplementary-alignments ~{(if supplementary_alignments + then "score" + else "ignore" )} \ "~{bam}" \ "~{gtf}" \ @@ -174,7 +191,10 @@ task calc_tpm { "~{counts}" \ "~{feature_lengths}" \ "~{outfile_name}" \ - ~{if has_header then "--counts_has_header" else ""} + ~{if has_header + then "--counts_has_header" + else "" + } >>> output { diff --git a/tools/kraken2.wdl b/tools/kraken2.wdl index 22a665eb9..1e01506a0 100644 --- a/tools/kraken2.wdl +++ b/tools/kraken2.wdl @@ -26,7 +26,10 @@ task download_taxonomy { set -euo pipefail kraken2-build --download-taxonomy \ - ~{if protein then "--protein" else ""} \ + ~{if protein + then "--protein" + else "" + } \ --use-ftp \ --db "~{db_name}" 2>&1 \ | awk '/gunzip:/ { print; exit 42 } !/gunzip:/ { print }' 1>&2 @@ -96,16 +99,24 @@ task download_library { String db_name = "kraken2_" + library_name + "_library" #@ except: ExpressionSpacing - Int disk_size_gb = ((if library_name == "bacteria" then 300 else if library_name == "nr" - then 600 - else if library_name == "nt" then 2500 else 25) + modify_disk_size_gb) + Int disk_size_gb = ((if library_name == "bacteria" + then 300 + else if library_name == "nr" + then 600 + else if library_name == "nt" + then 2500 + else 25 + ) + modify_disk_size_gb) command <<< set -euo pipefail kraken2-build --download-library \ "~{library_name}" \ - ~{if protein then "--protein" else ""} \ + ~{if protein + then "--protein" + else "" + } \ --use-ftp \ --db "~{db_name}" 2>&1 \ | awk '/gunzip:/ { print; exit 42 } !/gunzip:/ { print }' 1>&2 @@ -166,7 +177,10 @@ task create_library_from_fastas { while read -r fasta; do gunzip -c "$fasta" > tmp.fa kraken2-build \ - ~{if protein then "--protein" else ""} \ + ~{if protein + then "--protein" + else "" + } \ --add-to-library tmp.fa \ --db "~{db_name}" done < fastas.txt @@ -232,9 +246,15 @@ task build_db { String db_name = "kraken2_db" Boolean protein = false Boolean use_all_cores = false - Int kmer_len = if protein then 15 else 35 - Int minimizer_len = if protein then 12 else 31 - Int minimizer_spaces = if protein then 0 else 7 + Int kmer_len = if protein + then 15 + else 35 + Int minimizer_len = if protein + then 12 + else 31 + Int minimizer_spaces = if protein + then 0 + else 7 Int max_db_size_gb = -1 Int ncpu = 4 Int modify_memory_gb = 0 @@ -243,9 +263,10 @@ task build_db { Float tarballs_size = size(tarballs, "GB") Int disk_size_gb = ceil(tarballs_size * 6) + 10 + modify_disk_size_gb - Int memory_gb = ((if (max_db_size_gb > 0) then ceil(max_db_size_gb * 1.2) else ceil( - tarballs_size * 2 - )) + modify_memory_gb) + Int memory_gb = ((if (max_db_size_gb > 0) + then ceil(max_db_size_gb * 1.2) + else ceil(tarballs_size * 2) + ) + modify_memory_gb) String max_db_size_bytes = "~{max_db_size_gb}000000000" @@ -267,12 +288,17 @@ task build_db { >&2 echo "*** start DB build ***" kraken2-build --build \ - ~{if protein then "--protein" else ""} \ + ~{if protein + then "--protein" + else "" + } \ --kmer-len ~{kmer_len} \ --minimizer-len ~{minimizer_len} \ --minimizer-spaces ~{minimizer_spaces} \ - ~{(if (max_db_size_gb > 0) then "--max-db-size '" + max_db_size_bytes + "'" - else "")} \ + ~{(if (max_db_size_gb > 0) + then "--max-db-size '" + max_db_size_bytes + "'" + else "" + )} \ --threads "$n_cores" \ --db "~{db_name}" @@ -363,8 +389,10 @@ task kraken { Float read2_size = size(read_two_fastq_gz, "GB") Int disk_size_gb_calculation = (ceil((db_size * 2) + read1_size + read2_size) + 10 + modify_disk_size_gb ) - Int disk_size_gb = (if store_sequences then disk_size_gb_calculation + ceil(read1_size - + read2_size) else disk_size_gb_calculation) + Int disk_size_gb = (if store_sequences + then disk_size_gb_calculation + ceil(read1_size + read2_size) + else disk_size_gb_calculation + ) Int memory_gb = ceil(db_size * 2) + modify_memory_gb @@ -384,12 +412,18 @@ task kraken { kraken2 --db kraken2_db/ \ --paired \ - --output ~{if store_sequences then "'" + out_sequences + "'" else "-"} \ + --output ~{if store_sequences + then "'" + out_sequences + "'" + else "-" + } \ --threads "$n_cores" \ --minimum-base-quality ~{min_base_quality} \ --report "~{out_report}" \ --report-zero-counts \ - ~{if use_names then "--use-names" else ""} \ + ~{if use_names + then "--use-names" + else "" + } \ "~{read_one_fastq_gz}" \ "~{read_two_fastq_gz}" diff --git a/tools/mosdepth.wdl b/tools/mosdepth.wdl index 7a074133e..63606083d 100644 --- a/tools/mosdepth.wdl +++ b/tools/mosdepth.wdl @@ -52,7 +52,10 @@ task coverage { -n \ ~{"-b '" + coverage_bed + "'"} \ -Q ~{min_mapping_quality} \ - ~{if (use_fast_mode) then "-x" else ""} \ + ~{if (use_fast_mode) + then "-x" + else "" + } \ "~{prefix}" \ "$CWD_BAM" diff --git a/tools/ngsderive.wdl b/tools/ngsderive.wdl index a1b1e2c34..5f7366e7e 100644 --- a/tools/ngsderive.wdl +++ b/tools/ngsderive.wdl @@ -60,7 +60,10 @@ task strandedness { ln -s "~{gene_model}" "$CWD_GFF" ngsderive strandedness --verbose \ - ~{if split_by_rg then "--split-by-rg" else ""} \ + ~{if split_by_rg + then "--split-by-rg" + else "" + } \ -m ~{min_reads_per_gene} \ -n ~{num_genes} \ -q ~{min_mapq} \ @@ -397,16 +400,30 @@ task endedness { } Float bam_size = size(bam, "GB") - Int memory_gb = (if calc_rpt then (ceil(bam_size * 2.5) + 4 + modify_memory_gb) else 4 + Int memory_gb = (if calc_rpt + then (ceil(bam_size * 2.5) + 4 + modify_memory_gb) + else 4 ) Int disk_size_gb = ceil(bam_size) + 10 + modify_disk_size_gb command <<< ngsderive endedness --verbose \ - ~{if lenient then "--lenient" else ""} \ - ~{if calc_rpt then "-r" else ""} \ - ~{if round_rpt then "--round-rpt" else ""} \ - ~{if split_by_rg then "--split-by-rg" else ""} \ + ~{if lenient + then "--lenient" + else "" + } \ + ~{if calc_rpt + then "-r" + else "" + } \ + ~{if round_rpt + then "--round-rpt" + else "" + } \ + ~{if split_by_rg + then "--split-by-rg" + else "" + } \ --paired-deviance ~{paired_deviance} \ -n ~{num_reads} \ "~{bam}" \ diff --git a/tools/picard.wdl b/tools/picard.wdl index 2127f92d6..16fc2f451 100755 --- a/tools/picard.wdl +++ b/tools/picard.wdl @@ -83,8 +83,10 @@ task mark_duplicates { Float bam_size = size(bam, "GB") Int memory_gb = min(ceil(bam_size + 12), 50) + modify_memory_gb - Int disk_size_gb = ((if create_bam then ceil((bam_size * 2) + 10) else ceil(bam_size + 10 - )) + modify_disk_size_gb) + Int disk_size_gb = ((if create_bam + then ceil((bam_size * 2) + 10) + else ceil(bam_size + 10) + ) + modify_disk_size_gb) Int java_heap_size = ceil(memory_gb * 0.9) @@ -94,12 +96,17 @@ task mark_duplicates { picard -Xmx~{java_heap_size}g MarkDuplicates \ -I "~{bam}" \ --METRICS_FILE "~{prefix}.metrics.txt" \ - -O "~{if create_bam then prefix + ".bam" else "/dev/null"}" \ + -O "~{if create_bam + then prefix + ".bam" + else "/dev/null" + }" \ --CREATE_INDEX ~{create_bam} \ --CREATE_MD5_FILE ~{create_bam} \ --VALIDATION_STRINGENCY "~{validation_stringency}" \ --DUPLICATE_SCORING_STRATEGY "~{duplicate_scoring_strategy}" \ - --READ_NAME_REGEX '~{if (optical_distance > 0) then read_name_regex else "null" + --READ_NAME_REGEX '~{if (optical_distance > 0) + then read_name_regex + else "null" }' \ --TAGGING_POLICY "~{tagging_policy}" \ --CLEAR_DT ~{clear_dt} \ @@ -187,10 +194,16 @@ task validate_bam { Int modify_disk_size_gb = 0 } - String outfile = if summary_mode then outfile_name else outfile_name + ".gz" - String mode_arg = if (summary_mode) then "--MODE SUMMARY" else "" - String stringency_arg = (if (index_validation_stringency_less_exhaustive) then "--INDEX_VALIDATION_STRINGENCY LESS_EXHAUSTIVE" - else "") + String outfile = if summary_mode + then outfile_name + else outfile_name + ".gz" + String mode_arg = if (summary_mode) + then "--MODE SUMMARY" + else "" + String stringency_arg = (if (index_validation_stringency_less_exhaustive) + then "--INDEX_VALIDATION_STRINGENCY LESS_EXHAUSTIVE" + else "" + ) Float bam_size = size(bam, "GB") Int disk_size_gb = ceil(bam_size) + 10 + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) @@ -208,7 +221,10 @@ task validate_bam { --VALIDATION_STRINGENCY "~{validation_stringency}" \ ~{sep(" ", prefix("--IGNORE ", squote(ignore_list)))} \ --MAX_OUTPUT ~{max_errors} \ - ~{if !summary_mode then "| gzip" else ""} \ + ~{if !summary_mode + then "| gzip" + else "" + } \ > "~{outfile}" \ || rc=$? @@ -411,7 +427,9 @@ task merge_sam_files { } runtime { - cpu: if threading then 2 else 1 + cpu: if threading + then 2 + else 1 memory: "~{memory_gb} GB" disks: "~{disk_size_gb} GB" container: "quay.io/biocontainers/picard:3.1.1--hdfd78af_0" @@ -843,12 +861,18 @@ task bam_to_fastq { picard -Xmx~{java_heap_size}g SamToFastq INPUT="~{bam}" \ FASTQ="~{prefix}.R1.fastq" \ - ~{(if paired then "SECOND_END_FASTQ='" + prefix + ".R2.fastq'" else "")} \ + ~{(if paired + then "SECOND_END_FASTQ='" + prefix + ".R2.fastq'" + else "" + )} \ RE_REVERSE=true \ VALIDATION_STRINGENCY=SILENT gzip "~{prefix}.R1.fastq" \ - ~{if paired then "'" + prefix + ".R2.fastq'" else ""} + ~{if paired + then "'" + prefix + ".R2.fastq'" + else "" + } >>> output { diff --git a/tools/qualimap.wdl b/tools/qualimap.wdl index 92067aa34..59f10fd15 100755 --- a/tools/qualimap.wdl +++ b/tools/qualimap.wdl @@ -40,8 +40,12 @@ task rnaseq { } String out_tar_gz = prefix + ".tar.gz" - String name_sorted_arg = if (name_sorted) then "-s" else "" - String paired_end_arg = if (paired_end) then "-pe" else "" + String name_sorted_arg = if (name_sorted) + then "-s" + else "" + String paired_end_arg = if (paired_end) + then "-pe" + else "" Int java_heap_size = ceil(memory_gb * 0.9) Float bam_size = size(bam, "GB") @@ -49,9 +53,10 @@ task rnaseq { # Qualimap has an inefficient name sorting algorithm and will # use an excessive amount of storage. - Int disk_size_gb = ((if name_sorted then ceil(bam_size + gtf_size + 15) else ceil((( - bam_size + gtf_size - ) * 12) + 10)) + modify_disk_size_gb) + Int disk_size_gb = ((if name_sorted + then ceil(bam_size + gtf_size + 15) + else ceil(((bam_size + gtf_size) * 12) + 10) + ) + modify_disk_size_gb) command <<< set -euo pipefail diff --git a/tools/sambamba.wdl b/tools/sambamba.wdl index d0e688e9c..f5e0eced0 100644 --- a/tools/sambamba.wdl +++ b/tools/sambamba.wdl @@ -156,7 +156,10 @@ task sort { sambamba sort \ --nthreads ~{ncpu} \ -o "~{outfile_name}" \ - ~{if queryname_sort then "-n" else ""} \ + ~{if queryname_sort + then "-n" + else "" + } \ "~{bam}" >>> @@ -208,7 +211,10 @@ task markdup { command <<< sambamba markdup \ --nthreads ~{ncpu} \ - ~{if remove_duplicates then "--remove-duplicates" else ""} \ + ~{if remove_duplicates + then "--remove-duplicates" + else "" + } \ "~{bam}" \ "~{prefix}.markdup.bam" \ > "~{prefix}.markdup_log.txt" diff --git a/tools/samtools.wdl b/tools/samtools.wdl index 16a519d66..1d4bdb992 100755 --- a/tools/samtools.wdl +++ b/tools/samtools.wdl @@ -582,11 +582,26 @@ task merge { samtools merge \ --threads "$n_cores" \ ~{"-h \"" + new_header + "\""} \ - ~{if name_sorted then "-n" else ""} \ - ~{if (region != "") then "-R \"" + region + "\"" else ""} \ - ~{if attach_rg then "-r" else ""} \ - ~{if combine_rg then "-c" else ""} \ - ~{if combine_pg then "-p" else ""} \ + ~{if name_sorted + then "-n" + else "" + } \ + ~{if (region != "") + then "-R \"" + region + "\"" + else "" + } \ + ~{if attach_rg + then "-r" + else "" + } \ + ~{if combine_rg + then "-c" + else "" + } \ + ~{if combine_pg + then "-p" + else "" + } \ "~{prefix}.bam" \ "${bams[@]}" @@ -675,8 +690,14 @@ task addreplacerg { --threads "$n_cores" \ ~{sep(" ", prefix("-r ", squote(read_group_line)))} \ ~{"-R \"" + read_group_id + "\""} \ - -m ~{if orphan_only then "orphan_only" else "overwrite_all"} \ - ~{if overwrite_header_record then "-w" else ""} \ + -m ~{if orphan_only + then "orphan_only" + else "overwrite_all" + } \ + ~{if overwrite_header_record + then "-w" + else "" + } \ -o "~{outfile_name}" \ "~{bam}" >>> @@ -749,7 +770,10 @@ task collate { samtools collate \ --threads "$n_cores" \ - ~{if fast_mode then "-f" else ""} \ + ~{if fast_mode + then "-f" + else "" + } \ -o "~{outfile_name}" \ "~{bam}" >>> @@ -852,10 +876,15 @@ task bam_to_fastq { } Float bam_size = size(bam, "GB") - Int memory_gb = (if (collated || !paired_end) then 4 else (ceil(bam_size * 0.4) + 4)) - + modify_memory_gb + Int memory_gb = (if (collated || !paired_end) + then 4 + else (ceil(bam_size * 0.4) + 4) + ) + modify_memory_gb Int disk_size_gb = ceil(bam_size * (if (retain_collated_bam && !collated && paired_end - ) then 5 else 2)) + 10 + modify_disk_size_gb + ) + then 5 + else 2 + )) + 10 + modify_disk_size_gb command <<< set -euo pipefail @@ -870,13 +899,21 @@ task bam_to_fastq { mkfifo bam_pipe if ! ~{collated} && ~{paired_end}; then samtools collate \ - ~{if retain_collated_bam then "" else "-u"} \ + ~{if retain_collated_bam + then "" + else "-u" + } \ --threads "$n_cores" \ - ~{if fast_mode then "-f" else ""} \ + ~{if fast_mode + then "-f" + else "" + } \ -O \ "~{bam}" \ - | tee ~{(if retain_collated_bam then "\"" + prefix + ".collated.bam\"" - else "")} \ + | tee ~{(if retain_collated_bam + then "\"" + prefix + ".collated.bam\"" + else "" + )} \ > bam_pipe \ & else @@ -889,15 +926,29 @@ task bam_to_fastq { -F "~{bitwise_filter.exclude_if_any}" \ --rf "~{bitwise_filter.include_if_any}" \ -G "~{bitwise_filter.exclude_if_all}" \ - ~{(if append_read_number then "-N" else "-n")} \ - -1 ~{(if paired_end then "\"" + prefix + ".R1.fastq.gz\"" else "\"" + prefix + ".fastq.gz\"" + ~{(if append_read_number + then "-N" + else "-n" )} \ - -2 ~{(if paired_end then "\"" + prefix + ".R2.fastq.gz\"" else "\"" + prefix + ".fastq.gz\"" + -1 ~{(if paired_end + then "\"" + prefix + ".R1.fastq.gz\"" + else "\"" + prefix + ".fastq.gz\"" + )} \ + -2 ~{(if paired_end + then "\"" + prefix + ".R2.fastq.gz\"" + else "\"" + prefix + ".fastq.gz\"" + )} \ + ~{(if paired_end + then (if output_singletons + then "-s \"" + prefix + ".singleton.fastq.gz\"" + else "-s junk.singleton.fastq.gz" + ) + else "" + )} \ + -0 ~{(if paired_end + then "junk.unknown_bit_setting.fastq.gz" + else "\"" + prefix + ".fastq.gz\"" )} \ - ~{(if paired_end then (if output_singletons then "-s \"" + prefix + ".singleton.fastq.gz\"" - else "-s junk.singleton.fastq.gz") else "")} \ - -0 ~{(if paired_end then "junk.unknown_bit_setting.fastq.gz" else "\"" + prefix - + ".fastq.gz\"")} \ bam_pipe rm bam_pipe @@ -1011,11 +1062,26 @@ task fixmate { samtools fixmate \ --threads "$n_cores" \ - ~{if remove_unaligned_and_secondary then "-r" else ""} \ - ~{if disable_proper_pair_check then "-p" else ""} \ - ~{if add_cigar then "-c" else ""} \ - ~{if add_mate_score then "-m" else ""} \ - ~{if disable_flag_sanitization then "-z off" else ""} \ + ~{if remove_unaligned_and_secondary + then "-r" + else "" + } \ + ~{if disable_proper_pair_check + then "-p" + else "" + } \ + ~{if add_cigar + then "-c" + else "" + } \ + ~{if add_mate_score + then "-m" + else "" + } \ + ~{if disable_flag_sanitization + then "-z off" + else "" + } \ "~{bam}" \ "~{prefix}~{extension}" >>> @@ -1106,18 +1172,36 @@ task position_sorted_fixmate { samtools collate \ --threads "$n_cores" \ - ~{if fast_mode then "-f" else ""} \ + ~{if fast_mode + then "-f" + else "" + } \ -u \ -O \ "~{bam}" \ | samtools fixmate \ --threads "$n_cores" \ -u \ - ~{if remove_unaligned_and_secondary then "-r" else ""} \ - ~{if disable_proper_pair_check then "-p" else ""} \ - ~{if add_cigar then "-c" else ""} \ - ~{if add_mate_score then "-m" else ""} \ - ~{if disable_flag_sanitization then "-z off" else ""} \ + ~{if remove_unaligned_and_secondary + then "-r" + else "" + } \ + ~{if disable_proper_pair_check + then "-p" + else "" + } \ + ~{if add_cigar + then "-c" + else "" + } \ + ~{if add_mate_score + then "-m" + else "" + } \ + ~{if disable_flag_sanitization + then "-z off" + else "" + } \ - \ - \ | samtools sort \ @@ -1248,25 +1332,54 @@ task markdup { samtools markdup \ --threads "$n_cores" \ - -f "~{prefix + if json then ".json" else ".txt"}" \ + -f "~{prefix + if json + then ".json" + else ".txt" + }" \ --read-coords '~{read_coords_regex}' \ --coords-order "~{coordinates_order}" \ - ~{if remove_duplicates then "-r" else ""} \ - ~{if mark_supp_or_sec_or_unmapped_as_duplicates then "-S" else ""} \ - ~{if mark_duplicates_with_do_tag then "-t" else ""} \ - ~{if duplicate_count then "--duplicate-count" else ""} \ - ~{if include_qc_fails then "--include-fails" else ""} \ - ~{if duplicates_of_duplicates_check then "" else "--no-multi-dup"} \ - ~{if use_read_groups then "--use-read-groups" else ""} \ + ~{if remove_duplicates + then "-r" + else "" + } \ + ~{if mark_supp_or_sec_or_unmapped_as_duplicates + then "-S" + else "" + } \ + ~{if mark_duplicates_with_do_tag + then "-t" + else "" + } \ + ~{if duplicate_count + then "--duplicate-count" + else "" + } \ + ~{if include_qc_fails + then "--include-fails" + else "" + } \ + ~{if duplicates_of_duplicates_check + then "" + else "--no-multi-dup" + } \ + ~{if use_read_groups + then "--use-read-groups" + else "" + } \ -l ~{max_readlen} \ -d ~{optical_distance} \ -c \ "~{bam}" \ - "~{if create_bam then prefix + ".bam" else "/dev/null"}" + "~{if create_bam + then prefix + ".bam" + else "/dev/null" + }" >>> output { - File markdup_report = prefix + if json then ".json" else ".txt" + File markdup_report = prefix + if json + then ".json" + else ".txt" File? markdup_bam = prefix + ".bam" } diff --git a/tools/star.wdl b/tools/star.wdl index 28a299175..8703e453f 100755 --- a/tools/star.wdl +++ b/tools/star.wdl @@ -767,19 +767,33 @@ task alignment { align_sj_stitch_mismatch_n_max.AT_AC_and_GT_AT_motif, ]))} \ --clip3pAdapterSeq "~{clip_3p_adapter_seq.left}" ~{(if (length(read_twos) != 0 - ) then "'" + clip_3p_adapter_seq.right + "'" else "")} \ + ) + then "'" + clip_3p_adapter_seq.right + "'" + else "" + )} \ --clip3pAdapterMMp ~{clip_3p_adapter_mmp.left} ~{(if (length(read_twos) != 0) then clip_3p_adapter_mmp.right - else None)} \ + else None + )} \ --alignEndsProtrude ~{align_ends_protrude.left} "~{(if (length(read_twos) != 0 - ) then align_ends_protrude.right else None)}" \ - --clip3pNbases ~{clip_3p_n_bases.left} ~{(if (length(read_twos) != 0) then clip_3p_n_bases.right - else None)} \ + ) + then align_ends_protrude.right + else None + )}" \ + --clip3pNbases ~{clip_3p_n_bases.left} ~{(if (length(read_twos) != 0) + then clip_3p_n_bases.right + else None + )} \ --clip3pAfterAdapterNbases ~{clip_3p_after_adapter_n_bases.left} ~{(if (length( read_twos - ) != 0) then clip_3p_after_adapter_n_bases.right else None)} \ - --clip5pNbases ~{clip_5p_n_bases.left} ~{(if (length(read_twos) != 0) then clip_5p_n_bases.right - else None)} \ + ) != 0) + then clip_3p_after_adapter_n_bases.right + else None + )} \ + --clip5pNbases ~{clip_5p_n_bases.left} ~{(if (length(read_twos) != 0) + then clip_5p_n_bases.right + else None + )} \ --readNameSeparator "~{read_name_separator}" \ --clipAdapterType "~{clip_adapter_type}" \ --outSAMstrandField "~{out_sam_strand_field}" \ @@ -787,9 +801,13 @@ task alignment { --outSAMunmapped "~{out_sam_unmapped}" \ --outSAMorder "~{out_sam_order}" \ --outSAMreadID "~{out_sam_read_id}" \ - --outSAMtlen ~{(if (out_sam_tlen == "left_plus") then "1" else (if ( - out_sam_tlen == "left_any" - ) then "2" else "error"))} \ + --outSAMtlen ~{(if (out_sam_tlen == "left_plus") + then "1" + else (if (out_sam_tlen == "left_any") + then "2" + else "error" + ) + )} \ --outFilterType "~{out_filter_type}" \ --outFilterIntronMotifs "~{out_filter_intron_motifs}" \ --outFilterIntronStrands "~{out_filter_intron_strands}" \ diff --git a/tools/util.wdl b/tools/util.wdl index 06ad3527d..3e59b5384 100644 --- a/tools/util.wdl +++ b/tools/util.wdl @@ -338,7 +338,10 @@ task global_phred_scores { command <<< python3 /scripts/util/calc_global_phred_scores.py \ - ~{if fast_mode then "--fast_mode" else ""} \ + ~{if fast_mode + then "--fast_mode" + else "" + } \ "~{bam}" \ "~{prefix}" >>> @@ -391,9 +394,10 @@ task check_fastq_and_rg_concordance { command <<< python3 /scripts/util/check_FQs_and_RGs.py \ --read-one-fastqs "~{sep(",", read_one_names)}" \ - ~{(if length(read_twos) > 0 then "--read-two-fastqs \"" + sep(",", squote( - read_twos - )) + "\"" else "")} \ + ~{(if length(read_twos) > 0 + then "--read-two-fastqs \"" + sep(",", squote(read_twos)) + "\"" + else "" + )} \ --read-groups "~{sep(",", read_groups)}" >>> diff --git a/workflows/general/bam-to-fastqs.wdl b/workflows/general/bam-to-fastqs.wdl index a80aec153..573db7c7d 100644 --- a/workflows/general/bam-to-fastqs.wdl +++ b/workflows/general/bam-to-fastqs.wdl @@ -67,8 +67,10 @@ workflow bam_to_fastqs { } output { - Array[File] read1s = (if paired_end then select_all(bam_to_fastq.read_one_fastq_gz - ) else select_all(bam_to_fastq.single_end_reads_fastq_gz)) + Array[File] read1s = (if paired_end + then select_all(bam_to_fastq.read_one_fastq_gz) + else select_all(bam_to_fastq.single_end_reads_fastq_gz) + ) Array[File?] read2s = bam_to_fastq.read_two_fastq_gz } } diff --git a/workflows/general/samtools-merge.wdl b/workflows/general/samtools-merge.wdl index c6f7ea81b..fe6825278 100644 --- a/workflows/general/samtools-merge.wdl +++ b/workflows/general/samtools-merge.wdl @@ -35,7 +35,9 @@ workflow samtools_merge { scatter (merge_num in range((bam_length / max_length) + 1)) { # Get the sublist of bams scatter (bam_num in range(max_length)) { - Int num = (if merge_num > 0 then bam_num + (merge_num * max_length) else bam_num + Int num = (if merge_num > 0 + then bam_num + (merge_num * max_length) + else bam_num ) if (num < bam_length) { File bam_list = bams[num] diff --git a/workflows/methylation/methylation-cohort.wdl b/workflows/methylation/methylation-cohort.wdl index 8ca1ddc35..b057fec87 100644 --- a/workflows/methylation/methylation-cohort.wdl +++ b/workflows/methylation/methylation-cohort.wdl @@ -42,7 +42,9 @@ workflow methylation_cohort { scatter (merge_num in range((beta_length / max_length) + 1)) { # Get the sublist of beta files scatter (beta_num in range(max_length)) { - Int num = (if merge_num > 0 then beta_num + (merge_num * max_length) else beta_num + Int num = (if merge_num > 0 + then beta_num + (merge_num * max_length) + else beta_num ) if (num < beta_length) { File bam_list = unfiltered_normalized_beta[num] @@ -66,8 +68,10 @@ workflow methylation_cohort { scatter (merge_num in range((pval_length / max_length) + 1)) { # Get the sublist of p-value files scatter (pval_num in range(max_length)) { - Int num_p = (if merge_num > 0 then pval_num + (merge_num * max_length) - else pval_num) + Int num_p = (if merge_num > 0 + then pval_num + (merge_num * max_length) + else pval_num + ) if (num_p < pval_length) { File pval_list = p_values[num_p] } @@ -100,10 +104,13 @@ workflow methylation_cohort { } } - File? pval_file = (if (pval_length > 0 && !skip_pvalue_check) then select_first([ - final_merge_pvals.combined_file, - simple_merge_pval.combined_file, - ]) else None) + File? pval_file = (if (pval_length > 0 && !skip_pvalue_check) + then select_first([ + final_merge_pvals.combined_file, + simple_merge_pval.combined_file, + ]) + else None + ) call filter_probes { input: beta_values = select_first([ @@ -168,14 +175,19 @@ task combine_data { Int modify_memory_gb = 0 } - Int memory_gb = ceil(size(files_to_combine, "GB") * if simple_merge then 2 else 1) + modify_memory_gb - + 2 + Int memory_gb = ceil(size(files_to_combine, "GB") * if simple_merge + then 2 + else 1 + ) + modify_memory_gb + 2 Int disk_size_gb = ceil(size(files_to_combine, "GB") * 2) + 2 command <<< python /scripts/methylation/combine.py \ --output-name "~{combined_file_name}" \ - ~{if simple_merge then "--simple-merge" else ""} \ + ~{if simple_merge + then "--simple-merge" + else "" + } \ ~{sep(" ", quote(files_to_combine))} >>> diff --git a/workflows/methylation/methylation-standard.wdl b/workflows/methylation/methylation-standard.wdl index 6a2c82627..6c4e8b9f7 100644 --- a/workflows/methylation/methylation-standard.wdl +++ b/workflows/methylation/methylation-standard.wdl @@ -60,7 +60,9 @@ workflow methylation { scatter (merge_num in range((probelist_length / max_length) + 1)) { # Get the sublist of probe files scatter (probe_num in range(max_length)) { - Int num = (if merge_num > 0 then probe_num + (merge_num * max_length) else probe_num + Int num = (if merge_num > 0 + then probe_num + (merge_num * max_length) + else probe_num ) if (num < probelist_length) { File probe_file_batches = probe_files[num] @@ -96,8 +98,10 @@ workflow methylation { scatter (merge_num in range((non_genomic_probelist_length / max_length) + 1)) { # Get the sublist of probe files scatter (probe_num in range(max_length)) { - Int num_ng = (if merge_num > 0 then probe_num + (merge_num * max_length) - else probe_num) + Int num_ng = (if merge_num > 0 + then probe_num + (merge_num * max_length) + else probe_num + ) if (num_ng < non_genomic_probelist_length) { File non_genomic_probe_batches = non_genomic_probe_list[num_ng] } diff --git a/workflows/qc/quality-check-standard.wdl b/workflows/qc/quality-check-standard.wdl index a35f0b867..bd7fb0e9d 100644 --- a/workflows/qc/quality-check-standard.wdl +++ b/workflows/qc/quality-check-standard.wdl @@ -209,8 +209,10 @@ workflow quality_check_standard { subsample_index.bam_index, bam_index, ]) - String post_subsample_prefix = (if (defined(subsample.sampled_bam)) then prefix + ".subsampled" - else prefix) + String post_subsample_prefix = (if (defined(subsample.sampled_bam)) + then prefix + ".subsampled" + else prefix + ) call picard.validate_bam after quickcheck { input: bam = post_subsample_bam, @@ -481,9 +483,12 @@ workflow quality_check_standard { markdups_post.mosdepth_region_dist, [], ]), - (if (mark_duplicates && optical_distance > 0) then [ - markdups.mark_duplicates_metrics, - ] else []), + (if (mark_duplicates && optical_distance > 0) + then [ + markdups.mark_duplicates_metrics, + ] + else [] + ), ])) call multiqc_tasks.multiqc { input: @@ -588,8 +593,10 @@ task parse_input { >>> output { - Array[String] labels = (if (coverage_beds_len > 0) then read_lines("labels.txt") - else []) + Array[String] labels = (if (coverage_beds_len > 0) + then read_lines("labels.txt") + else [] + ) } runtime { diff --git a/workflows/rnaseq/rnaseq-core.wdl b/workflows/rnaseq/rnaseq-core.wdl index b24e1e2bb..afad29046 100644 --- a/workflows/rnaseq/rnaseq-core.wdl +++ b/workflows/rnaseq/rnaseq-core.wdl @@ -199,10 +199,14 @@ workflow rnaseq_core { } } - Array[File] chosen_r1s = (if enable_read_trimming then select_all(trim.read_one_fastq_gz - ) else read_one_fastqs_gz) - Array[File] chosen_r2s = (if enable_read_trimming then select_all(trim.read_two_fastq_gz - ) else read_two_fastqs_gz) + Array[File] chosen_r1s = (if enable_read_trimming + then select_all(trim.read_one_fastq_gz) + else read_one_fastqs_gz + ) + Array[File] chosen_r2s = (if enable_read_trimming + then select_all(trim.read_two_fastq_gz) + else read_two_fastqs_gz + ) call star.alignment after validate { input: read_one_fastqs_gz = chosen_r1s, @@ -244,16 +248,19 @@ workflow rnaseq_core { gene_model = gtf, } - String htseq_strandedness = (if (provided_strandedness != "") then htseq_strandedness_mapping[ - provided_strandedness - ] else htseq_strandedness_mapping[ngsderive_strandedness.strandedness_string]) + String htseq_strandedness = (if (provided_strandedness != "") + then htseq_strandedness_mapping[provided_strandedness] + else htseq_strandedness_mapping[ngsderive_strandedness.strandedness_string] + ) call htseq.count as htseq_count { input: bam = alignment_post.processed_bam, gtf, strandedness = htseq_strandedness, prefix = basename(alignment_post.processed_bam, "bam") + (if provided_strandedness - == "" then ngsderive_strandedness.strandedness_string else provided_strandedness + == "" + then ngsderive_strandedness.strandedness_string + else provided_strandedness ), pos_sorted = true, } From 43b42ee35e027bf094bbfa994f085d4b90594726 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Fri, 6 Mar 2026 08:20:46 -0500 Subject: [PATCH 4/4] manual removal of unneeded tokens (and then formatted) --- tools/arriba.wdl | 12 +++---- tools/bwa.wdl | 8 ++--- tools/fastp.wdl | 16 ++++----- tools/fq.wdl | 3 +- tools/gatk4.wdl | 8 ++--- tools/htseq.wdl | 4 +-- tools/kraken2.wdl | 17 ++++------ tools/ngsderive.wdl | 3 +- tools/picard.wdl | 11 +++---- tools/qualimap.wdl | 4 +-- tools/samtools.wdl | 32 +++++++++--------- tools/star.wdl | 33 +++++++++---------- tools/util.wdl | 4 +-- workflows/general/bam-to-fastqs.wdl | 3 +- workflows/general/samtools-merge.wdl | 3 +- workflows/methylation/methylation-cohort.wdl | 9 ++--- .../methylation/methylation-standard.wdl | 6 ++-- workflows/qc/quality-check-standard.wdl | 10 +++--- workflows/rnaseq/rnaseq-core.wdl | 13 +++----- 19 files changed, 87 insertions(+), 112 deletions(-) diff --git a/tools/arriba.wdl b/tools/arriba.wdl index 2b1b49e4a..196479133 100644 --- a/tools/arriba.wdl +++ b/tools/arriba.wdl @@ -221,18 +221,18 @@ task arriba { ~{"-d '" + wgs_svs + "'"} \ -D ~{max_genomic_breakpoint_distance} \ -s "~{strandedness}" \ - ~{(if length(interesting_contigs) > 0 + ~{if length(interesting_contigs) > 0 then "-i " + sep(",", quote(interesting_contigs)) else "" - )} \ - ~{(if length(viral_contigs) > 0 + } \ + ~{if length(viral_contigs) > 0 then "-v " + sep(",", quote(viral_contigs)) else "" - )} \ - ~{(if length(disable_filters) > 0 + } \ + ~{if length(disable_filters) > 0 then "-f " + sep(",", quote(disable_filters)) else "" - )} \ + } \ -E ~{max_e_value} \ -S ~{min_supporting_reads} \ -m ~{max_mismappers} \ diff --git a/tools/bwa.wdl b/tools/bwa.wdl index 7884c91f1..a27cb71a0 100644 --- a/tools/bwa.wdl +++ b/tools/bwa.wdl @@ -257,23 +257,23 @@ task bwa_mem { -R "~{read_group}" \ bwa_db/"$PREFIX" \ "~{basename(read_one_fastq_gz)}" \ - ~{(if defined(read_two_fastq_gz) + ~{if defined(read_two_fastq_gz) then "'" + basename(select_first([ read_two_fastq_gz, ])) + "'" else "" - )} \ + } \ | samtools view --no-PG --threads "$samtools_cores" -hb - \ > "~{output_bam}" rm -r bwa_db rm "~{basename(read_one_fastq_gz)}" - ~{(if defined(read_two_fastq_gz) + ~{if defined(read_two_fastq_gz) then "rm '" + basename(select_first([ read_two_fastq_gz, ])) + "'" else "" - )} + } >>> output { diff --git a/tools/fastp.wdl b/tools/fastp.wdl index 64d1ea545..2701ec7b6 100644 --- a/tools/fastp.wdl +++ b/tools/fastp.wdl @@ -159,17 +159,16 @@ task fastp { fastp \ -i "~{read_one_fastq}" \ ~{"-I '" + read_two_fastq + "'"} \ - ~{(if output_fastq - then "-o '" + (if defined(read_two_fastq) + ~{if output_fastq + then "-o '" + if defined(read_two_fastq) then "~{prefix}.R1.fastq.gz" - else "~{prefix}.fastq.gz" - ) + "'" + else "~{prefix}.fastq.gz" + "'" else "" - )} \ - ~{(if (defined(read_two_fastq) && output_fastq) + } \ + ~{if (defined(read_two_fastq) && output_fastq) then "-O '" + prefix + ".R2.fastq.gz'" else "" - )} \ + } \ --reads_to_process ~{first_n_reads} \ ~{if deduplicate then "--dedup" @@ -249,10 +248,9 @@ task fastp { runtime { cpu: ncpu - memory: (if disable_duplicate_eval + memory: if disable_duplicate_eval then "4 GB" else dup_acc_to_mem[duplicate_accuracy] - ) disks: "~{disk_size_gb} GB" container: "quay.io/biocontainers/fastp:1.0.1--heae3180_0" maxRetries: 1 diff --git a/tools/fq.wdl b/tools/fq.wdl index e8840b6b4..4bf2133e9 100755 --- a/tools/fq.wdl +++ b/tools/fq.wdl @@ -136,10 +136,9 @@ task subsample { Int disk_size_gb = ceil((read1_size + read2_size) * 2) + modify_disk_size_gb - String probability_arg = (if (probability < 1.0 && probability > 0) + String probability_arg = if (probability < 1.0 && probability > 0) then "-p ~{probability}" else "" - ) String record_count_arg = if (record_count > 0) then "-n ~{record_count}" else "" diff --git a/tools/gatk4.wdl b/tools/gatk4.wdl index 1ddea3b69..3e62b0d26 100644 --- a/tools/gatk4.wdl +++ b/tools/gatk4.wdl @@ -126,10 +126,10 @@ task base_recalibrator { BaseRecalibratorSpark \ -R "~{fasta}" \ -I "~{bam}" \ - ~{(if use_original_quality_scores + ~{if use_original_quality_scores then "--use-original-qualities" else "" - )} \ + } \ -O "~{outfile_name}" \ --known-sites "~{dbSNP_vcf}" \ ~{sep(" ", prefix("--known-sites ", squote(known_indels_sites_vcfs)))} \ @@ -459,10 +459,10 @@ task mark_duplicates_spark { Float bam_size = size(bam, "GB") Int memory_gb = min(ceil(bam_size + 15), 50) + modify_memory_gb - Int disk_size_gb = ((if create_bam + Int disk_size_gb = (if create_bam then ceil((bam_size * 2) + 10) else ceil(bam_size + 10) - ) + modify_disk_size_gb) + ) + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) diff --git a/tools/htseq.wdl b/tools/htseq.wdl index b9bb1f846..0552586b5 100755 --- a/tools/htseq.wdl +++ b/tools/htseq.wdl @@ -134,10 +134,10 @@ task count { then "score" else "ignore" } \ - --supplementary-alignments ~{(if supplementary_alignments + --supplementary-alignments ~{if supplementary_alignments then "score" else "ignore" - )} \ + } \ "~{bam}" \ "~{gtf}" \ >> "~{outfile_name}" diff --git a/tools/kraken2.wdl b/tools/kraken2.wdl index 1e01506a0..601f8c1f6 100644 --- a/tools/kraken2.wdl +++ b/tools/kraken2.wdl @@ -98,15 +98,14 @@ task download_library { String db_name = "kraken2_" + library_name + "_library" - #@ except: ExpressionSpacing - Int disk_size_gb = ((if library_name == "bacteria" + Int disk_size_gb = (if library_name == "bacteria" then 300 else if library_name == "nr" then 600 else if library_name == "nt" then 2500 else 25 - ) + modify_disk_size_gb) + ) + modify_disk_size_gb command <<< set -euo pipefail @@ -263,10 +262,10 @@ task build_db { Float tarballs_size = size(tarballs, "GB") Int disk_size_gb = ceil(tarballs_size * 6) + 10 + modify_disk_size_gb - Int memory_gb = ((if (max_db_size_gb > 0) + Int memory_gb = (if (max_db_size_gb > 0) then ceil(max_db_size_gb * 1.2) else ceil(tarballs_size * 2) - ) + modify_memory_gb) + ) + modify_memory_gb String max_db_size_bytes = "~{max_db_size_gb}000000000" @@ -295,10 +294,10 @@ task build_db { --kmer-len ~{kmer_len} \ --minimizer-len ~{minimizer_len} \ --minimizer-spaces ~{minimizer_spaces} \ - ~{(if (max_db_size_gb > 0) + ~{if (max_db_size_gb > 0) then "--max-db-size '" + max_db_size_bytes + "'" else "" - )} \ + } \ --threads "$n_cores" \ --db "~{db_name}" @@ -389,11 +388,9 @@ task kraken { Float read2_size = size(read_two_fastq_gz, "GB") Int disk_size_gb_calculation = (ceil((db_size * 2) + read1_size + read2_size) + 10 + modify_disk_size_gb ) - Int disk_size_gb = (if store_sequences + Int disk_size_gb = if store_sequences then disk_size_gb_calculation + ceil(read1_size + read2_size) else disk_size_gb_calculation - ) - Int memory_gb = ceil(db_size * 2) + modify_memory_gb String out_report = prefix + ".kraken2.txt" diff --git a/tools/ngsderive.wdl b/tools/ngsderive.wdl index 5f7366e7e..f7856be9a 100644 --- a/tools/ngsderive.wdl +++ b/tools/ngsderive.wdl @@ -400,10 +400,9 @@ task endedness { } Float bam_size = size(bam, "GB") - Int memory_gb = (if calc_rpt + Int memory_gb = if calc_rpt then (ceil(bam_size * 2.5) + 4 + modify_memory_gb) else 4 - ) Int disk_size_gb = ceil(bam_size) + 10 + modify_disk_size_gb command <<< diff --git a/tools/picard.wdl b/tools/picard.wdl index 16fc2f451..e20c3484e 100755 --- a/tools/picard.wdl +++ b/tools/picard.wdl @@ -83,10 +83,10 @@ task mark_duplicates { Float bam_size = size(bam, "GB") Int memory_gb = min(ceil(bam_size + 12), 50) + modify_memory_gb - Int disk_size_gb = ((if create_bam + Int disk_size_gb = (if create_bam then ceil((bam_size * 2) + 10) else ceil(bam_size + 10) - ) + modify_disk_size_gb) + ) + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) @@ -200,10 +200,9 @@ task validate_bam { String mode_arg = if (summary_mode) then "--MODE SUMMARY" else "" - String stringency_arg = (if (index_validation_stringency_less_exhaustive) + String stringency_arg = if (index_validation_stringency_less_exhaustive) then "--INDEX_VALIDATION_STRINGENCY LESS_EXHAUSTIVE" else "" - ) Float bam_size = size(bam, "GB") Int disk_size_gb = ceil(bam_size) + 10 + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) @@ -861,10 +860,10 @@ task bam_to_fastq { picard -Xmx~{java_heap_size}g SamToFastq INPUT="~{bam}" \ FASTQ="~{prefix}.R1.fastq" \ - ~{(if paired + ~{if paired then "SECOND_END_FASTQ='" + prefix + ".R2.fastq'" else "" - )} \ + } \ RE_REVERSE=true \ VALIDATION_STRINGENCY=SILENT diff --git a/tools/qualimap.wdl b/tools/qualimap.wdl index 59f10fd15..a1bdfe000 100755 --- a/tools/qualimap.wdl +++ b/tools/qualimap.wdl @@ -53,10 +53,10 @@ task rnaseq { # Qualimap has an inefficient name sorting algorithm and will # use an excessive amount of storage. - Int disk_size_gb = ((if name_sorted + Int disk_size_gb = (if name_sorted then ceil(bam_size + gtf_size + 15) else ceil(((bam_size + gtf_size) * 12) + 10) - ) + modify_disk_size_gb) + ) + modify_disk_size_gb command <<< set -euo pipefail diff --git a/tools/samtools.wdl b/tools/samtools.wdl index 1d4bdb992..719b079b3 100755 --- a/tools/samtools.wdl +++ b/tools/samtools.wdl @@ -880,11 +880,10 @@ task bam_to_fastq { then 4 else (ceil(bam_size * 0.4) + 4) ) + modify_memory_gb - Int disk_size_gb = ceil(bam_size * (if (retain_collated_bam && !collated && paired_end - ) + Int disk_size_gb = ceil(bam_size * if (retain_collated_bam && !collated && paired_end) then 5 else 2 - )) + 10 + modify_disk_size_gb + ) + 10 + modify_disk_size_gb command <<< set -euo pipefail @@ -910,10 +909,10 @@ task bam_to_fastq { } \ -O \ "~{bam}" \ - | tee ~{(if retain_collated_bam + | tee ~{if retain_collated_bam then "\"" + prefix + ".collated.bam\"" else "" - )} \ + } \ > bam_pipe \ & else @@ -926,29 +925,28 @@ task bam_to_fastq { -F "~{bitwise_filter.exclude_if_any}" \ --rf "~{bitwise_filter.include_if_any}" \ -G "~{bitwise_filter.exclude_if_all}" \ - ~{(if append_read_number + ~{if append_read_number then "-N" else "-n" - )} \ - -1 ~{(if paired_end + } \ + -1 ~{if paired_end then "\"" + prefix + ".R1.fastq.gz\"" else "\"" + prefix + ".fastq.gz\"" - )} \ - -2 ~{(if paired_end + } \ + -2 ~{if paired_end then "\"" + prefix + ".R2.fastq.gz\"" else "\"" + prefix + ".fastq.gz\"" - )} \ - ~{(if paired_end - then (if output_singletons + } \ + ~{if paired_end + then if output_singletons then "-s \"" + prefix + ".singleton.fastq.gz\"" else "-s junk.singleton.fastq.gz" - ) else "" - )} \ - -0 ~{(if paired_end + } \ + -0 ~{if paired_end then "junk.unknown_bit_setting.fastq.gz" else "\"" + prefix + ".fastq.gz\"" - )} \ + } \ bam_pipe rm bam_pipe diff --git a/tools/star.wdl b/tools/star.wdl index 8703e453f..f22d2cbef 100755 --- a/tools/star.wdl +++ b/tools/star.wdl @@ -766,34 +766,32 @@ task alignment { align_sj_stitch_mismatch_n_max.GC_AG_and_CT_GC_motif, align_sj_stitch_mismatch_n_max.AT_AC_and_GT_AT_motif, ]))} \ - --clip3pAdapterSeq "~{clip_3p_adapter_seq.left}" ~{(if (length(read_twos) != 0 - ) + --clip3pAdapterSeq "~{clip_3p_adapter_seq.left}" ~{if (length(read_twos) != 0) then "'" + clip_3p_adapter_seq.right + "'" else "" - )} \ - --clip3pAdapterMMp ~{clip_3p_adapter_mmp.left} ~{(if (length(read_twos) != 0) + } \ + --clip3pAdapterMMp ~{clip_3p_adapter_mmp.left} ~{if (length(read_twos) != 0) then clip_3p_adapter_mmp.right else None - )} \ - --alignEndsProtrude ~{align_ends_protrude.left} "~{(if (length(read_twos) != 0 - ) + } \ + --alignEndsProtrude ~{align_ends_protrude.left} "~{if (length(read_twos) != 0) then align_ends_protrude.right else None - )}" \ - --clip3pNbases ~{clip_3p_n_bases.left} ~{(if (length(read_twos) != 0) + }" \ + --clip3pNbases ~{clip_3p_n_bases.left} ~{if (length(read_twos) != 0) then clip_3p_n_bases.right else None - )} \ - --clip3pAfterAdapterNbases ~{clip_3p_after_adapter_n_bases.left} ~{(if (length( + } \ + --clip3pAfterAdapterNbases ~{clip_3p_after_adapter_n_bases.left} ~{if (length( read_twos ) != 0) then clip_3p_after_adapter_n_bases.right else None - )} \ - --clip5pNbases ~{clip_5p_n_bases.left} ~{(if (length(read_twos) != 0) + } \ + --clip5pNbases ~{clip_5p_n_bases.left} ~{if (length(read_twos) != 0) then clip_5p_n_bases.right else None - )} \ + } \ --readNameSeparator "~{read_name_separator}" \ --clipAdapterType "~{clip_adapter_type}" \ --outSAMstrandField "~{out_sam_strand_field}" \ @@ -801,13 +799,12 @@ task alignment { --outSAMunmapped "~{out_sam_unmapped}" \ --outSAMorder "~{out_sam_order}" \ --outSAMreadID "~{out_sam_read_id}" \ - --outSAMtlen ~{(if (out_sam_tlen == "left_plus") + --outSAMtlen ~{if (out_sam_tlen == "left_plus") then "1" - else (if (out_sam_tlen == "left_any") + else if (out_sam_tlen == "left_any") then "2" else "error" - ) - )} \ + } \ --outFilterType "~{out_filter_type}" \ --outFilterIntronMotifs "~{out_filter_intron_motifs}" \ --outFilterIntronStrands "~{out_filter_intron_strands}" \ diff --git a/tools/util.wdl b/tools/util.wdl index 3e59b5384..e1511d3e7 100644 --- a/tools/util.wdl +++ b/tools/util.wdl @@ -394,10 +394,10 @@ task check_fastq_and_rg_concordance { command <<< python3 /scripts/util/check_FQs_and_RGs.py \ --read-one-fastqs "~{sep(",", read_one_names)}" \ - ~{(if length(read_twos) > 0 + ~{if length(read_twos) > 0 then "--read-two-fastqs \"" + sep(",", squote(read_twos)) + "\"" else "" - )} \ + } \ --read-groups "~{sep(",", read_groups)}" >>> diff --git a/workflows/general/bam-to-fastqs.wdl b/workflows/general/bam-to-fastqs.wdl index 573db7c7d..e5bcbedc9 100644 --- a/workflows/general/bam-to-fastqs.wdl +++ b/workflows/general/bam-to-fastqs.wdl @@ -67,10 +67,9 @@ workflow bam_to_fastqs { } output { - Array[File] read1s = (if paired_end + Array[File] read1s = if paired_end then select_all(bam_to_fastq.read_one_fastq_gz) else select_all(bam_to_fastq.single_end_reads_fastq_gz) - ) Array[File?] read2s = bam_to_fastq.read_two_fastq_gz } } diff --git a/workflows/general/samtools-merge.wdl b/workflows/general/samtools-merge.wdl index fe6825278..de9053218 100644 --- a/workflows/general/samtools-merge.wdl +++ b/workflows/general/samtools-merge.wdl @@ -35,10 +35,9 @@ workflow samtools_merge { scatter (merge_num in range((bam_length / max_length) + 1)) { # Get the sublist of bams scatter (bam_num in range(max_length)) { - Int num = (if merge_num > 0 + Int num = if merge_num > 0 then bam_num + (merge_num * max_length) else bam_num - ) if (num < bam_length) { File bam_list = bams[num] } diff --git a/workflows/methylation/methylation-cohort.wdl b/workflows/methylation/methylation-cohort.wdl index b057fec87..652ab9c0d 100644 --- a/workflows/methylation/methylation-cohort.wdl +++ b/workflows/methylation/methylation-cohort.wdl @@ -42,10 +42,9 @@ workflow methylation_cohort { scatter (merge_num in range((beta_length / max_length) + 1)) { # Get the sublist of beta files scatter (beta_num in range(max_length)) { - Int num = (if merge_num > 0 + Int num = if merge_num > 0 then beta_num + (merge_num * max_length) else beta_num - ) if (num < beta_length) { File bam_list = unfiltered_normalized_beta[num] } @@ -68,10 +67,9 @@ workflow methylation_cohort { scatter (merge_num in range((pval_length / max_length) + 1)) { # Get the sublist of p-value files scatter (pval_num in range(max_length)) { - Int num_p = (if merge_num > 0 + Int num_p = if merge_num > 0 then pval_num + (merge_num * max_length) else pval_num - ) if (num_p < pval_length) { File pval_list = p_values[num_p] } @@ -104,13 +102,12 @@ workflow methylation_cohort { } } - File? pval_file = (if (pval_length > 0 && !skip_pvalue_check) + File? pval_file = if (pval_length > 0 && !skip_pvalue_check) then select_first([ final_merge_pvals.combined_file, simple_merge_pval.combined_file, ]) else None - ) call filter_probes { input: beta_values = select_first([ diff --git a/workflows/methylation/methylation-standard.wdl b/workflows/methylation/methylation-standard.wdl index 6c4e8b9f7..b3c3247bd 100644 --- a/workflows/methylation/methylation-standard.wdl +++ b/workflows/methylation/methylation-standard.wdl @@ -60,10 +60,9 @@ workflow methylation { scatter (merge_num in range((probelist_length / max_length) + 1)) { # Get the sublist of probe files scatter (probe_num in range(max_length)) { - Int num = (if merge_num > 0 + Int num = if merge_num > 0 then probe_num + (merge_num * max_length) else probe_num - ) if (num < probelist_length) { File probe_file_batches = probe_files[num] } @@ -98,10 +97,9 @@ workflow methylation { scatter (merge_num in range((non_genomic_probelist_length / max_length) + 1)) { # Get the sublist of probe files scatter (probe_num in range(max_length)) { - Int num_ng = (if merge_num > 0 + Int num_ng = if merge_num > 0 then probe_num + (merge_num * max_length) else probe_num - ) if (num_ng < non_genomic_probelist_length) { File non_genomic_probe_batches = non_genomic_probe_list[num_ng] } diff --git a/workflows/qc/quality-check-standard.wdl b/workflows/qc/quality-check-standard.wdl index bd7fb0e9d..c3d269bc6 100644 --- a/workflows/qc/quality-check-standard.wdl +++ b/workflows/qc/quality-check-standard.wdl @@ -209,10 +209,9 @@ workflow quality_check_standard { subsample_index.bam_index, bam_index, ]) - String post_subsample_prefix = (if (defined(subsample.sampled_bam)) + String post_subsample_prefix = if (defined(subsample.sampled_bam)) then prefix + ".subsampled" else prefix - ) call picard.validate_bam after quickcheck { input: bam = post_subsample_bam, @@ -483,12 +482,12 @@ workflow quality_check_standard { markdups_post.mosdepth_region_dist, [], ]), - (if (mark_duplicates && optical_distance > 0) + if (mark_duplicates && optical_distance > 0) then [ markdups.mark_duplicates_metrics, ] else [] - ), + , ])) call multiqc_tasks.multiqc { input: @@ -593,10 +592,9 @@ task parse_input { >>> output { - Array[String] labels = (if (coverage_beds_len > 0) + Array[String] labels = if (coverage_beds_len > 0) then read_lines("labels.txt") else [] - ) } runtime { diff --git a/workflows/rnaseq/rnaseq-core.wdl b/workflows/rnaseq/rnaseq-core.wdl index afad29046..819f8a110 100644 --- a/workflows/rnaseq/rnaseq-core.wdl +++ b/workflows/rnaseq/rnaseq-core.wdl @@ -199,14 +199,12 @@ workflow rnaseq_core { } } - Array[File] chosen_r1s = (if enable_read_trimming + Array[File] chosen_r1s = if enable_read_trimming then select_all(trim.read_one_fastq_gz) else read_one_fastqs_gz - ) - Array[File] chosen_r2s = (if enable_read_trimming + Array[File] chosen_r2s = if enable_read_trimming then select_all(trim.read_two_fastq_gz) else read_two_fastqs_gz - ) call star.alignment after validate { input: read_one_fastqs_gz = chosen_r1s, @@ -248,20 +246,19 @@ workflow rnaseq_core { gene_model = gtf, } - String htseq_strandedness = (if (provided_strandedness != "") + String htseq_strandedness = if (provided_strandedness != "") then htseq_strandedness_mapping[provided_strandedness] else htseq_strandedness_mapping[ngsderive_strandedness.strandedness_string] - ) call htseq.count as htseq_count { input: bam = alignment_post.processed_bam, gtf, strandedness = htseq_strandedness, - prefix = basename(alignment_post.processed_bam, "bam") + (if provided_strandedness + prefix = basename(alignment_post.processed_bam, "bam") + if provided_strandedness == "" then ngsderive_strandedness.strandedness_string else provided_strandedness - ), + , pos_sorted = true, }