diff --git a/data_structures/flag_filter.wdl b/data_structures/flag_filter.wdl
index 3f1f0826e..7a27ea213 100644
--- a/data_structures/flag_filter.wdl
+++ b/data_structures/flag_filter.wdl
@@ -58,7 +58,6 @@
 ## In short, those are all flags corresponding to the quality of the read
 ## and them being `true` may indicate that the read is of low quality and
 ## should be excluded.
-
 version 1.1
 
 struct FlagFilter {
@@ -127,15 +126,15 @@ workflow validate_flag_filter {
     }
 
     call validate_string_is_12bit_int as validate_include_if_any { input:
-        number = flags.include_if_any
+        number = flags.include_if_any,
     }
     call validate_string_is_12bit_int as validate_include_if_all { input:
-        number = flags.include_if_all
+        number = flags.include_if_all,
     }
     call validate_string_is_12bit_int as validate_exclude_if_any { input:
-        number = flags.exclude_if_any
+        number = flags.exclude_if_any,
     }
     call validate_string_is_12bit_int as validate_exclude_if_all { input:
-        number = flags.exclude_if_all
+        number = flags.exclude_if_all,
     }
 }
diff --git a/data_structures/read_group.wdl b/data_structures/read_group.wdl
index c37306baa..3e49ccd1d 100644
--- a/data_structures/read_group.wdl
+++ b/data_structures/read_group.wdl
@@ -40,7 +40,6 @@
 ## }
 ## }
 ## ```
-
 version 1.1
 
 #@ except: SnakeCase
@@ -99,8 +98,7 @@ workflow read_group_to_string {
     }
 
     output {
-        String validated_read_group
-            = inner_read_group_to_string.stringified_read_group
+        String validated_read_group = inner_read_group_to_string.stringified_read_group
     }
 }
@@ -109,7 +107,7 @@ task get_read_groups {
         description: "Gets read group information from a BAM file and writes it out as JSON which is converted to a WDL struct."
         warning: "This task will uppercase any lowercase `PL` values it finds, as is required by the [SAM specification](https://samtools.github.io/hts-specs/SAMv1.pdf)."
         outputs: {
-            read_groups: "An array of `ReadGroup` structs containing read group information."
+            read_groups: "An array of `ReadGroup` structs containing read group information.",
         }
     }
@@ -167,8 +165,18 @@ task validate_read_group {
        String sample_pattern = "sample.?"
        String restrictive_pattern = "\\ "  # Disallow spaces
        Array[String] platforms = [
-            "CAPILLARY", "DNBSEQ", "ELEMENT", "HELICOS", "ILLUMINA", "IONTORRENT", "LS454",
-            "ONT", "PACBIO", "SINGULAR", "SOLID", "ULTIMA",
+            "CAPILLARY",
+            "DNBSEQ",
+            "ELEMENT",
+            "HELICOS",
+            "ILLUMINA",
+            "IONTORRENT",
+            "LS454",
+            "ONT",
+            "PACBIO",
+            "SINGULAR",
+            "SOLID",
+            "ULTIMA",
         ]
 
     command <<<
@@ -268,7 +276,10 @@ task validate_read_group {
             fi
         fi
         if [ "$(echo "~{sep(" ", required_fields)}" | grep -Ewc "KS")" -eq 1 ]; then
-            if [ -z "~{if defined(read_group.KS) then read_group.KS else ""}" ]; then
+            if [ -z "~{if defined(read_group.KS)
+                then read_group.KS
+                else ""
+            }" ]; then
                 >&2 echo "KS is required"
                 exit_code=1
             fi
@@ -366,7 +377,7 @@ task inner_read_group_to_string {
         description: "Converts a `ReadGroup` struct to a `String` **without any validation**."
         warning: "Please use the `read_group_to_string` workflow, which has validation of the `ReadGroup` contents."
         outputs: {
-            stringified_read_group: "Input `ReadGroup` as a string"
+            stringified_read_group: "Input `ReadGroup` as a string",
         }
     }
@@ -383,7 +394,9 @@ task inner_read_group_to_string {
         Boolean format_as_sam_record = false
     }
 
-    String delimiter = if format_as_sam_record then "\\t" else " "
+    String delimiter = if format_as_sam_record
+        then "\\t"
+        else " "
 
     command <<<
         if ~{format_as_sam_record}; then
diff --git a/tools/arriba.wdl b/tools/arriba.wdl
index 05f00f0b8..196479133 100644
--- a/tools/arriba.wdl
+++ b/tools/arriba.wdl
@@ -1,5 +1,4 @@
 ## [Homepage](https://arriba.readthedocs.io/en/latest/)
-
 version 1.1
 
 task arriba {
@@ -138,14 +137,40 @@ task arriba {
         File? protein_domains
         File? wgs_svs
         Array[String] interesting_contigs = [
-            "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14",
-            "15", "16", "17", "18", "19", "20", "21", "22", "X", "Y", "AC_*", "NC_*",
+            "1",
+            "2",
+            "3",
+            "4",
+            "5",
+            "6",
+            "7",
+            "8",
+            "9",
+            "10",
+            "11",
+            "12",
+            "13",
+            "14",
+            "15",
+            "16",
+            "17",
+            "18",
+            "19",
+            "20",
+            "21",
+            "22",
+            "X",
+            "Y",
+            "AC_*",
+            "NC_*",
+        ]
+        Array[String] viral_contigs = [
+            "AC_*",
+            "NC_*",
         ]
-        Array[String] viral_contigs = ["AC_*", "NC_*"]
         Array[String] disable_filters = []
         #@ except: LineWidth
-        String feature_name
-            = "gene_name=gene_name|gene_id,gene_id=gene_id,transcript_id=transcript_id,feature_exon=exon,feature_CDS=CDS"
+        String feature_name = "gene_name=gene_name|gene_id,gene_id=gene_id,transcript_id=transcript_id,feature_exon=exon,feature_CDS=CDS"
         String prefix = basename(bam, ".bam") + ".fusions"
         String strandedness = "auto"
         Boolean mark_duplicates = true
@@ -176,10 +201,8 @@ task arriba {
     }
 
     Int bam_size_gb = ceil(size(bam, "GB"))
-    Int disk_size_gb = bam_size_gb
-        + ceil(size(gtf, "GB"))
-        + ceil(size(reference_fasta_gz, "GB"))
-        + modify_disk_size_gb
+    Int disk_size_gb = bam_size_gb + ceil(size(gtf, "GB")) + ceil(size(reference_fasta_gz,
+        "GB")) + modify_disk_size_gb
     Int memory_gb = bam_size_gb + modify_memory_gb
 
     command <<<
@@ -198,21 +221,18 @@ task arriba {
             ~{"-d '" + wgs_svs + "'"} \
             -D ~{max_genomic_breakpoint_distance} \
             -s "~{strandedness}" \
-            ~{(
-                if length(interesting_contigs) > 0
+            ~{if length(interesting_contigs) > 0
                 then "-i " + sep(",", quote(interesting_contigs))
                 else ""
-            )} \
-            ~{(
-                if length(viral_contigs) > 0
+            } \
+            ~{if length(viral_contigs) > 0
                 then "-v " + sep(",", quote(viral_contigs))
                 else ""
-            )} \
-            ~{(
-                if length(disable_filters) > 0
+            } \
+            ~{if length(disable_filters) > 0
                 then "-f " + sep(",", quote(disable_filters))
                 else ""
-            )} \
+            } \
             -E ~{max_e_value} \
             -S ~{min_supporting_reads} \
             -m ~{max_mismappers} \
@@ -232,9 +252,18 @@ task arriba {
             -l ~{max_itd_length} \
             -z ~{min_itd_allele_fraction} \
             -Z ~{min_itd_supporting_reads} \
-            ~{if mark_duplicates then "" else "-u"} \
-            ~{if report_additional_columns then "-X" else ""} \
-            ~{if fill_gaps then "-I" else ""}
+            ~{if mark_duplicates
+                then ""
+                else "-u"
+            } \
+            ~{if report_additional_columns
+                then "-X"
+                else ""
+            } \
+            ~{if fill_gaps
+                then "-I"
+                else ""
+            }
     >>>
 
     output {
@@ -255,7 +284,7 @@ task arriba_tsv_to_vcf {
     meta {
         description: "Convert Arriba TSV format fusions to VCF format."
         outputs: {
-            fusions_vcf: "Output file of fusions in VCF format"
+            fusions_vcf: "Output file of fusions in VCF format",
         }
     }
@@ -274,9 +303,7 @@ task arriba_tsv_to_vcf {
     Int input_size_gb = ceil(size(fusions, "GB"))
-    Int disk_size_gb = ceil(input_size_gb)
-        + (ceil(size(reference_fasta, "GB")) * 3)
-        + modify_disk_size_gb
+    Int disk_size_gb = ceil(input_size_gb) + (ceil(size(reference_fasta, "GB")) * 3) + modify_disk_size_gb
 
     command <<<
         set -euo pipefail
@@ -356,7 +383,7 @@ task arriba_annotate_exon_numbers {
     meta {
         description: "Annotate fusions with exon numbers."
         outputs: {
-            fusion_tsv: "TSV file with fusions annotated with exon numbers"
+            fusion_tsv: "TSV file with fusions annotated with exon numbers",
         }
     }
diff --git a/tools/bwa.wdl b/tools/bwa.wdl
index 1d65d6df4..a27cb71a0 100644
--- a/tools/bwa.wdl
+++ b/tools/bwa.wdl
@@ -1,12 +1,11 @@
 ## [Homepage](https://github.com/lh3/bwa)
-
 version 1.1
 
 task bwa_aln {
     meta {
         description: "Maps Single-End FASTQ files to BAM format using bwa aln"
         outputs: {
-            bam: "Aligned BAM format file"
+            bam: "Aligned BAM format file",
         }
     }
 
@@ -36,9 +35,7 @@ task bwa_aln {
         File fastq
         File bwa_db_tar_gz
         String read_group
-        String prefix = sub(
-            basename(fastq),
-            "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$",
+        String prefix = sub(basename(fastq), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$",
             ""  # Once replacing with capturing groups is supported, replace with group 3
         )
         Boolean use_all_cores = false
@@ -50,8 +47,7 @@ task bwa_aln {
 
     Float input_fastq_size = size(fastq, "GB")
     Float reference_size = size(bwa_db_tar_gz, "GB")
-    Int disk_size_gb = (
-        ceil((input_fastq_size + reference_size) * 2) + 10 + modify_disk_size_gb
+    Int disk_size_gb = (ceil((input_fastq_size + reference_size) * 2) + 10 + modify_disk_size_gb
     )
 
     command <<<
@@ -98,7 +94,7 @@ task bwa_aln_pe {
     meta {
         description: "Maps Paired-End FASTQ files to BAM format using bwa aln"
         outputs: {
-            bam: "Aligned BAM format file"
+            bam: "Aligned BAM format file",
         }
     }
 
@@ -132,11 +128,8 @@ task bwa_aln_pe {
         File read_two_fastq_gz
         File bwa_db_tar_gz
         String read_group
-        String prefix = sub(
-            basename(read_one_fastq_gz),
-            "([_\\.][rR][12])?(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$",
-            ""
-        )
+        String prefix = sub(basename(read_one_fastq_gz), "([_\\.][rR][12])?(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$",
+            "")
         Boolean use_all_cores = false
         Int ncpu = 4
         Int modify_disk_size_gb = 0
@@ -144,12 +137,10 @@ task bwa_aln_pe {
 
     String output_bam = prefix + ".bam"
 
-    Float input_fastq_size = (
-        size(read_one_fastq_gz, "GB") + size(read_two_fastq_gz, "GB")
-    )
+    Float input_fastq_size = (size(read_one_fastq_gz, "GB") + size(read_two_fastq_gz, "GB"
+    ))
     Float reference_size = size(bwa_db_tar_gz, "GB")
-    Int disk_size_gb = (
-        ceil((input_fastq_size + reference_size) * 2) + 5 + modify_disk_size_gb
+    Int disk_size_gb = (ceil((input_fastq_size + reference_size) * 2) + 5 + modify_disk_size_gb
     )
 
     command <<<
@@ -202,7 +193,7 @@ task bwa_mem {
     meta {
         description: "Maps FASTQ files to BAM format using bwa mem"
         outputs: {
-            bam: "Aligned BAM format file"
+            bam: "Aligned BAM format file",
         }
     }
 
@@ -230,11 +221,8 @@ task bwa_mem {
         File bwa_db_tar_gz
         String read_group
         File? read_two_fastq_gz
-        String prefix = sub(
-            basename(read_one_fastq_gz),
-            "([_\\.][rR][12])?(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$",
-            ""
-        )
+        String prefix = sub(basename(read_one_fastq_gz), "([_\\.][rR][12])?(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$",
+            "")
         Boolean use_all_cores = false
         Int ncpu = 4
         Int modify_disk_size_gb = 0
@@ -242,11 +230,9 @@ task bwa_mem {
 
     String output_bam = prefix + ".bam"
 
-    Float input_fastq_size = size(read_one_fastq_gz, "GB")
-        + size(read_two_fastq_gz, "GB")
+    Float input_fastq_size = size(read_one_fastq_gz, "GB") + size(read_two_fastq_gz, "GB")
     Float reference_size = size(bwa_db_tar_gz, "GB")
-    Int disk_size_gb = (
-        ceil((input_fastq_size + reference_size) * 2) + 10 + modify_disk_size_gb
+    Int disk_size_gb = (ceil((input_fastq_size + reference_size) * 2) + 10 + modify_disk_size_gb
     )
 
     command <<<
@@ -271,21 +257,23 @@ task bwa_mem {
             -R "~{read_group}" \
             bwa_db/"$PREFIX" \
             "~{basename(read_one_fastq_gz)}" \
-            ~{(
-                if defined(read_two_fastq_gz)
-                then "'" + basename(select_first([read_two_fastq_gz])) + "'"
+            ~{if defined(read_two_fastq_gz)
+                then "'" + basename(select_first([
+                    read_two_fastq_gz,
+                ])) + "'"
                 else ""
-            )} \
+            } \
             | samtools view --no-PG --threads "$samtools_cores" -hb - \
             > "~{output_bam}"
 
         rm -r bwa_db
         rm "~{basename(read_one_fastq_gz)}"
-        ~{(
-            if defined(read_two_fastq_gz)
-            then "rm '" + basename(select_first([read_two_fastq_gz])) + "'"
+        ~{if defined(read_two_fastq_gz)
+            then "rm '" + basename(select_first([
+                read_two_fastq_gz,
+            ])) + "'"
             else ""
-        )}
+        }
     >>>
 
     output {
@@ -305,7 +293,7 @@ task build_bwa_db {
     meta {
         description: "Creates a BWA index and returns it as a compressed tar archive"
         outputs: {
-            bwa_db_tar_gz: "Tarballed bwa reference files"
+            bwa_db_tar_gz: "Tarballed bwa reference files",
         }
     }
diff --git a/tools/deeptools.wdl b/tools/deeptools.wdl
index f3327b1fe..197491901 100755
--- a/tools/deeptools.wdl
+++ b/tools/deeptools.wdl
@@ -1,12 +1,11 @@
 ## [Homepage](https://deeptools.readthedocs.io/en/develop/index.html)
-
 version 1.1
 
 task bam_coverage {
     meta {
         description: "Generates a BigWig coverage track using bamCoverage from DeepTools"
         outputs: {
-            bigwig: "BigWig format coverage file"
+            bigwig: "BigWig format coverage file",
         }
     }
 
diff --git a/tools/fastp.wdl b/tools/fastp.wdl
index 42b78e34e..2701ec7b6 100644
--- a/tools/fastp.wdl
+++ b/tools/fastp.wdl
@@ -95,9 +95,7 @@ task fastp {
     input {
         File read_one_fastq
         File? read_two_fastq
-        String prefix = sub(
-            basename(read_one_fastq),
-            "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$",
+        String prefix = sub(basename(read_one_fastq), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$",
             ""  # Once replacing with capturing groups is supported, replace with group 3
         ) + ".trimmed"
         Boolean output_fastq = true
@@ -114,7 +112,9 @@ task fastp {
         Boolean phred64 = false
         Boolean use_all_cores = false
         Int first_n_reads = 0
-        Int duplicate_accuracy = if deduplicate then 3 else 1
+        Int duplicate_accuracy = if deduplicate
+            then 3
+            else 1
         Int n_base_limit = 5
         Int qualified_quality = 15
         Int unqualified_percent = 40
@@ -148,9 +148,9 @@ task fastp {
     Float input_size = size(read_one_fastq, "GB") + size(read_two_fastq, "GB")
     Int disk_size_gb = ceil(input_size) * 2 + 10 + modify_disk_size_gb
 
-    command <<< 
+    command <<<
         set -euo pipefail
-        
+
         n_cores=~{ncpu}
         if ~{use_all_cores}; then
             n_cores=$(nproc)
@@ -159,41 +159,70 @@ task fastp {
         fastp \
             -i "~{read_one_fastq}" \
             ~{"-I '" + read_two_fastq + "'"} \
-            ~{(
-                if output_fastq
-                then "-o '" + (
-                    if defined(read_two_fastq)
+            ~{if output_fastq
+                then "-o '" + if defined(read_two_fastq)
                     then "~{prefix}.R1.fastq.gz"
-                    else "~{prefix}.fastq.gz"
-                ) + "'"
+                    else "~{prefix}.fastq.gz" + "'"
                 else ""
-            )} \
-            ~{(
-                if (defined(read_two_fastq) && output_fastq)
+            } \
+            ~{if (defined(read_two_fastq) && output_fastq)
                 then "-O '" + prefix + ".R2.fastq.gz'"
                 else ""
-            )} \
+            } \
             --reads_to_process ~{first_n_reads} \
-            ~{if deduplicate then "--dedup" else ""} \
+            ~{if deduplicate
+                then "--dedup"
+                else ""
+            } \
             --dup_calc_accuracy ~{duplicate_accuracy} \
-            ~{if disable_duplicate_eval then "--dont_eval_duplication" else ""} \
-            ~{if phred64 then "--phred64" else ""} \
-            ~{if disable_quality_filter then "--disable_quality_filtering" else ""} \
+            ~{if disable_duplicate_eval
+                then "--dont_eval_duplication"
+                else ""
+            } \
+            ~{if phred64
+                then "--phred64"
+                else ""
+            } \
+            ~{if disable_quality_filter
+                then "--disable_quality_filtering"
+                else ""
+            } \
             -n ~{n_base_limit} \
             -q ~{qualified_quality} \
             -u ~{unqualified_percent} \
             -e ~{average_quality} \
-            ~{if disable_length_filter then "--disable_length_filtering" else ""} \
+            ~{if disable_length_filter
+                then "--disable_length_filtering"
+                else ""
+            } \
             -l ~{length_required} \
             --length_limit ~{length_limit} \
-            ~{if enable_complexity_filter then "-y" else ""} \
+            ~{if enable_complexity_filter
+                then "-y"
+                else ""
+            } \
             -Y ~{complexity_threshold} \
-            ~{if enable_overrepresentation_eval then "-p" else ""} \
+            ~{if enable_overrepresentation_eval
+                then "-p"
+                else ""
+            } \
             -P ~{overrepresentation_sampling} \
-            ~{if disable_adapter_trimming then "--disable_adapter_trimming" else ""} \
-            ~{if enable_pe_adapter_trimming then "-2" else ""} \
-            ~{if allow_gap_overlap_trimming then "--allow_gap_overlap_trimming" else ""} \
-            ~{if enable_base_correction then "-c" else ""} \
+            ~{if disable_adapter_trimming
+                then "--disable_adapter_trimming"
+                else ""
+            } \
+            ~{if enable_pe_adapter_trimming
+                then "-2"
+                else ""
+            } \
+            ~{if allow_gap_overlap_trimming
+                then "--allow_gap_overlap_trimming"
+                else ""
+            } \
+            ~{if enable_base_correction
+                then "-c"
+                else ""
+            } \
             --overlap_len_require ~{overlap_len_require} \
             --overlap_diff_limit ~{overlap_diff_limit} \
             --overlap_diff_percent_limit ~{overlap_diff_percent_limit} \
@@ -219,11 +248,9 @@ task fastp {
 
     runtime {
         cpu: ncpu
-        memory: (
-            if disable_duplicate_eval
+        memory: if disable_duplicate_eval
             then "4 GB"
             else dup_acc_to_mem[duplicate_accuracy]
-        )
         disks: "~{disk_size_gb} GB"
         container: "quay.io/biocontainers/fastp:1.0.1--heae3180_0"
         maxRetries: 1
diff --git a/tools/fastqc.wdl b/tools/fastqc.wdl
index 531be2b68..feddc237b 100755
--- a/tools/fastqc.wdl
+++ b/tools/fastqc.wdl
@@ -1,5 +1,4 @@
 ## [Homepage](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)
-
 version 1.1
 
 task fastqc {
diff --git a/tools/fq.wdl b/tools/fq.wdl
index c92495d8c..4bf2133e9 100755
--- a/tools/fq.wdl
+++ b/tools/fq.wdl
@@ -1,5 +1,4 @@
 ## [Homepage](https://github.com/stjude-rust-labs/fq)
-
 version 1.1
 
 task fqlint {
@@ -67,9 +66,7 @@ task fqlint {
 
     Float read1_size = size(read_one_fastq, "GB")
     Float read2_size = size(read_two_fastq, "GB")
-    Int memory_gb = (
-        ceil((read1_size + read2_size) * 0.25) + 1 + modify_memory_gb
-    )
+    Int memory_gb = (ceil((read1_size + read2_size) * 0.25) + 1 + modify_memory_gb)
 
     Int disk_size_gb = ceil((read1_size + read2_size) * 2) + modify_disk_size_gb
 
@@ -78,7 +75,10 @@ task fqlint {
             ~{sep(" ", prefix("--disable-validator ", squote(disable_validator_codes)))} \
             --single-read-validation-level "~{single_read_validation_level}" \
             --paired-read-validation-level "~{paired_read_validation_level}" \
-            --lint-mode ~{if panic then "panic" else "log"} \
+            --lint-mode ~{if panic
+                then "panic"
+                else "log"
+            } \
             "~{read_one_fastq}" \
             ~{"'" + read_two_fastq + "'"}
     >>>
@@ -123,9 +123,7 @@ task subsample {
     input {
         File read_one_fastq
         File? read_two_fastq
-        String prefix = sub(
-            basename(read_one_fastq),
-            "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$",
+        String prefix = sub(basename(read_one_fastq), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$",
             ""  # Once replacing with capturing groups is supported, replace with group 3
         )
         Float probability = 1.0
@@ -138,12 +136,12 @@ task subsample {
 
     Int disk_size_gb = ceil((read1_size + read2_size) * 2) + modify_disk_size_gb
 
-    String probability_arg = (
-        if (probability < 1.0 && probability > 0)
+    String probability_arg = if (probability < 1.0 && probability > 0)
         then "-p ~{probability}"
         else ""
-    )
-    String record_count_arg = if (record_count > 0) then "-n ~{record_count}" else ""
+    String record_count_arg = if (record_count > 0)
+        then "-n ~{record_count}"
+        else ""
 
     String r1_dst = prefix + ".R1.subsampled.fastq.gz"
     String r2_dst = prefix + ".R2.subsampled.fastq.gz"
diff --git a/tools/gatk4.wdl b/tools/gatk4.wdl
index c0b9085e3..3e62b0d26 100644
--- a/tools/gatk4.wdl
+++ b/tools/gatk4.wdl
@@ -1,5 +1,4 @@
 ## [Homepage](https://software.broadinstitute.org/gatk)
-
 version 1.1
 
 task split_n_cigar_reads {
@@ -13,7 +12,7 @@ task split_n_cigar_reads {
         }
     }
 
-    parameter_meta { 
+    parameter_meta {
         bam: "Input BAM format file with unsplit reads containing Ns in their CIGAR strings."
         bam_index: "BAM index file corresponding to the input BAM"
         fasta: "Reference genome in FASTA format. Must be uncompressed."
@@ -37,23 +36,21 @@ task split_n_cigar_reads {
         Int ncpu = 8
     }
 
-    Int disk_size_gb = ceil(size(bam, "GB") + 1) * 3
-        + ceil(size(fasta, "GB"))
-        + modify_disk_size_gb
+    Int disk_size_gb = ceil(size(bam, "GB") + 1) * 3 + ceil(size(fasta, "GB")) + modify_disk_size_gb
     Int java_heap_size = ceil(memory_gb * 0.9)
 
     command <<<
-        set -euo pipefail 
-        
-        gatk \ 
-            --java-options "-Xms4000m -Xmx~{java_heap_size}g" \ 
-            SplitNCigarReads \ 
-            -R "~{fasta}" \ 
-            -I "~{bam}" \ 
-            -O "~{prefix}.bam" \ 
-            -OBM true 
-        # GATK is unreasonable and uses the plain ".bai" suffix. 
-        mv "~{prefix}.bai" "~{prefix}.bam.bai" 
+        set -euo pipefail
+
+        gatk \
+            --java-options "-Xms4000m -Xmx~{java_heap_size}g" \
+            SplitNCigarReads \
+            -R "~{fasta}" \
+            -I "~{bam}" \
+            -O "~{prefix}.bam" \
+            -OBM true
+        # GATK is unreasonable and uses the plain ".bai" suffix.
+        mv "~{prefix}.bai" "~{prefix}.bam.bai"
     >>>
 
     output {
@@ -76,11 +73,11 @@ task base_recalibrator {
         description: "Generates recalibration report for base quality score recalibration."
         external_help: "https://gatk.broadinstitute.org/hc/en-us/articles/360036897372-BaseRecalibratorSpark-BETA"
         outputs: {
-            recalibration_report: "Recalibration report file"
+            recalibration_report: "Recalibration report file",
         }
     }
 
-    parameter_meta { 
+    parameter_meta {
         bam: "Input BAM format file on which to recalibrate base quality scores"
         bam_index: "BAM index file corresponding to the input BAM"
         fasta: "Reference genome in FASTA format"
@@ -114,26 +111,25 @@ task base_recalibrator {
         Int memory_gb = 25
         Int modify_disk_size_gb = 0
         Int ncpu = 4
-    } 
+    }
 
-    Int disk_size_gb = ceil(size(bam, "GB") + 1) * 3
-        + ceil(size(fasta, "GB"))
-        + modify_disk_size_gb
+    Int disk_size_gb = ceil(size(bam, "GB") + 1) * 3 + ceil(size(fasta, "GB")) + modify_disk_size_gb
     Int java_heap_size = ceil(memory_gb * 0.9)
 
     #@ except: LineWidth
     command <<<
         # shellcheck disable=SC2102
         gatk \
-            --java-options "-XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xms4000m -Xmx~{java_heap_size}g" \
+            --java-options "-XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xms4000m -Xmx~{
+                java_heap_size
+            }g" \
             BaseRecalibratorSpark \
             -R "~{fasta}" \
             -I "~{bam}" \
-            ~{(
-                if use_original_quality_scores
+            ~{if use_original_quality_scores
                 then "--use-original-qualities"
                 else ""
-            )} \
+            } \
             -O "~{outfile_name}" \
             --known-sites "~{dbSNP_vcf}" \
             ~{sep(" ", prefix("--known-sites ", squote(known_indels_sites_vcfs)))} \
@@ -163,7 +159,7 @@ task apply_bqsr {
         }
     }
 
-    parameter_meta { 
+    parameter_meta {
         bam: "Input BAM format file on which to apply base quality score recalibration"
         bam_index: "BAM index file corresponding to the input BAM"
         recalibration_report: "Recalibration report file"
@@ -194,11 +190,16 @@ task apply_bqsr {
 
         # shellcheck disable=SC2102
         gatk \
-            --java-options "-XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xms3000m -Xmx~{java_heap_size}g" \
+            --java-options "-XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xms3000m -Xmx~{
+                java_heap_size
+            }g" \
             ApplyBQSRSpark \
             --spark-master local[~{ncpu}] \
             -I "~{bam}" \
-            ~{if use_original_quality_scores then "--use-original-qualities" else "" } \
+            ~{if use_original_quality_scores
+                then "--use-original-qualities"
+                else ""
+            } \
             -O "~{prefix}.bqsr.bam" \
             --bqsr-recal-file "~{recalibration_report}"
     >>>
@@ -227,7 +228,7 @@ task haplotype_caller {
         }
     }
 
-    parameter_meta { 
+    parameter_meta {
         bam: "Input BAM format file on which to call variants"
         bam_index: "BAM index file corresponding to the input BAM"
         interval_list: {
@@ -269,10 +270,7 @@ task haplotype_caller {
         Int ncpu = 4
     }
 
-    Int disk_size_gb = ceil(size(bam, "GB") * 2)
-        + 30
-        + ceil(size(fasta, "GB"))
-        + modify_disk_size_gb
+    Int disk_size_gb = ceil(size(bam, "GB") * 2) + 30 + ceil(size(fasta, "GB")) + modify_disk_size_gb
     Int java_heap_size = ceil(memory_gb * 0.9)
 
     #@ except: LineWidth
@@ -284,7 +282,10 @@ task haplotype_caller {
             -I "~{bam}" \
             -L "~{interval_list}" \
             -O "~{prefix}.vcf.gz" \
-            ~{if use_soft_clipped_bases then "" else "--dont-use-soft-clipped-bases"} \
+            ~{if use_soft_clipped_bases
+                then ""
+                else "--dont-use-soft-clipped-bases"
+            } \
             --standard-min-confidence-threshold-for-calling ~{stand_call_conf} \
             --dbsnp "~{dbSNP_vcf}"
     >>>
@@ -313,7 +314,7 @@ task variant_filtration {
         }
     }
 
-    parameter_meta { 
+    parameter_meta {
         vcf: "Input VCF format file to filter"
         vcf_index: "VCF index file corresponding to the input VCF"
         fasta: "Reference genome in FASTA format"
@@ -340,8 +341,14 @@ task variant_filtration {
         File fasta
         File fasta_index
         File dict
-        Array[String] filter_names = ["FS", "QD"]
-        Array[String] filter_expressions = ["FS > 30.0", "QD < 2.0"]
+        Array[String] filter_names = [
+            "FS",
+            "QD",
+        ]
+        Array[String] filter_expressions = [
+            "FS > 30.0",
+            "QD < 2.0",
+        ]
         String prefix = basename(vcf, ".vcf.gz")
         Int cluster = 3
         Int window = 35
@@ -377,7 +384,7 @@ task variant_filtration {
 }
 
 task mark_duplicates_spark {
-    meta { 
+    meta {
         description: "Marks duplicate reads in the input BAM file using GATK's Spark implementation of Picard's MarkDuplicates."
         external_help: "https://gatk.broadinstitute.org/hc/en-us/articles/13832682540699-MarkDuplicatesSpark"
         outputs: {
@@ -427,7 +434,7 @@ task mark_duplicates_spark {
             group: "Common",
         }
         optical_distance: {
-            description: "Maximum distance between read coordinates to consider them optical duplicates. If `0`, then optical duplicate marking is disabled.", 
+            description: "Maximum distance between read coordinates to consider them optical duplicates. If `0`, then optical duplicate marking is disabled.",
             help: "Suggested settings of 100 for unpatterned versions of the Illumina platform (e.g. HiSeq) or 2500 for patterned flowcell models (e.g. NovaSeq). Calculation of distance depends on coordinate data embedded in the read names, typically produced by the Illumina sequencing machines.",
             warning: "Optical duplicate detection will not work on non-standard names without modifying `read_name_regex`.",
         }
@@ -452,13 +459,10 @@ task mark_duplicates_spark {
     Float bam_size = size(bam, "GB")
     Int memory_gb = min(ceil(bam_size + 15), 50) + modify_memory_gb
-    Int disk_size_gb = (
-        (
-            if create_bam
-            then ceil((bam_size * 2) + 10)
-            else ceil(bam_size + 10)
-        ) + modify_disk_size_gb
-    )
+    Int disk_size_gb = (if create_bam
+        then ceil((bam_size * 2) + 10)
+        else ceil(bam_size + 10)
+    ) + modify_disk_size_gb
 
     Int java_heap_size = ceil(memory_gb * 0.9)
 
@@ -470,12 +474,16 @@ task mark_duplicates_spark {
             --java-options "-Xmx~{java_heap_size}g" \
             -I "~{bam}" \
             -M "~{prefix}.metrics.txt" \
-            -O "~{if create_bam then prefix + ".bam" else "/dev/null"}" \
+            -O "~{if create_bam
+                then prefix + ".bam"
+                else "/dev/null"
+            }" \
             --create-output-bam-index ~{create_bam} \
             --read-validation-stringency "~{validation_stringency}" \
             --duplicate-scoring-strategy "~{duplicate_scoring_strategy}" \
-            --read-name-regex '~{
-                if (optical_distance > 0) then read_name_regex else "null"
+            --read-name-regex '~{if (optical_distance > 0)
+                then read_name_regex
+                else "null"
             }' \
             --duplicate-tagging-policy "~{tagging_policy}" \
             --optical-duplicate-pixel-distance ~{optical_distance} \
diff --git a/tools/htseq.wdl b/tools/htseq.wdl
index 5afba83a0..0552586b5 100755
--- a/tools/htseq.wdl
+++ b/tools/htseq.wdl
@@ -1,5 +1,4 @@
 ## [Homepage](https://github.com/htseq/htseq)
-
 version 1.1
 
 task count {
@@ -9,7 +8,7 @@ task count {
         feature_counts: {
             description: "A two column TSV file. First column is feature names and second column is counts.",
             help: "Presence of a header is determined by the `include_custom_header` parameter.",
-        }
+        },
     }
 }
@@ -96,10 +95,14 @@ task count {
     Float bam_size = size(bam, "GB")
     Float gtf_size = size(gtf, "GB")
 
-    Int memory_gb = (if pos_sorted then ceil(bam_size) + 4 else 4) + modify_memory_gb
+    Int memory_gb = (if pos_sorted
+        then ceil(bam_size) + 4
+        else 4
+    ) + modify_memory_gb
 
-    Int disk_size_gb = ceil(
-        (bam_size + gtf_size) * if pos_sorted then 4 else 1
+    Int disk_size_gb = ceil((bam_size + gtf_size) * if pos_sorted
+        then 4
+        else 1
     ) + 10 + modify_disk_size_gb
 
     command <<<
@@ -114,19 +117,27 @@ task count {
         # 9223372036854776000 == max 64 bit Float
         htseq-count -f bam \
             --max-reads-in-buffer 9223372036854776000 \
-            -r ~{if pos_sorted then "pos" else "name"} \
+            -r ~{if pos_sorted
+                then "pos"
+                else "name"
+            } \
             -s "~{strandedness}" \
             -a ~{minaqual} \
             -t "~{feature_type}" \
             -m "~{mode}" \
             -i "~{idattr}" \
-            --nonunique ~{if nonunique then "all" else "none"} \
-            --secondary-alignments ~{if secondary_alignments then "score" else "ignore"} \
-            --supplementary-alignments ~{(
-                if supplementary_alignments
+            --nonunique ~{if nonunique
+                then "all"
+                else "none"
+            } \
+            --secondary-alignments ~{if secondary_alignments
                 then "score"
                 else "ignore"
-            )} \
+            } \
+            --supplementary-alignments ~{if supplementary_alignments
+                then "score"
+                else "ignore"
+            } \
             "~{bam}" \
             "~{gtf}" \
             >> "~{outfile_name}"
@@ -148,7 +159,7 @@ task calc_tpm {
     meta {
         description: "Given a feature counts file and a feature lengths file, calculate Transcripts Per Million (TPM)"
         outputs: {
-            tpm_file: "Transcripts Per Million (TPM) file. A two column headered TSV file."
+            tpm_file: "Transcripts Per Million (TPM) file. A two column headered TSV file.",
         }
     }
@@ -180,7 +191,10 @@ task calc_tpm {
             "~{counts}" \
             "~{feature_lengths}" \
             "~{outfile_name}" \
-            ~{if has_header then "--counts_has_header" else ""}
+            ~{if has_header
+                then "--counts_has_header"
+                else ""
+            }
     >>>
 
     output {
diff --git a/tools/kraken2.wdl b/tools/kraken2.wdl
index 57fc6bd42..601f8c1f6 100644
--- a/tools/kraken2.wdl
+++ b/tools/kraken2.wdl
@@ -1,5 +1,4 @@
 ## [Homepage](https://github.com/DerrickWood/kraken2)
-
 version 1.1
 
 task download_taxonomy {
@@ -9,7 +8,7 @@ task download_taxonomy {
         taxonomy: {
             description: "The NCBI taxonomy, which is needed by the `build_db` task.",
             warning: "This output is not human-readable or meant for anything other than building a Kraken2 database.",
-        }
+        },
     }
 }
@@ -27,7 +26,10 @@ task download_taxonomy {
         set -euo pipefail
 
         kraken2-build --download-taxonomy \
-            ~{if protein then "--protein" else ""} \
+            ~{if protein
+                then "--protein"
+                else ""
+            } \
             --use-ftp \
             --db "~{db_name}" 2>&1 \
             | awk '/gunzip:/ { print; exit 42 } !/gunzip:/ { print }' 1>&2
@@ -58,7 +60,7 @@ task download_library {
         library: {
             description: "A library of reference genomes, which is needed by the `build_db` task.",
             warning: "This output is not human-readable or meant for anything other than building a Kraken2 database.",
-        }
+        },
     }
 }
@@ -96,22 +98,24 @@ task download_library {
 
     String db_name = "kraken2_" + library_name + "_library"
 
-    #@ except: ExpressionSpacing
-    Int disk_size_gb = (
-        (
-            if library_name == "bacteria" then 300
-            else if library_name == "nr" then 600
-            else if library_name == "nt" then 2500
-            else 25
-        ) + modify_disk_size_gb
-    )
+    Int disk_size_gb = (if library_name == "bacteria"
+        then 300
+        else if library_name == "nr"
+        then 600
+        else if library_name == "nt"
+        then 2500
+        else 25
+    ) + modify_disk_size_gb
 
     command <<<
         set -euo pipefail
 
         kraken2-build --download-library \
             "~{library_name}" \
-            ~{if protein then "--protein" else ""} \
+            ~{if protein
+                then "--protein"
+                else ""
+            } \
             --use-ftp \
             --db "~{db_name}" 2>&1 \
             | awk '/gunzip:/ { print; exit 42 } !/gunzip:/ { print }' 1>&2
@@ -140,7 +144,7 @@ task create_library_from_fastas {
         custom_library: {
             description: "Kraken2 compatible library, which is needed by the `build_db` task.",
             warning: "This output is not human-readable or meant for anything other than building a Kraken2 database.",
-        }
+        },
     }
 }
@@ -172,7 +176,10 @@ task create_library_from_fastas {
         while read -r fasta; do
             gunzip -c "$fasta" > tmp.fa
             kraken2-build \
-                ~{if protein then "--protein" else ""} \
+                ~{if protein
+                    then "--protein"
+                    else ""
+                } \
                 --add-to-library tmp.fa \
                 --db "~{db_name}"
         done < fastas.txt
@@ -200,7 +207,7 @@ task build_db {
     meta {
         description: "Builds a custom Kraken2 database"
         outputs: {
-            built_db: "A complete Kraken2 database"
+            built_db: "A complete Kraken2 database",
        }
     }
@@ -238,9 +245,15 @@ task build_db {
         String db_name = "kraken2_db"
         Boolean protein = false
         Boolean use_all_cores = false
-        Int kmer_len = if protein then 15 else 35
-        Int minimizer_len = if protein then 12 else 31
-        Int minimizer_spaces = if protein then 0 else 7
+        Int kmer_len = if protein
+            then 15
+            else 35
+        Int minimizer_len = if protein
+            then 12
+            else 31
+        Int minimizer_spaces = if protein
+            then 0
+            else 7
         Int max_db_size_gb = -1
         Int ncpu = 4
         Int modify_memory_gb = 0
@@ -249,13 +262,10 @@ task build_db {
 
     Float tarballs_size = size(tarballs, "GB")
     Int disk_size_gb = ceil(tarballs_size * 6) + 10 + modify_disk_size_gb
-    Int memory_gb = (
-        (
-            if (max_db_size_gb > 0)
-            then ceil(max_db_size_gb * 1.2)
-            else ceil(tarballs_size * 2)
-        ) + modify_memory_gb
-    )
+    Int memory_gb = (if (max_db_size_gb > 0)
+        then ceil(max_db_size_gb * 1.2)
+        else ceil(tarballs_size * 2)
+    ) + modify_memory_gb
 
     String max_db_size_bytes = "~{max_db_size_gb}000000000"
 
@@ -277,15 +287,17 @@ task build_db {
 
         >&2 echo "*** start DB build ***"
         kraken2-build --build \
-            ~{if protein then "--protein" else ""} \
+            ~{if protein
+                then "--protein"
+                else ""
+            } \
             --kmer-len ~{kmer_len} \
             --minimizer-len ~{minimizer_len} \
             --minimizer-spaces ~{minimizer_spaces} \
-            ~{(
-                if (max_db_size_gb > 0)
+            ~{if (max_db_size_gb > 0)
                 then "--max-db-size '" + max_db_size_bytes + "'"
                 else ""
-            )} \
+            } \
             --threads "$n_cores" \
             --db "~{db_name}"
 
@@ -359,9 +371,7 @@ task kraken {
         File read_two_fastq_gz
         #@ except: InputName
         File db
-        String prefix = sub(
-            basename(read_one_fastq_gz),
-            "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$",
+        String prefix = sub(basename(read_one_fastq_gz), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$",
             ""  # Once replacing with capturing groups is supported, replace with group 3
         )
         Boolean store_sequences = false
@@ -376,15 +386,11 @@ task kraken {
     Float db_size = size(db, "GB")
     Float read1_size = size(read_one_fastq_gz, "GB")
     Float read2_size = size(read_two_fastq_gz, "GB")
-    Int disk_size_gb_calculation = (
-        ceil((db_size * 2) + read1_size + read2_size) + 10 + modify_disk_size_gb
+    Int disk_size_gb_calculation = (ceil((db_size * 2) + read1_size + read2_size) + 10 + modify_disk_size_gb
     )
-    Int disk_size_gb = (
-        if store_sequences
+    Int disk_size_gb = if store_sequences
         then disk_size_gb_calculation + ceil(read1_size + read2_size)
         else disk_size_gb_calculation
-    )
-
     Int memory_gb = ceil(db_size * 2) + modify_memory_gb
 
     String out_report = prefix + ".kraken2.txt"
@@ -403,12 +409,18 @@ task kraken {
         kraken2 --db kraken2_db/ \
             --paired \
-            --output ~{if store_sequences then "'" + out_sequences + "'" else "-"} \
+            --output ~{if store_sequences
+                then "'" + out_sequences + "'"
+                else "-"
+            } \
             --threads "$n_cores" \
             --minimum-base-quality ~{min_base_quality} \
             --report "~{out_report}" \
             --report-zero-counts \
-            ~{if use_names then "--use-names" else ""} \
+            ~{if use_names
+                then "--use-names"
+                else ""
+            } \
             "~{read_one_fastq_gz}" \
             "~{read_two_fastq_gz}"
 
diff --git a/tools/librarian.wdl b/tools/librarian.wdl
index 4d1d43755..9fe3efac5 100644
--- a/tools/librarian.wdl
+++ b/tools/librarian.wdl
@@ -1,5 +1,4 @@
 ## # librarian
-
 version 1.1
 
 task librarian {
@@ -24,18 +23,14 @@ task librarian {
     input {
         File read_one_fastq
-        String prefix = sub(
-            basename(read_one_fastq),
-            "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$",
+        String prefix = sub(basename(read_one_fastq), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$",
             ""  # Once replacing with capturing groups is supported, replace with group 3
         ) + ".librarian"
         Int modify_disk_size_gb = 0
     }
 
     Float read1_size = size(read_one_fastq, "GB")
-    Int disk_size_gb = (
-        ceil(read1_size) + 10 + modify_disk_size_gb
-    )
+    Int disk_size_gb = (ceil(read1_size) + 10 + modify_disk_size_gb)
 
     command <<<
         set -euo pipefail
diff --git a/tools/md5sum.wdl b/tools/md5sum.wdl
index af67e781f..aeb8d7780 100755
--- a/tools/md5sum.wdl
+++ b/tools/md5sum.wdl
@@ -1,12 +1,11 @@
 ## [Homepage](https://github.com/coreutils/coreutils)
-
 version 1.1
 
 task compute_checksum {
     meta {
         description: "Generates an MD5 checksum for the input file"
         outputs: {
-            md5sum: "STDOUT of the `md5sum` command that has been redirected to a file"
+            md5sum: "STDOUT of the `md5sum` command that has been redirected to a file",
         }
     }
 
diff --git a/tools/mosdepth.wdl b/tools/mosdepth.wdl
index 746d5e67f..63606083d 100644
--- a/tools/mosdepth.wdl
+++ b/tools/mosdepth.wdl
@@ -1,5 +1,4 @@
 ## [Homepage](https://github.com/brentp/mosdepth)
-
 version 1.1
 
 task coverage {
@@ -53,7 +52,10 @@ task coverage {
             -n \
             ~{"-b '" + coverage_bed + "'"} \
             -Q ~{min_mapping_quality} \
-            ~{if (use_fast_mode) then "-x" else ""} \
+            ~{if (use_fast_mode)
+                then "-x"
+                else ""
+            } \
             "~{prefix}" \
             "$CWD_BAM"
 
diff --git a/tools/ngsderive.wdl b/tools/ngsderive.wdl
index 0cab3af85..f7856be9a 100644
--- a/tools/ngsderive.wdl
+++ b/tools/ngsderive.wdl
@@ -1,5 +1,4 @@
 ## [Homepage](https://github.com/stjudecloud/ngsderive)
-
 version 1.1
 
 task strandedness {
@@ -61,7 +60,10 @@ task strandedness {
         ln -s "~{gene_model}" "$CWD_GFF"
 
         ngsderive strandedness --verbose \
-            ~{if split_by_rg then "--split-by-rg" else ""} \
+            ~{if split_by_rg
+                then "--split-by-rg"
+                else ""
+            } \
             -m ~{min_reads_per_gene} \
             -n ~{num_genes} \
             -q ~{min_mapq} \
@@ -398,21 +400,29 @@ task endedness {
     }
 
     Float bam_size = size(bam, "GB")
-    Int memory_gb = (
-        if calc_rpt
-        then (
-            ceil(bam_size * 2.5) + 4 + modify_memory_gb
-        )
+    Int memory_gb = if calc_rpt
+        then (ceil(bam_size * 2.5) + 4 + modify_memory_gb)
         else 4
-    )
     Int disk_size_gb = ceil(bam_size) + 10 + modify_disk_size_gb
 
     command <<<
         ngsderive endedness --verbose \
-            ~{if lenient then "--lenient" else ""} \
-            ~{if calc_rpt then "-r" else ""} \
-            ~{if round_rpt then "--round-rpt" else ""} \
-            ~{if split_by_rg then "--split-by-rg" else ""} \
+            ~{if lenient
+                then "--lenient"
+                else ""
+            } \
+            ~{if calc_rpt
+                then "-r"
+                else ""
+            } \
+            ~{if round_rpt
+                then "--round-rpt"
+                else ""
+            } \
+            ~{if split_by_rg
"--split-by-rg" + else "" + } \ --paired-deviance ~{paired_deviance} \ -n ~{num_reads} \ "~{bam}" \ diff --git a/tools/picard.wdl b/tools/picard.wdl index fb3d49aad..e20c3484e 100755 --- a/tools/picard.wdl +++ b/tools/picard.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://broadinstitute.github.io/picard/) - version 1.1 task mark_duplicates { @@ -84,13 +83,10 @@ task mark_duplicates { Float bam_size = size(bam, "GB") Int memory_gb = min(ceil(bam_size + 12), 50) + modify_memory_gb - Int disk_size_gb = ( - ( - if create_bam - then ceil((bam_size * 2) + 10) - else ceil(bam_size + 10) - ) + modify_disk_size_gb - ) + Int disk_size_gb = (if create_bam + then ceil((bam_size * 2) + 10) + else ceil(bam_size + 10) + ) + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) @@ -100,13 +96,17 @@ task mark_duplicates { picard -Xmx~{java_heap_size}g MarkDuplicates \ -I "~{bam}" \ --METRICS_FILE "~{prefix}.metrics.txt" \ - -O "~{if create_bam then prefix + ".bam" else "/dev/null"}" \ + -O "~{if create_bam + then prefix + ".bam" + else "/dev/null" + }" \ --CREATE_INDEX ~{create_bam} \ --CREATE_MD5_FILE ~{create_bam} \ --VALIDATION_STRINGENCY "~{validation_stringency}" \ --DUPLICATE_SCORING_STRATEGY "~{duplicate_scoring_strategy}" \ - --READ_NAME_REGEX '~{ - if (optical_distance > 0) then read_name_regex else "null" + --READ_NAME_REGEX '~{if (optical_distance > 0) + then read_name_regex + else "null" }' \ --TAGGING_POLICY "~{tagging_policy}" \ --CLEAR_DT ~{clear_dt} \ @@ -194,13 +194,15 @@ task validate_bam { Int modify_disk_size_gb = 0 } - String outfile = if summary_mode then outfile_name else outfile_name + ".gz" - String mode_arg = if (summary_mode) then "--MODE SUMMARY" else "" - String stringency_arg = ( - if (index_validation_stringency_less_exhaustive) + String outfile = if summary_mode + then outfile_name + else outfile_name + ".gz" + String mode_arg = if (summary_mode) + then "--MODE SUMMARY" + else "" + String stringency_arg = if (index_validation_stringency_less_exhaustive) then "--INDEX_VALIDATION_STRINGENCY LESS_EXHAUSTIVE" else "" - ) Float bam_size = size(bam, "GB") Int disk_size_gb = ceil(bam_size) + 10 + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) @@ -218,7 +220,10 @@ task validate_bam { --VALIDATION_STRINGENCY "~{validation_stringency}" \ ~{sep(" ", prefix("--IGNORE ", squote(ignore_list)))} \ --MAX_OUTPUT ~{max_errors} \ - ~{if !summary_mode then "| gzip" else ""} \ + ~{if !summary_mode + then "| gzip" + else "" + } \ > "~{outfile}" \ || rc=$? @@ -420,8 +425,10 @@ task merge_sam_files { File merged_bam_md5 = outfile_name + ".md5" } - runtime{ - cpu: if threading then 2 else 1 + runtime { + cpu: if threading + then 2 + else 1 memory: "~{memory_gb} GB" disks: "~{disk_size_gb} GB" container: "quay.io/biocontainers/picard:3.1.1--hdfd78af_0" @@ -506,7 +513,7 @@ task collect_wgs_metrics { wgs_metrics: { description: "Output report of `picard CollectWgsMetrics`", external_help: "https://broadinstitute.github.io/picard/picard-metric-definitions.html#CollectWgsMetrics.WgsMetrics", - } + }, } } @@ -853,16 +860,18 @@ task bam_to_fastq { picard -Xmx~{java_heap_size}g SamToFastq INPUT="~{bam}" \ FASTQ="~{prefix}.R1.fastq" \ - ~{( - if paired + ~{if paired then "SECOND_END_FASTQ='" + prefix + ".R2.fastq'" else "" - )} \ + } \ RE_REVERSE=true \ VALIDATION_STRINGENCY=SILENT gzip "~{prefix}.R1.fastq" \ - ~{if paired then "'" + prefix + ".R2.fastq'" else ""} + ~{if paired + then "'" + prefix + ".R2.fastq'" + else "" + } >>> output { @@ -870,7 +879,7 @@ task bam_to_fastq { File? 
         File? read_two_fastq_gz = "~{prefix}.R2.fastq.gz"
     }
 
-    runtime{ 
+    runtime {
         memory: "~{memory_gb} GB"
         disks: "~{disk_size_gb} GB"
         container: "quay.io/biocontainers/picard:3.1.1--hdfd78af_0"
@@ -934,7 +943,7 @@ task scatter_interval_list {
         }
     }
 
-    parameter_meta { 
+    parameter_meta {
         interval_list: "Input interval list to split"
         scatter_count: "Number of interval lists to create"
         subdivision_mode: {
@@ -1001,7 +1010,7 @@ task create_sequence_dictionary {
         description: "Creates a sequence dictionary for the input FASTA file using Picard"
         external_help: "https://gatk.broadinstitute.org/hc/en-us/articles/13832748622491-CreateSequenceDictionary-Picard-"
         outputs: {
-            dictionary: "Sequence dictionary produced by `picard CreateSequenceDictionary`."
+            dictionary: "Sequence dictionary produced by `picard CreateSequenceDictionary`.",
         }
     }
 
diff --git a/tools/qualimap.wdl b/tools/qualimap.wdl
index deb67571a..a1bdfe000 100755
--- a/tools/qualimap.wdl
+++ b/tools/qualimap.wdl
@@ -1,5 +1,4 @@
 ## [Homepage](http://qualimap.bioinfo.cipf.es/)
-
 version 1.1
 
 task rnaseq {
@@ -41,8 +40,12 @@ task rnaseq {
     }
 
     String out_tar_gz = prefix + ".tar.gz"
-    String name_sorted_arg = if (name_sorted) then "-s" else ""
-    String paired_end_arg = if (paired_end) then "-pe" else ""
+    String name_sorted_arg = if (name_sorted)
+        then "-s"
+        else ""
+    String paired_end_arg = if (paired_end)
+        then "-pe"
+        else ""
     Int java_heap_size = ceil(memory_gb * 0.9)
 
     Float bam_size = size(bam, "GB")
@@ -50,13 +53,10 @@ task rnaseq {
 
     # Qualimap has an inefficient name sorting algorithm and will
     # use an excessive amount of storage.
-    Int disk_size_gb = (
-        (
-            if name_sorted
-            then ceil(bam_size + gtf_size + 15)
-            else ceil(((bam_size + gtf_size) * 12) + 10)
-        ) + modify_disk_size_gb
-    )
+    Int disk_size_gb = (if name_sorted
+        then ceil(bam_size + gtf_size + 15)
+        else ceil(((bam_size + gtf_size) * 12) + 10)
+    ) + modify_disk_size_gb
 
     command <<<
         set -euo pipefail
@@ -81,8 +81,7 @@ task rnaseq {
     output {
         File raw_summary = "~{prefix}/rnaseq_qc_results.txt"
-        File raw_coverage
-            = "~{prefix}/raw_data_qualimapReport/coverage_profile_along_genes_(total).txt"
+        File raw_coverage = "~{prefix}/raw_data_qualimapReport/coverage_profile_along_genes_(total).txt"
         File results = out_tar_gz
     }
 
diff --git a/tools/sambamba.wdl b/tools/sambamba.wdl
index b75b87815..f5e0eced0 100644
--- a/tools/sambamba.wdl
+++ b/tools/sambamba.wdl
@@ -1,12 +1,11 @@
 ## [Homepage](https://lomereiter.github.io/sambamba/)
-
 version 1.1
 
 task index {
     meta {
         description: "Creates a `.bai` BAM index for the input BAM"
         outputs: {
-            bam_index: "A `.bai` BAM index associated with the input BAM. Filename will be `basename(bam) + '.bai'`."
+            bam_index: "A `.bai` BAM index associated with the input BAM. Filename will be `basename(bam) + '.bai'`.",
         }
     }
 
@@ -63,7 +62,7 @@ task merge {
     meta {
         description: "Merges multiple sorted BAMs into a single BAM"
         outputs: {
-            merged_bam: "The BAM resulting from merging all the input BAMs"
+            merged_bam: "The BAM resulting from merging all the input BAMs",
         }
     }
@@ -123,7 +122,7 @@ task sort {
     meta {
         description: "Sorts the input BAM file"
         outputs: {
-            sorted_bam: "The input BAM after it has been sorted according to `sort_order`"
+            sorted_bam: "The input BAM after it has been sorted according to `sort_order`",
         }
     }
@@ -157,7 +156,10 @@ task sort {
         sambamba sort \
             --nthreads ~{ncpu} \
             -o "~{outfile_name}" \
-            ~{if queryname_sort then "-n" else ""} \
+            ~{if queryname_sort
+                then "-n"
+                else ""
+            } \
             "~{bam}"
     >>>
 
@@ -209,7 +211,10 @@ task markdup {
     command <<<
         sambamba markdup \
             --nthreads ~{ncpu} \
-            ~{if remove_duplicates then "--remove-duplicates" else ""} \
+            ~{if remove_duplicates
+                then "--remove-duplicates"
+                else ""
+            } \
             "~{bam}" \
             "~{prefix}.markdup.bam" \
             > "~{prefix}.markdup_log.txt"
@@ -234,7 +239,7 @@ task flagstat {
     meta {
         description: "Produces a report containing statistics about the alignments based on the bit flags set in the BAM"
         outputs: {
-            flagstat_report: "`sambamba flagstat` STDOUT redirected to a file"
+            flagstat_report: "`sambamba flagstat` STDOUT redirected to a file",
         }
     }
@@ -275,7 +280,7 @@ task flagstat {
     >>>
 
     output {
-        File flagstat_report = outfile_name 
+        File flagstat_report = outfile_name
     }
 
     runtime {
diff --git a/tools/samtools.wdl b/tools/samtools.wdl
index 05d75f414..719b079b3 100755
--- a/tools/samtools.wdl
+++ b/tools/samtools.wdl
@@ -1,5 +1,4 @@
 ## [Homepage](http://samtools.sourceforge.net/)
-
 version 1.1
 
 import "../data_structures/flag_filter.wdl"
@@ -38,7 +37,7 @@ task split {
     meta {
         description: "Runs Samtools split on the input BAM file. This splits the BAM by read group into one or more output files."
         outputs: {
-            split_bams: "The split BAM files. The extensions will contain read group IDs, and will end in `.bam`."
+            split_bams: "The split BAM files. The extensions will contain read group IDs, and will end in `.bam`.",
         }
     }
@@ -131,12 +130,12 @@ task split {
                 rm first_read.sam
             done
         fi
-        
+
         exit $EXITCODE
     >>>
 
     output {
-        Array[File] split_bams = glob("*.bam") 
+        Array[File] split_bams = glob("*.bam")
     }
 
     runtime {
@@ -152,7 +151,7 @@ task flagstat {
     meta {
         description: "Produces a `samtools flagstat` report containing statistics about the alignments based on the bit flags set in the BAM"
         outputs: {
-            flagstat_report: "`samtools flagstat` STDOUT redirected to a file"
+            flagstat_report: "`samtools flagstat` STDOUT redirected to a file",
         }
     }
@@ -195,7 +194,7 @@ task flagstat {
     >>>
 
     output {
-        File flagstat_report = outfile_name 
+        File flagstat_report = outfile_name
     }
 
     runtime {
@@ -210,7 +209,7 @@ task index {
     meta {
         description: "Creates a `.bai` BAM index for the input BAM"
         outputs: {
-            bam_index: "A `.bai` BAM index associated with the input BAM. Filename will be `basename(bam) + '.bai'`."
+            bam_index: "A `.bai` BAM index associated with the input BAM. Filename will be `basename(bam) + '.bai'`.",
         }
     }
@@ -397,7 +396,6 @@ task subsample {
             fi
             rm first_read.sam
         fi
-
     >>>
 
     output {
@@ -419,7 +417,7 @@ task filter {
         description: "Filters a BAM based on its bitwise flag value."
         help: "This task is a wrapper around `samtools view`. This task will fail if there are no reads in the output BAM. This can happen either because the input BAM was empty or because the supplied `bitwise_filter` was too strict. If you want to down-sample a BAM, use the `subsample` task instead."
         outputs: {
-            filtered_bam: "BAM file that has been filtered based on the input flags"
+            filtered_bam: "BAM file that has been filtered based on the input flags",
         }
     }
@@ -505,7 +503,7 @@ task merge {
     meta {
         description: "Merges multiple sorted BAMs into a single BAM"
         outputs: {
-            merged_bam: "The BAM resulting from merging all the input BAMs"
+            merged_bam: "The BAM resulting from merging all the input BAMs",
         }
     }
@@ -584,11 +582,26 @@ task merge {
         samtools merge \
             --threads "$n_cores" \
             ~{"-h \"" + new_header + "\""} \
-            ~{if name_sorted then "-n" else ""} \
-            ~{if (region != "") then "-R \"" + region + "\"" else ""} \
-            ~{if attach_rg then "-r" else ""} \
-            ~{if combine_rg then "-c" else ""} \
-            ~{if combine_pg then "-p" else ""} \
+            ~{if name_sorted
+                then "-n"
+                else ""
+            } \
+            ~{if (region != "")
+                then "-R \"" + region + "\""
+                else ""
+            } \
+            ~{if attach_rg
+                then "-r"
+                else ""
+            } \
+            ~{if combine_rg
+                then "-c"
+                else ""
+            } \
+            ~{if combine_pg
+                then "-p"
+                else ""
+            } \
             "~{prefix}.bam" \
             "${bams[@]}"
 
@@ -613,7 +626,7 @@ task addreplacerg {
     meta {
         description: "Adds or replaces read group tags"
         outputs: {
-            tagged_bam: "The transformed input BAM after read group modifications have been applied"
+            tagged_bam: "The transformed input BAM after read group modifications have been applied",
         }
     }
@@ -677,8 +690,14 @@ task addreplacerg {
             --threads "$n_cores" \
             ~{sep(" ", prefix("-r ", squote(read_group_line)))} \
             ~{"-R \"" + read_group_id + "\""} \
-            -m ~{if orphan_only then "orphan_only" else "overwrite_all"} \
-            ~{if overwrite_header_record then "-w" else ""} \
+            -m ~{if orphan_only
+                then "orphan_only"
+                else "overwrite_all"
+            } \
+            ~{if overwrite_header_record
+                then "-w"
+                else ""
+            } \
             -o "~{outfile_name}" \
             "~{bam}"
     >>>
@@ -700,7 +719,7 @@ task collate {
     meta {
         description: "Runs `samtools collate` on the input BAM file. Shuffles and groups reads together by their names."
         outputs: {
-            collated_bam: "A collated BAM (reads sharing a name next to each other, no other guarantee of sort order)"
+            collated_bam: "A collated BAM (reads sharing a name next to each other, no other guarantee of sort order)",
        }
     }
@@ -751,7 +770,10 @@ task collate {
         samtools collate \
             --threads "$n_cores" \
-            ~{if fast_mode then "-f" else ""} \
+            ~{if fast_mode
+                then "-f"
+                else ""
+            } \
             -o "~{outfile_name}" \
             "~{bam}"
     >>>
@@ -854,16 +876,14 @@ task bam_to_fastq {
     }
 
     Float bam_size = size(bam, "GB")
-    Int memory_gb = (
-        if (collated || !paired_end)
+    Int memory_gb = (if (collated || !paired_end)
         then 4
         else (ceil(bam_size * 0.4) + 4)
     ) + modify_memory_gb
-    Int disk_size_gb = ceil(bam_size * (
-        if (retain_collated_bam && !collated && paired_end)
+    Int disk_size_gb = ceil(bam_size * if (retain_collated_bam && !collated && paired_end)
         then 5
         else 2
-    )) + 10 + modify_disk_size_gb
+    ) + 10 + modify_disk_size_gb
 
     command <<<
         set -euo pipefail
 
         mkfifo bam_pipe
         if ! ~{collated} && ~{paired_end}; then
             samtools collate \
-                ~{if retain_collated_bam then "" else "-u"} \
+                ~{if retain_collated_bam
+                    then ""
+                    else "-u"
+                } \
                 --threads "$n_cores" \
-                ~{if fast_mode then "-f" else ""} \
+                ~{if fast_mode
+                    then "-f"
+                    else ""
+                } \
                 -O \
                 "~{bam}" \
-                | tee ~{(
-                    if retain_collated_bam
+                | tee ~{if retain_collated_bam
                     then "\"" + prefix + ".collated.bam\""
                     else ""
-                )} \
+                } \
                 > bam_pipe \
                 &
         else
@@ -900,35 +925,28 @@ task bam_to_fastq {
             -F "~{bitwise_filter.exclude_if_any}" \
             --rf "~{bitwise_filter.include_if_any}" \
             -G "~{bitwise_filter.exclude_if_all}" \
-            ~{(
-                if append_read_number
+            ~{if append_read_number
                 then "-N"
                 else "-n"
-            )} \
-            -1 ~{(
-                if paired_end
+            } \
+            -1 ~{if paired_end
                 then "\"" + prefix + ".R1.fastq.gz\""
                 else "\"" + prefix + ".fastq.gz\""
-            )} \
-            -2 ~{(
-                if paired_end
+            } \
+            -2 ~{if paired_end
                 then "\"" + prefix + ".R2.fastq.gz\""
                 else "\"" + prefix + ".fastq.gz\""
-            )} \
-            ~{(
-                if paired_end
-                then (
-                    if output_singletons
+            } \
+            ~{if paired_end
+                then if output_singletons
                     then "-s \"" + prefix + ".singleton.fastq.gz\""
                     else "-s junk.singleton.fastq.gz"
-                )
                 else ""
-            )} \
-            -0 ~{(
-                if paired_end
+            } \
+            -0 ~{if paired_end
                 then "junk.unknown_bit_setting.fastq.gz"
                 else "\"" + prefix + ".fastq.gz\""
-            )} \
+            } \
             bam_pipe
 
         rm bam_pipe
@@ -971,7 +989,7 @@ task fixmate {
         description: "Runs `samtools fixmate` on the input BAM file. This fills in mate coordinates and insert size fields among other tags and fields."
         warning: "This task assumes a name-sorted or name-collated input BAM. If you have a position-sorted BAM, please use the `position_sorted_fixmate` task."
         outputs: {
-            fixmate_bam: "The BAM resulting from running `samtools fixmate` on the input BAM"
+            fixmate_bam: "The BAM resulting from running `samtools fixmate` on the input BAM",
        }
     }
@@ -1042,11 +1060,26 @@ task fixmate {
         samtools fixmate \
             --threads "$n_cores" \
-            ~{if remove_unaligned_and_secondary then "-r" else ""} \
-            ~{if disable_proper_pair_check then "-p" else ""} \
-            ~{if add_cigar then "-c" else ""} \
-            ~{if add_mate_score then "-m" else ""} \
-            ~{if disable_flag_sanitization then "-z off" else ""} \
+            ~{if remove_unaligned_and_secondary
+                then "-r"
+                else ""
+            } \
+            ~{if disable_proper_pair_check
+                then "-p"
+                else ""
+            } \
+            ~{if add_cigar
+                then "-c"
+                else ""
+            } \
+            ~{if add_mate_score
+                then "-m"
+                else ""
+            } \
+            ~{if disable_flag_sanitization
+                then "-z off"
+                else ""
+            } \
             "~{bam}" \
             "~{prefix}~{extension}"
     >>>
@@ -1070,7 +1103,7 @@ task position_sorted_fixmate {
         warning: "If you already have a collated BAM, please use the `fixmate` task."
         help: "`fixmate` fills in mate coordinates and insert size fields among other tags and fields. This task collates the input BAM, runs `fixmate`, and then resorts the output into a position-sorted BAM."
         outputs: {
-            fixmate_bam: "BAM file with mate information added"
+            fixmate_bam: "BAM file with mate information added",
         }
     }
@@ -1137,18 +1170,36 @@ task position_sorted_fixmate {
         samtools collate \
             --threads "$n_cores" \
-            ~{if fast_mode then "-f" else ""} \
+            ~{if fast_mode
+                then "-f"
+                else ""
+            } \
             -u \
             -O \
             "~{bam}" \
             | samtools fixmate \
                 --threads "$n_cores" \
                 -u \
-                ~{if remove_unaligned_and_secondary then "-r" else ""} \
-                ~{if disable_proper_pair_check then "-p" else ""} \
-                ~{if add_cigar then "-c" else ""} \
-                ~{if add_mate_score then "-m" else ""} \
-                ~{if disable_flag_sanitization then "-z off" else ""} \
+                ~{if remove_unaligned_and_secondary
+                    then "-r"
+                    else ""
+                } \
+                ~{if disable_proper_pair_check
+                    then "-p"
+                    else ""
+                } \
+                ~{if add_cigar
+                    then "-c"
+                    else ""
+                } \
+                ~{if add_mate_score
+                    then "-m"
+                    else ""
+                } \
+                ~{if disable_flag_sanitization
+                    then "-z off"
+                    else ""
+                } \
                 - \
                 - \
             | samtools sort \
@@ -1279,25 +1330,54 @@ task markdup {
         samtools markdup \
             --threads "$n_cores" \
-            -f "~{prefix + if json then ".json" else ".txt"}" \
+            -f "~{prefix + if json
+                then ".json"
+                else ".txt"
+            }" \
             --read-coords '~{read_coords_regex}' \
             --coords-order "~{coordinates_order}" \
-            ~{if remove_duplicates then "-r" else ""} \
-            ~{if mark_supp_or_sec_or_unmapped_as_duplicates then "-S" else ""} \
-            ~{if mark_duplicates_with_do_tag then "-t" else ""} \
-            ~{if duplicate_count then "--duplicate-count" else ""} \
-            ~{if include_qc_fails then "--include-fails" else ""} \
-            ~{if duplicates_of_duplicates_check then "" else "--no-multi-dup"} \
-            ~{if use_read_groups then "--use-read-groups" else ""} \
+            ~{if remove_duplicates
+                then "-r"
+                else ""
+            } \
+            ~{if mark_supp_or_sec_or_unmapped_as_duplicates
+                then "-S"
+                else ""
+            } \
+            ~{if mark_duplicates_with_do_tag
+                then "-t"
+                else ""
+            } \
+            ~{if duplicate_count
+                then "--duplicate-count"
+                else ""
+            } \
+            ~{if include_qc_fails
+                then "--include-fails"
+                else ""
+            } \
+            ~{if duplicates_of_duplicates_check
+                then ""
+                else "--no-multi-dup"
+            } \
+            ~{if use_read_groups
+                then "--use-read-groups"
+                else ""
+            } \
             -l ~{max_readlen} \
             -d ~{optical_distance} \
             -c \
             "~{bam}" \
-            "~{if create_bam then prefix + ".bam" else "/dev/null"}"
+            "~{if create_bam
+                then prefix + ".bam"
+                else "/dev/null"
+            }"
     >>>
 
     output {
-        File markdup_report = prefix + if json then ".json" else ".txt"
+        File markdup_report = prefix + if json
+            then ".json"
+            else ".txt"
         File? markdup_bam = prefix + ".bam"
     }
 
@@ -1314,7 +1394,7 @@ task faidx {
     meta {
         description: "Creates a `.fai` FASTA index for the input FASTA"
         outputs: {
-            fasta_index: "A `.fai` FASTA index associated with the input FASTA. Filename will be `basename(fasta) + '.fai'`."
+            fasta_index: "A `.fai` FASTA index associated with the input FASTA. Filename will be `basename(fasta) + '.fai'`.",
         }
     }
 
diff --git a/tools/star.wdl b/tools/star.wdl
index 920e34a19..f22d2cbef 100755
--- a/tools/star.wdl
+++ b/tools/star.wdl
@@ -1,12 +1,11 @@
 ## [Homepage](https://github.com/alexdobin/STAR)
-
 version 1.1
 
 task build_star_db {
     meta {
         description: "Runs STAR's build command to generate a STAR format reference for alignment"
         outputs: {
-            star_db: "A gzipped TAR file containing the STAR reference files. Suitable as the `star_db_tar_gz` input to the `alignment` task."
+            star_db: "A gzipped TAR file containing the STAR reference files. Suitable as the `star_db_tar_gz` input to the `alignment` task.",
         }
     }
@@ -86,8 +85,7 @@ task build_star_db {
 
     Float reference_fasta_size = size(reference_fasta, "GB")
     Float gtf_size = size(gtf, "GB")
-    Int disk_size_gb = (
-        ceil((reference_fasta_size + gtf_size) * 3) + 10 + modify_disk_size_gb
+    Int disk_size_gb = (ceil((reference_fasta_size + gtf_size) * 3) + 10 + modify_disk_size_gb
     )
 
     # Leave 2GB as system overhead
@@ -558,7 +556,11 @@ task alignment {
         Array[File] read_one_fastqs_gz
         Array[String] read_groups
         Array[File]? read_two_fastqs_gz
-        Array[Int] out_sj_filter_intron_max_vs_read_n = [50000, 100000, 200000]
+        Array[Int] out_sj_filter_intron_max_vs_read_n = [
+            50000,
+            100000,
+            200000,
+        ]
         SpliceJunctionMotifs out_sj_filter_overhang_min = SpliceJunctionMotifs {
             noncanonical_motifs: 30,
             GT_AG_and_CT_AC_motif: 12,
@@ -595,9 +597,7 @@ task alignment {
         Pair[Int, Int] clip_3p_n_bases = (0, 0)
         Pair[Int, Int] clip_3p_after_adapter_n_bases = (0, 0)
         Pair[Int, Int] clip_5p_n_bases = (0, 0)
-        String prefix = sub(
-            basename(read_one_fastqs_gz[0]),
-            "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$",
+        String prefix = sub(basename(read_one_fastqs_gz[0]), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$",
             ""  # Once replacing with capturing groups is supported, replace with group 3
         )
         String read_name_separator = "/"
@@ -699,16 +699,16 @@ task alignment {
         Int modify_disk_size_gb = 0
     }
 
-    Array[File] read_twos = select_first([read_two_fastqs_gz, []])
+    Array[File] read_twos = select_first([
+        read_two_fastqs_gz,
+        [],
+    ])
     Float read_one_fastqs_size = size(read_one_fastqs_gz, "GB")
     Float read_two_fastqs_size = size(read_twos, "GB")
     Float star_db_tar_gz_size = size(star_db_tar_gz, "GB")
-    Int disk_size_gb = (
-        (
-            ceil(read_one_fastqs_size + read_two_fastqs_size + star_db_tar_gz_size) * 3
-        ) + 10 + modify_disk_size_gb
-    )
+    Int disk_size_gb = ((ceil(read_one_fastqs_size + read_two_fastqs_size + star_db_tar_gz_size
+    ) * 3) + 10 + modify_disk_size_gb)
 
     command <<<
         set -euo pipefail
@@ -733,9 +733,9 @@ task alignment {
             --outFileNamePrefix "~{prefix + "."}" \
             --twopassMode "~{twopass_mode}" \
             --outSAMattrRGline ~{sep(" , ", read_groups)} \
-            --outSJfilterIntronMaxVsReadN ~{
-                sep(" ", quote(out_sj_filter_intron_max_vs_read_n))
-            } \
+            --outSJfilterIntronMaxVsReadN ~{sep(" ", quote(
+                out_sj_filter_intron_max_vs_read_n
+            ))} \
             --outSJfilterOverhangMin ~{sep(" ", quote([
                 out_sj_filter_overhang_min.noncanonical_motifs,
                 out_sj_filter_overhang_min.GT_AG_and_CT_AC_motif,
@@ -766,36 +766,32 @@ task alignment {
                 align_sj_stitch_mismatch_n_max.GC_AG_and_CT_GC_motif,
                 align_sj_stitch_mismatch_n_max.AT_AC_and_GT_AT_motif,
             ]))} \
-            --clip3pAdapterSeq "~{clip_3p_adapter_seq.left}" ~{(
-                if (length(read_twos) != 0)
+            --clip3pAdapterSeq "~{clip_3p_adapter_seq.left}" ~{if (length(read_twos) != 0)
                 then "'" + clip_3p_adapter_seq.right + "'"
                 else ""
-            )} \
-            --clip3pAdapterMMp ~{clip_3p_adapter_mmp.left} ~{(
-                if (length(read_twos) != 0)
+            } \
+            --clip3pAdapterMMp ~{clip_3p_adapter_mmp.left} ~{if (length(read_twos) != 0)
                 then clip_3p_adapter_mmp.right
                 else None
-            )} \
-            --alignEndsProtrude ~{align_ends_protrude.left} "~{(
-                if (length(read_twos) != 0)
+            } \
+            --alignEndsProtrude ~{align_ends_protrude.left} "~{if (length(read_twos) != 0)
                 then align_ends_protrude.right
                 else None
-            )}" \
-            --clip3pNbases ~{clip_3p_n_bases.left} ~{(
-                if (length(read_twos) != 0)
+            }" \
+            --clip3pNbases ~{clip_3p_n_bases.left} ~{if (length(read_twos) != 0)
                 then clip_3p_n_bases.right
                 else None
-            )} \
-            --clip3pAfterAdapterNbases ~{clip_3p_after_adapter_n_bases.left} ~{(
-                if (length(read_twos) != 0)
+            } \
+            --clip3pAfterAdapterNbases ~{clip_3p_after_adapter_n_bases.left} ~{if (length(
+                read_twos
+            ) != 0)
                 then clip_3p_after_adapter_n_bases.right
                 else None
-            )} \
-            --clip5pNbases ~{clip_5p_n_bases.left} ~{(
-                if (length(read_twos) != 0)
+            } \
+            --clip5pNbases ~{clip_5p_n_bases.left} ~{if (length(read_twos) != 0)
                 then clip_5p_n_bases.right
                 else None
-            )} \
+            } \
             --readNameSeparator "~{read_name_separator}" \
             --clipAdapterType "~{clip_adapter_type}" \
             --outSAMstrandField "~{out_sam_strand_field}" \
@@ -803,13 +799,12 @@ task alignment {
             --outSAMunmapped "~{out_sam_unmapped}" \
             --outSAMorder "~{out_sam_order}" \
             --outSAMreadID "~{out_sam_read_id}" \
-            --outSAMtlen ~{(
-                if (out_sam_tlen == "left_plus")
+            --outSAMtlen ~{if (out_sam_tlen == "left_plus")
                 then "1"
-                else (
-                    if (out_sam_tlen == "left_any") then "2" else "error"
-                )
-            )} \
+                else if (out_sam_tlen == "left_any")
+                then "2"
+                else "error"
+            } \
             --outFilterType "~{out_filter_type}" \
             --outFilterIntronMotifs "~{out_filter_intron_motifs}" \
             --outFilterIntronStrands "~{out_filter_intron_strands}" \
diff --git a/tools/util.wdl b/tools/util.wdl
index f339cb603..e1511d3e7 100644
--- a/tools/util.wdl
+++ b/tools/util.wdl
@@ -1,12 +1,11 @@
 ## # Utilities
-
 version 1.1

 task download {
     meta {
         description: "Uses wget to download a file from a remote URL to the local filesystem"
         outputs: {
-            downloaded_file: "File downloaded from provided URL"
+            downloaded_file: "File downloaded from provided URL",
         }
     }
@@ -53,7 +52,7 @@ task split_string {
         description: "Split a string into an array of strings based on a delimiter"
         warning: "This implementation will result in a runtime error if the provided string has any embedded single quotes (`'`)!"
         outputs: {
-            split_strings: "Split string as an array"
+            split_strings: "Split string as an array",
         }
     }
@@ -90,7 +89,7 @@ task calc_feature_lengths {
         description: "Calculate feature lengths from a GTF file using the non-overlapping exonic length algorithm"
         help: "The non-overlapping exonic length algorithm can be implemented as the sum of each base covered by at least one exon; where each base is given a value of 1 regardless of how many exons overlap it."
         outputs: {
-            feature_lengths: "A two column headered TSV file with feature names in the first column and feature lengths (as integers) in the second column"
+            feature_lengths: "A two column headered TSV file with feature names in the first column and feature lengths (as integers) in the second column",
         }
     }
@@ -166,7 +165,7 @@ task add_to_bam_header {
     meta {
         description: "Adds another line of text to the bottom of a BAM header"
         outputs: {
-            reheadered_bam: "The BAM after its header has been modified"
+            reheadered_bam: "The BAM after its header has been modified",
         }
     }
@@ -215,7 +214,7 @@ task unpack_tarball {
     meta {
         description: "Accepts a `.tar.gz` archive and converts it into a flat array of files. Any directory structure of the archive is ignored."
         outputs: {
-            tarball_contents: "An array of files found in the input tarball"
+            tarball_contents: "An array of files found in the input tarball",
         }
     }
@@ -314,7 +313,7 @@ task global_phred_scores {
     meta {
         description: "Calculates statistics about PHRED scores of the input BAM"
         outputs: {
-            phred_scores: "Headered TSV file containing PHRED score statistics"
+            phred_scores: "Headered TSV file containing PHRED score statistics",
         }
     }
@@ -339,7 +338,10 @@ task global_phred_scores {
     command <<<
         python3 /scripts/util/calc_global_phred_scores.py \
-            ~{if fast_mode then "--fast_mode" else ""} \
+            ~{if fast_mode
+                then "--fast_mode"
+                else ""
+            } \
             "~{bam}" \
             "~{prefix}"
     >>>
@@ -384,16 +386,18 @@ task check_fastq_and_rg_concordance {
         Array[String]? read_two_names
     }

-    Array[String] read_twos = select_first([read_two_names, []])
+    Array[String] read_twos = select_first([
+        read_two_names,
+        [],
+    ])

     command <<<
         python3 /scripts/util/check_FQs_and_RGs.py \
             --read-one-fastqs "~{sep(",", read_one_names)}" \
-            ~{(
-                if length(read_twos) > 0
+            ~{if length(read_twos) > 0
                 then "--read-two-fastqs \"" + sep(",", squote(read_twos)) + "\""
                 else ""
-            )} \
+            } \
             --read-groups "~{sep(",", read_groups)}"
     >>>
@@ -407,7 +411,7 @@ task split_fastq {
     meta {
         description: "Splits a FASTQ into multiple files based on the number of reads per file"
         outputs: {
-            fastqs: "Array of FASTQ files, each containing a subset of the input FASTQ"
+            fastqs: "Array of FASTQ files, each containing a subset of the input FASTQ",
         }
     }
@@ -427,11 +431,7 @@ task split_fastq {
     input {
         File fastq
-        String prefix = sub(
-            basename(fastq),
-            "(fastq|fq)\\.gz$",
-            ""
-        )
+        String prefix = sub(basename(fastq), "(fastq|fq)\\.gz$", "")
         Int reads_per_file = 10000000
         Int modify_disk_size_gb = 0
         Int ncpu = 2
diff --git a/workflows/chipseq/chipseq-standard.wdl b/workflows/chipseq/chipseq-standard.wdl
index d9f9290df..aa30759fe 100755
--- a/workflows/chipseq/chipseq-standard.wdl
+++ b/workflows/chipseq/chipseq-standard.wdl
@@ -9,11 +9,14 @@
 import "../../tools/samtools.wdl"
 import "../../tools/util.wdl"
 import "../general/bam-to-fastqs.wdl" as b2fq
 #@ except: LineWidth
-import "https://raw.githubusercontent.com/stjude/seaseq/2.3/workflows/workflows/mapping.wdl" as seaseq_map
+import "https://raw.githubusercontent.com/stjude/seaseq/2.3/workflows/workflows/mapping.wdl"
+    as seaseq_map
 #@ except: LineWidth
-import "https://raw.githubusercontent.com/stjude/seaseq/3.0/workflows/tasks/samtools.wdl" as seaseq_samtools
+import "https://raw.githubusercontent.com/stjude/seaseq/3.0/workflows/tasks/samtools.wdl"
+    as seaseq_samtools
 #@ except: LineWidth
-import "https://raw.githubusercontent.com/stjude/seaseq/3.0/workflows/tasks/seaseq_util.wdl" as seaseq_util
+import "https://raw.githubusercontent.com/stjude/seaseq/3.0/workflows/tasks/seaseq_util.wdl"
+    as seaseq_util

 workflow chipseq_standard_experimental {
     meta {
@@ -67,7 +70,10 @@ workflow chipseq_standard_experimental {
             use_all_cores,
         }
     }
-    File selected_bam = select_first([subsample.sampled_bam, bam])
+    File selected_bam = select_first([
+        subsample.sampled_bam,
+        bam,
+    ])

     call read_group.get_read_groups after validate_input_bam { input:
         bam = selected_bam,
@@ -79,7 +85,7 @@ workflow chipseq_standard_experimental {
         use_all_cores,
     }

-    scatter (pair in zip(bam_to_fastqs.read1s, get_read_groups.read_groups)){
+    scatter (pair in zip(bam_to_fastqs.read1s, get_read_groups.read_groups)) {
         if (enable_read_trimming) {
             call fp.fastp as trim { input:
                 read_one_fastq = pair.left,
@@ -93,10 +99,13 @@ workflow chipseq_standard_experimental {
             }
         }
-        File chosen_fastq = select_first([trim.single_end_reads_fastq_gz, pair.left])
+        File chosen_fastq = select_first([
+            trim.single_end_reads_fastq_gz,
+            pair.left,
+        ])
         call seaseq_util.basicfastqstats as basic_stats { input:
-            fastqfile = chosen_fastq
+            fastqfile = chosen_fastq,
         }
         call seaseq_map.mapping as bowtie_single_end_mapping { input:
             fastqfile = chosen_fastq,
@@ -104,13 +113,11 @@ workflow chipseq_standard_experimental {
             metricsfile = basic_stats.metrics_out,
             blacklist = excludelist,
         }
-        File chosen_bam = select_first(
-            [
-                bowtie_single_end_mapping.bklist_bam,
-                bowtie_single_end_mapping.mkdup_bam,
-                bowtie_single_end_mapping.sorted_bam,
-            ]
-        )
+        File chosen_bam = select_first([
+            bowtie_single_end_mapping.bklist_bam,
+            bowtie_single_end_mapping.mkdup_bam,
+            bowtie_single_end_mapping.sorted_bam,
+        ])

         call read_group.read_group_to_string { input:
             read_group = pair.right,
@@ -127,7 +134,7 @@ workflow chipseq_standard_experimental {
     }
     Array[File] aligned_bams = addreplacerg.tagged_bam

-    scatter(aligned_bam in aligned_bams){
+    scatter (aligned_bam in aligned_bams) {
         call picard.clean_sam as picard_clean { input:
             bam = aligned_bam,
         }
@@ -147,7 +154,9 @@ workflow chipseq_standard_experimental {
         use_all_cores,
     }
     #@ except: UnusedCall
-    call picard.validate_bam { input: bam = markdup.mkdupbam }
+    call picard.validate_bam { input:
+        bam = markdup.mkdupbam,
+    }

     call md5sum.compute_checksum { input:
         file = markdup.mkdupbam,
@@ -164,9 +173,13 @@ workflow chipseq_standard_experimental {
         File bam_checksum = compute_checksum.md5sum
         File bam_index = samtools_index.bam_index
         File bigwig = deeptools_bam_coverage.bigwig
-        Array[File] fastp_reports = select_all(flatten([fastp.report, trim.report]))
-        Array[File] fastp_jsons = select_all(flatten(
-            [fastp.report_json, trim.report_json]
-        ))
+        Array[File] fastp_reports = select_all(flatten([
+            fastp.report,
+            trim.report,
+        ]))
+        Array[File] fastp_jsons = select_all(flatten([
+            fastp.report_json,
+            trim.report_json,
+        ]))
     }
 }
diff --git a/workflows/dnaseq/dnaseq-core.wdl b/workflows/dnaseq/dnaseq-core.wdl
index 62b027db2..21a1d3990 100644
--- a/workflows/dnaseq/dnaseq-core.wdl
+++ b/workflows/dnaseq/dnaseq-core.wdl
@@ -1,5 +1,4 @@
 ## **WARNING:** this workflow is experimental! Use at your own risk!
-
 version 1.1

 import "../../tools/bwa.wdl"
@@ -69,10 +68,7 @@ workflow dnaseq_core_experimental {
         read_groups,
     }

-    scatter (tuple in zip(
-        zip(read_one_fastqs_gz, read_two_fastqs_gz),
-        read_groups
-    )) {
+    scatter (tuple in zip(zip(read_one_fastqs_gz, read_two_fastqs_gz), read_groups)) {
         if (enable_read_trimming) {
             call fp.fastp as trim after validate { input:
                 read_one_fastq = tuple.left.left,
@@ -87,8 +83,14 @@ workflow dnaseq_core_experimental {
                 output_fastq = enable_read_trimming,
             }
         }
-        File chosen_r1_fastq = select_first([trim.read_one_fastq_gz, tuple.left.left])
-        File chosen_r2_fastq = select_first([trim.read_two_fastq_gz, tuple.left.right])
+        File chosen_r1_fastq = select_first([
+            trim.read_one_fastq_gz,
+            tuple.left.left,
+        ])
+        File chosen_r2_fastq = select_first([
+            trim.read_two_fastq_gz,
+            tuple.left.right,
+        ])

         call util.split_fastq as read_ones after validate { input:
             fastq = chosen_r1_fastq,
@@ -105,11 +107,8 @@ workflow dnaseq_core_experimental {
                     read_one_fastq_gz = t.left,
                     read_two_fastq_gz = t.right,
                     bwa_db_tar_gz = bwa_db,
-                    prefix = sub(sub(
-                        basename(t.left),
-                        "(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$",
-                        ""
-                    ), "\\.([rR][12])\\.", "."),
+                    prefix = sub(sub(basename(t.left), "(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$",
+                        ""), "\\.([rR][12])\\.", "."),
                     read_group = tuple.right,
                     use_all_cores,
                 }
@@ -119,17 +118,17 @@ workflow dnaseq_core_experimental {
                     read_one_fastq_gz = t.left,
                     read_two_fastq_gz = t.right,
                     bwa_db_tar_gz = bwa_db,
-                    prefix = sub(sub(
-                        basename(t.left),
-                        "(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$",
-                        ""
-                    ), "\\.([rR][12])\\.", "."),
+                    prefix = sub(sub(basename(t.left), "(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$",
+                        ""), "\\.([rR][12])\\.", "."),
                     read_group = tuple.right,
                     use_all_cores,
                 }
             }
             call picard.sort as sort { input:
-                bam = select_first([bwa_mem.bam, bwa_aln_pe.bam])
+                bam = select_first([
+                    bwa_mem.bam,
+                    bwa_aln_pe.bam,
+                ]),
             }
         }
     }
@@ -146,9 +145,13 @@ workflow dnaseq_core_experimental {
     output {
         File harmonized_bam = merge.merged_bam
         File harmonized_bam_index = index.bam_index
-        Array[File] fastp_reports = select_all(flatten([fastp.report, trim.report]))
-        Array[File] fastp_jsons = select_all(flatten(
-            [fastp.report_json, trim.report_json]
-        ))
+        Array[File] fastp_reports = select_all(flatten([
+            fastp.report,
+            trim.report,
+        ]))
+        Array[File] fastp_jsons = select_all(flatten([
+            fastp.report_json,
+            trim.report_json,
+        ]))
     }
 }
diff --git a/workflows/dnaseq/dnaseq-standard-fastq.wdl b/workflows/dnaseq/dnaseq-standard-fastq.wdl
index c0542c19d..fdf48606b 100644
--- a/workflows/dnaseq/dnaseq-standard-fastq.wdl
+++ b/workflows/dnaseq/dnaseq-standard-fastq.wdl
@@ -1,5 +1,4 @@
 ## **WARNING:** this workflow is experimental! Use at your own risk!
-
 version 1.1

 import "../../data_structures/read_group.wdl"
@@ -54,9 +53,7 @@ workflow dnaseq_standard_fastq_experimental {
         Array[File] read_one_fastqs_gz
         Array[File] read_two_fastqs_gz
         Array[ReadGroup] read_groups
-        String prefix = sub(
-            basename(read_one_fastqs_gz[0]),
-            "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$",
+        String prefix = sub(basename(read_one_fastqs_gz[0]), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$",
             "" # Once replacing with capturing groups is supported, replace with group 3
         )
         String aligner = "mem"
@@ -101,12 +98,10 @@ workflow dnaseq_standard_fastq_experimental {
         subsample.subsampled_read1,
         read_one_fastqs_gz,
     ])
-    Array[File] selected_read_two_fastqs = select_all(
-        select_first([
-            subsample.subsampled_read2,
-            read_two_fastqs_gz,
-        ])
-    )
+    Array[File] selected_read_two_fastqs = select_all(select_first([
+        subsample.subsampled_read2,
+        read_two_fastqs_gz,
+    ]))

     call dnaseq_core_wf.dnaseq_core_experimental after fqlint { input:
         read_one_fastqs_gz = selected_read_one_fastqs,
diff --git a/workflows/dnaseq/dnaseq-standard.wdl b/workflows/dnaseq/dnaseq-standard.wdl
index 3d4ff774a..270f6bd2d 100644
--- a/workflows/dnaseq/dnaseq-standard.wdl
+++ b/workflows/dnaseq/dnaseq-standard.wdl
@@ -1,5 +1,4 @@
 ## **WARNING:** this workflow is experimental! Use at your own risk!
-
 version 1.1

 import "../../data_structures/read_group.wdl"
@@ -55,7 +54,7 @@ workflow dnaseq_standard_experimental {
     }

     call parse_input { input:
-        aligner
+        aligner,
     }

     if (validate_input) {
@@ -71,7 +70,10 @@ workflow dnaseq_standard_experimental {
            use_all_cores,
        }
    }
-    File selected_bam = select_first([subsample.sampled_bam, bam])
+    File selected_bam = select_first([
+        subsample.sampled_bam,
+        bam,
+    ])

     call read_group.get_read_groups after parse_input { input:
         bam = selected_bam,
@@ -95,7 +97,10 @@ workflow dnaseq_standard_experimental {
                 SM: sample_override,
             }
         }
-        ReadGroup selected_rg = select_first([overriden_rg, rg])
+        ReadGroup selected_rg = select_first([
+            overriden_rg,
+            rg,
+        ])
         call read_group.read_group_to_string { input:
             read_group = selected_rg,
             format_as_sam_record = true,
diff --git a/workflows/general/alignment-post.wdl b/workflows/general/alignment-post.wdl
index 53c18d64a..9caa5344c 100644
--- a/workflows/general/alignment-post.wdl
+++ b/workflows/general/alignment-post.wdl
@@ -4,7 +4,8 @@ import "../../tools/md5sum.wdl"
 import "../../tools/picard.wdl"
 import "../../tools/samtools.wdl"
 #@ except: LineWidth
-import "https://raw.githubusercontent.com/stjude/XenoCP/4.0.0-alpha/wdl/workflows/xenocp.wdl" as xenocp_wf
+import "https://raw.githubusercontent.com/stjude/XenoCP/4.0.0-alpha/wdl/workflows/xenocp.wdl"
+    as xenocp_wf

 workflow alignment_post {
     meta {
@@ -12,7 +13,7 @@ workflow alignment_post {
         outputs: {
             processed_bam: "Input BAM after being transformed by standard processing",
             bam_index: "BAI index associated with `processed_bam`",
-            bam_checksum: "STDOUT of the `md5sum` command run on the input BAM that has been redirected to a file",
+            bam_checksum: "STDOUT of the `md5sum` command run on the input BAM that has been redirected to a file",
             validate_report: "Validation report produced by `picard ValidateSamFile`. Validation warnings and errors are logged.",
         }
         allowNestedInputs: true
@@ -46,7 +47,9 @@ workflow alignment_post {
         Boolean use_all_cores = false
     }

-    call picard.sort as picard_sort { input: bam }
+    call picard.sort as picard_sort { input:
+        bam,
+    }

     if (cleanse_xenograft) {
         call samtools.index as pre_xenocp_index { input:
@@ -57,14 +60,23 @@ workflow alignment_post {
         call xenocp_wf.xenocp { input:
             input_bam = picard_sort.sorted_bam,
             input_bai = pre_xenocp_index.bam_index,
-            reference_tar_gz = select_first([contaminant_db, ""]),
-            aligner = select_first([xenocp_aligner, "undefined"]),
+            reference_tar_gz = select_first([
+                contaminant_db,
+                "",
+            ]),
+            aligner = select_first([
+                xenocp_aligner,
+                "undefined",
+            ]),
             skip_duplicate_marking = true,
         }
     }
     if (mark_duplicates) {
         call picard.mark_duplicates as picard_markdup { input:
-            bam = select_first([xenocp.bam, picard_sort.sorted_bam]),
+            bam = select_first([
+                xenocp.bam,
+                picard_sort.sorted_bam,
+            ]),
         }
     }
@@ -79,9 +91,13 @@ workflow alignment_post {
         use_all_cores,
     }
     File aligned_bam_index = samtools_index.bam_index
-    call picard.validate_bam { input: bam = aligned_bam }
+    call picard.validate_bam { input:
+        bam = aligned_bam,
+    }

-    call md5sum.compute_checksum { input: file = aligned_bam }
+    call md5sum.compute_checksum { input:
+        file = aligned_bam,
+    }

     output {
         File processed_bam = aligned_bam
diff --git a/workflows/general/bam-to-fastqs.wdl b/workflows/general/bam-to-fastqs.wdl
index 409ac0133..e5bcbedc9 100644
--- a/workflows/general/bam-to-fastqs.wdl
+++ b/workflows/general/bam-to-fastqs.wdl
@@ -27,7 +27,9 @@ workflow bam_to_fastqs {
         Boolean use_all_cores = false
     }

-    call samtools.quickcheck { input: bam }
+    call samtools.quickcheck { input:
+        bam,
+    }

     call samtools.split after quickcheck { input:
         bam,
@@ -42,11 +44,13 @@ workflow bam_to_fastqs {
     }

     if (paired_end) {
-        scatter (reads in
-            zip(bam_to_fastq.read_one_fastq_gz, bam_to_fastq.read_two_fastq_gz)
-        ) {
+        scatter (reads in zip(bam_to_fastq.read_one_fastq_gz, bam_to_fastq.read_two_fastq_gz
+        )) {
             call fq.fqlint { input:
-                read_one_fastq = select_first([reads.left, "undefined"]),
+                read_one_fastq = select_first([
+                    reads.left,
+                    "undefined",
+                ]),
                 read_two_fastq = reads.right,
             }
         }
@@ -54,17 +58,18 @@ workflow bam_to_fastqs {
     if (!paired_end) {
         scatter (fq in bam_to_fastq.single_end_reads_fastq_gz) {
             call fq.fqlint as se_fqlint { input:
-                read_one_fastq = select_first([fq, "undefined"]),
+                read_one_fastq = select_first([
+                    fq,
+                    "undefined",
+                ]),
             }
         }
     }

     output {
-        Array[File] read1s = (
-            if paired_end
+        Array[File] read1s = if paired_end
             then select_all(bam_to_fastq.read_one_fastq_gz)
             else select_all(bam_to_fastq.single_end_reads_fastq_gz)
-        )
         Array[File?] read2s = bam_to_fastq.read_two_fastq_gz
     }
 }
diff --git a/workflows/general/samtools-merge.wdl b/workflows/general/samtools-merge.wdl
index a34585c99..de9053218 100644
--- a/workflows/general/samtools-merge.wdl
+++ b/workflows/general/samtools-merge.wdl
@@ -1,21 +1,20 @@
 ## **WARNING:** this workflow is experimental! Use at your own risk!
-
 version 1.1

 import "../../tools/samtools.wdl"

 workflow samtools_merge {
-    meta{
+    meta {
         name: "Merge BAMs"
         description: "Runs `samtools merge`, with optional iteration to avoid maximum command line argument length"
         category: "Utility"
         outputs: {
-            merged_bam: "The BAM resulting from merging all the input BAMs"
+            merged_bam: "The BAM resulting from merging all the input BAMs",
         }
         allowNestedInputs: true
     }

-    parameter_meta{
+    parameter_meta {
         bams: "BAMs to merge into a final BAM"
         prefix: "Prefix for output BAM."
         use_all_cores: "Use all cores? Recommended for cloud environments."
@@ -31,22 +30,20 @@ workflow samtools_merge {

     Int bam_length = length(bams)

-    if (bam_length > max_length){
+    if (bam_length > max_length) {
         # Find the number of merges required
-        scatter (merge_num in range((bam_length / max_length) + 1)){
+        scatter (merge_num in range((bam_length / max_length) + 1)) {
             # Get the sublist of bams
-            scatter (bam_num in range(max_length)){
-                Int num = (
-                    if merge_num > 0
+            scatter (bam_num in range(max_length)) {
+                Int num = if merge_num > 0
                     then bam_num + (merge_num * max_length)
                     else bam_num
-                )
-                if (num < bam_length){
+                if (num < bam_length) {
                     File bam_list = bams[num]
                 }
             }
         }
-        scatter (list in bam_list){
+        scatter (list in bam_list) {
             call samtools.merge as inner_merge { input:
                 bams = select_all(list),
                 prefix,
@@ -65,7 +62,7 @@ workflow samtools_merge {
         }
     }

-    if (bam_length < max_length){
+    if (bam_length < max_length) {
         call samtools.merge as basic_merge { input:
             bams,
             prefix,
@@ -76,6 +73,9 @@ workflow samtools_merge {
     }

     output {
-        File merged_bam = select_first([final_merge.merged_bam, basic_merge.merged_bam])
+        File merged_bam = select_first([
+            final_merge.merged_bam,
+            basic_merge.merged_bam,
+        ])
     }
 }
diff --git a/workflows/methylation/methylation-cohort.wdl b/workflows/methylation/methylation-cohort.wdl
index 16e343e4c..652ab9c0d 100644
--- a/workflows/methylation/methylation-cohort.wdl
+++ b/workflows/methylation/methylation-cohort.wdl
@@ -38,21 +38,19 @@ workflow methylation_cohort {
     Int beta_length = length(unfiltered_normalized_beta)
     Int pval_length = length(p_values)

-    if (beta_length > max_length){
-        scatter (merge_num in range((beta_length / max_length) + 1)){
+    if (beta_length > max_length) {
+        scatter (merge_num in range((beta_length / max_length) + 1)) {
             # Get the sublist of beta files
-            scatter (beta_num in range(max_length)){
-                Int num = (
-                    if merge_num > 0
+            scatter (beta_num in range(max_length)) {
+                Int num = if merge_num > 0
                     then beta_num + (merge_num * max_length)
                     else beta_num
-                )
-                if (num < beta_length){
+                if (num < beta_length) {
                     File bam_list = unfiltered_normalized_beta[num]
                 }
             }
         }
-        scatter (iter_index in range(length(bam_list))){
+        scatter (iter_index in range(length(bam_list))) {
             call combine_data as inner_merge { input:
                 files_to_combine = select_all(bam_list[iter_index]),
                 combined_file_name = "~{iter_index}.combined.csv",
@@ -64,22 +62,20 @@ workflow methylation_cohort {
             combined_file_name = "combined_beta.csv",
         }

-        if (pval_length > 0 && !skip_pvalue_check){
+        if (pval_length > 0 && !skip_pvalue_check) {
             # If p-values are provided, merge those as well
-            scatter (merge_num in range((pval_length / max_length) + 1)){
+            scatter (merge_num in range((pval_length / max_length) + 1)) {
                 # Get the sublist of p-value files
-                scatter (pval_num in range(max_length)){
-                    Int num_p = (
-                        if merge_num > 0
+                scatter (pval_num in range(max_length)) {
+                    Int num_p = if merge_num > 0
                         then pval_num + (merge_num * max_length)
                         else pval_num
-                    )
-                    if (num_p < pval_length){
+                    if (num_p < pval_length) {
                         File pval_list = p_values[num_p]
                     }
                 }
             }
-            scatter (iter_index in range(length(pval_list))){
+            scatter (iter_index in range(length(pval_list))) {
                 call combine_data as inner_merge_pvals { input:
                     files_to_combine = select_all(pval_list[iter_index]),
                     combined_file_name = "~{iter_index}.pvals.combined.csv",
@@ -93,12 +89,12 @@ workflow methylation_cohort {
         }
     }

-    if (beta_length <= max_length){
+    if (beta_length <= max_length) {
         call combine_data as simple_merge { input:
             files_to_combine = unfiltered_normalized_beta,
             combined_file_name = "combined_beta.csv",
         }
-        if (pval_length > 0 && !skip_pvalue_check){
+        if (pval_length > 0 && !skip_pvalue_check) {
             call combine_data as simple_merge_pval { input:
                 files_to_combine = p_values,
                 combined_file_name = "combined_pvals.csv",
@@ -106,23 +102,18 @@ workflow methylation_cohort {
         }
     }

-    File? pval_file = (
-        if (pval_length > 0 && !skip_pvalue_check)
-        then select_first(
-            [
-                final_merge_pvals.combined_file,
-                simple_merge_pval.combined_file,
-            ])
+    File? pval_file = if (pval_length > 0 && !skip_pvalue_check)
+        then select_first([
+            final_merge_pvals.combined_file,
+            simple_merge_pval.combined_file,
+        ])
         else None
-    )

     call filter_probes { input:
-        beta_values = select_first(
-            [
-                final_merge.combined_file,
-                simple_merge.combined_file,
-            ]
-        ),
+        beta_values = select_first([
+            final_merge.combined_file,
+            simple_merge.combined_file,
+        ]),
         p_values = pval_file,
         num_probes,
         additional_probes_to_exclude = select_all([
@@ -140,12 +131,10 @@ workflow methylation_cohort {
     }

     output {
-        File combined_beta = select_first(
-            [
-                final_merge.combined_file,
-                simple_merge.combined_file,
-            ]
-        )
+        File combined_beta = select_first([
+            final_merge.combined_file,
+            simple_merge.combined_file,
+        ])
         File filtered_beta = filter_probes.filtered_beta_values
         File filtered_probeset = filter_probes.filtered_probes
         File umap_embedding = generate_umap.umap
@@ -159,7 +148,7 @@ task combine_data {
     meta {
         description: "Combine data from multiple CSV files by column"
         outputs: {
-            combined_file: "Combined CSV file"
+            combined_file: "Combined CSV file",
         }
     }
@@ -183,16 +172,19 @@ task combine_data {
         Int modify_memory_gb = 0
     }

-    Int memory_gb = ceil(size(files_to_combine, "GB") *
-        if simple_merge then 2 else 1)
-        + modify_memory_gb
-        + 2
+    Int memory_gb = ceil(size(files_to_combine, "GB") * if simple_merge
+        then 2
+        else 1
+    ) + modify_memory_gb + 2
     Int disk_size_gb = ceil(size(files_to_combine, "GB") * 2) + 2

     command <<<
         python /scripts/methylation/combine.py \
             --output-name "~{combined_file_name}" \
-            ~{if simple_merge then "--simple-merge" else ""} \
+            ~{if simple_merge
+                then "--simple-merge"
+                else ""
+            } \
             ~{sep(" ", quote(files_to_combine))}
     >>>
@@ -273,7 +265,7 @@ task generate_umap {
     meta {
         description: "Generate UMAP embedding"
         outputs: {
-            umap: "UMAP embedding for all samples"
+            umap: "UMAP embedding for all samples",
         }
     }
@@ -312,7 +304,7 @@ task plot_umap {
     meta {
         description: "Plot UMAP embedding"
         outputs: {
-            umap_plot: "UMAP plot for all samples"
+            umap_plot: "UMAP plot for all samples",
         }
     }
diff --git a/workflows/methylation/methylation-preprocess.wdl b/workflows/methylation/methylation-preprocess.wdl
index 4274b3942..76fdafe25 100644
--- a/workflows/methylation/methylation-preprocess.wdl
+++ b/workflows/methylation/methylation-preprocess.wdl
@@ -53,10 +53,8 @@ task process_raw_idats {
     >>>

     output {
-        File beta_swan_norm_unfiltered
-            = out_base + ".beta_swan_norm_unfiltered.csv"
-        File beta_swan_norm_unfiltered_genomic
out_base + ".beta_swan_norm_unfiltered.genomic.csv" + File beta_swan_norm_unfiltered = out_base + ".beta_swan_norm_unfiltered.csv" + File beta_swan_norm_unfiltered_genomic = out_base + ".beta_swan_norm_unfiltered.genomic.csv" File annotation = out_base + ".annotation.csv" File beta_unnorm = out_base + ".beta.csv" File cn_values = out_base + ".cn_values.csv" @@ -81,13 +79,15 @@ task list_sex_probes { meta { description: "List probes that map to the sex chromosomes" outputs: { - probe_list: "List of probe names that map to the sex chromosomes" + probe_list: "List of probe names that map to the sex chromosomes", } } - parameter_meta {} + parameter_meta { + } - input {} + input { + } command <<< set -euo pipefail diff --git a/workflows/methylation/methylation-standard.wdl b/workflows/methylation/methylation-standard.wdl index a3c9dfbdc..b3c3247bd 100644 --- a/workflows/methylation/methylation-standard.wdl +++ b/workflows/methylation/methylation-standard.wdl @@ -38,15 +38,15 @@ workflow methylation { scatter (pair in zip(green_idats, red_idats)) { call preprocess.process_raw_idats { input: - idats = pair + idats = pair, } } - call preprocess.list_sex_probes {} + call preprocess.list_sex_probes { + } call cohort.methylation_cohort { input: - unfiltered_normalized_beta = - process_raw_idats.beta_swan_norm_unfiltered_genomic, + unfiltered_normalized_beta = process_raw_idats.beta_swan_norm_unfiltered_genomic, p_values = process_raw_idats.probe_pvalues, sex_probe_list = list_sex_probes.probe_list, additional_probes_to_exclude, @@ -56,21 +56,19 @@ workflow methylation { Int probelist_length = length(probe_files) Int max_length = 100 - if (probelist_length > max_length){ - scatter (merge_num in range((probelist_length / max_length) + 1)){ + if (probelist_length > max_length) { + scatter (merge_num in range((probelist_length / max_length) + 1)) { # Get the sublist of probe files - scatter (probe_num in range(max_length)){ - Int num = ( - if merge_num > 0 + scatter (probe_num in range(max_length)) { + Int num = if merge_num > 0 then probe_num + (merge_num * max_length) else probe_num - ) - if (num < probelist_length){ + if (num < probelist_length) { File probe_file_batches = probe_files[num] } } } - scatter (iter_index in range(length(probe_file_batches))){ + scatter (iter_index in range(length(probe_file_batches))) { call concat_and_uniq { input: files_to_combine = select_all(probe_file_batches[iter_index]), output_file_name = "probes_with_snps_part_~{iter_index}.tab", @@ -79,13 +77,13 @@ workflow methylation { call concat_and_uniq as final_cat { input: files_to_combine = flatten([ - concat_and_uniq.combined_file + concat_and_uniq.combined_file, ]), output_file_name = "probes_with_snps.tab", } } - if (probelist_length <= max_length){ + if (probelist_length <= max_length) { call concat_and_uniq as simple_merge { input: files_to_combine = probe_files, output_file_name = "probes_with_snps.tab", @@ -95,21 +93,19 @@ workflow methylation { Array[File] non_genomic_probe_list = process_raw_idats.non_genomic_probes Int non_genomic_probelist_length = length(non_genomic_probe_list) - if (non_genomic_probelist_length > max_length){ - scatter (merge_num in range((non_genomic_probelist_length / max_length) + 1)){ + if (non_genomic_probelist_length > max_length) { + scatter (merge_num in range((non_genomic_probelist_length / max_length) + 1)) { # Get the sublist of probe files - scatter (probe_num in range(max_length)){ - Int num_ng = ( - if merge_num > 0 + scatter (probe_num in range(max_length)) { + Int num_ng = if 
+                Int num_ng = if merge_num > 0
                     then probe_num + (merge_num * max_length)
                     else probe_num
-                )
-                if (num_ng < non_genomic_probelist_length){
+                if (num_ng < non_genomic_probelist_length) {
                     File non_genomic_probe_batches = non_genomic_probe_list[num_ng]
                 }
             }
         }
-        scatter (iter_index in range(length(non_genomic_probe_batches))){
+        scatter (iter_index in range(length(non_genomic_probe_batches))) {
             call concat_and_uniq as non_genomic_concat { input:
                 files_to_combine = select_all(non_genomic_probe_batches[iter_index]),
                 output_file_name = "non_genomic_probes_part_~{iter_index}.tab",
@@ -118,13 +114,13 @@ workflow methylation {

         call concat_and_uniq as final_cat_non_genomic { input:
             files_to_combine = flatten([
-                non_genomic_concat.combined_file
+                non_genomic_concat.combined_file,
             ]),
             output_file_name = "non_genomic_probes.tab",
         }
     }

-    if (non_genomic_probelist_length <= max_length){
+    if (non_genomic_probelist_length <= max_length) {
         call concat_and_uniq as simple_merge_non_genomic { input:
             files_to_combine = non_genomic_probe_list,
             output_file_name = "non_genomic_probes.tab",
@@ -132,8 +128,7 @@ workflow methylation {
     }

     output {
-        Array[File] beta_swan_norm_unfiltered_genomic =
-            process_raw_idats.beta_swan_norm_unfiltered_genomic
+        Array[File] beta_swan_norm_unfiltered_genomic = process_raw_idats.beta_swan_norm_unfiltered_genomic
         File combined_beta = methylation_cohort.combined_beta
         File filtered_beta = methylation_cohort.filtered_beta
         File filtered_probeset = methylation_cohort.filtered_probeset
@@ -157,7 +152,7 @@ task concat_and_uniq {
     meta {
         description: "Concatenate multiple files and retain unique lines"
         outputs: {
-            combined_file: "File containing unique lines from all input files"
+            combined_file: "File containing unique lines from all input files",
         }
     }
diff --git a/workflows/qc/markdups-post.wdl b/workflows/qc/markdups-post.wdl
index 70771d8e8..2e0420fa5 100644
--- a/workflows/qc/markdups-post.wdl
+++ b/workflows/qc/markdups-post.wdl
@@ -5,7 +5,6 @@
 ## whether a read is a duplicate or not.
 ## But the tasks called below produce different results depending on whether the
 ## input BAM has been duplicate marked or not.
-
 version 1.1

 import "../../tools/mosdepth.wdl"
@@ -61,7 +60,7 @@ workflow markdups_post {
         bam_index = markdups_bam_index,
         prefix = prefix + "." + "whole_genome",
     }
-    scatter(coverage_pair in zip(coverage_beds, coverage_labels)) {
+    scatter (coverage_pair in zip(coverage_beds, coverage_labels)) {
         call mosdepth.coverage as regions_coverage { input:
             bam = markdups_bam,
             bam_index = markdups_bam_index,
@@ -72,8 +71,7 @@ workflow markdups_post {

     output {
         File insert_size_metrics = collect_insert_size_metrics.insert_size_metrics
-        File insert_size_metrics_pdf
-            = collect_insert_size_metrics.insert_size_metrics_pdf
+        File insert_size_metrics_pdf = collect_insert_size_metrics.insert_size_metrics_pdf
         File flagstat_report = flagstat.flagstat_report
         File mosdepth_global_summary = wg_coverage.summary
         File mosdepth_global_dist = wg_coverage.global_dist
diff --git a/workflows/qc/quality-check-standard.wdl b/workflows/qc/quality-check-standard.wdl
index 5115d3f90..c3d269bc6 100644
--- a/workflows/qc/quality-check-standard.wdl
+++ b/workflows/qc/quality-check-standard.wdl
@@ -126,8 +126,7 @@ workflow quality_check_standard {
         File kraken_db
         File? gtf
         #@ except: LineWidth
-        File multiqc_config
-            = "https://raw.githubusercontent.com/stjudecloud/workflows/main/workflows/qc/multiqc_config/multiqc_config.yaml"
+        File multiqc_config = "https://raw.githubusercontent.com/stjudecloud/workflows/main/workflows/qc/multiqc_config/multiqc_config.yaml"
         Array[File] extra_multiqc_inputs = []
         Array[File] coverage_beds = []
         Array[String] coverage_labels = []
@@ -164,20 +163,24 @@ workflow quality_check_standard {
         coverage_labels,
     }
     call flag_filter.validate_flag_filter as kraken_filter_validator { input:
-        flags = standard_filter
+        flags = standard_filter,
     }
     if (run_comparative_kraken) {
-        call flag_filter.validate_flag_filter
-            as comparative_kraken_filter_validator
-        { input:
-            flags = comparative_filter
+        call flag_filter.validate_flag_filter as comparative_kraken_filter_validator { input:
+            flags = comparative_filter,
         }
     }

-    call md5sum.compute_checksum after parse_input { input: file = bam }
+    call md5sum.compute_checksum after parse_input { input:
+        file = bam,
+    }

-    call samtools.quickcheck after parse_input { input: bam }
-    call util.compression_integrity after parse_input { input: bgzipped_file = bam }
+    call samtools.quickcheck after parse_input { input:
+        bam,
+    }
+    call util.compression_integrity after parse_input { input:
+        bgzipped_file = bam,
+    }

     if (subsample_n_reads > 0) {
         call samtools.subsample after quickcheck { input:
@@ -188,7 +191,10 @@ workflow quality_check_standard {
     }
     if (defined(subsample.sampled_bam)) {
         call samtools.index as subsample_index { input:
-            bam = select_first([subsample.sampled_bam, "undefined"]),
+            bam = select_first([
+                subsample.sampled_bam,
+                "undefined",
+            ]),
             use_all_cores,
         }
     }
@@ -203,11 +209,9 @@ workflow quality_check_standard {
         subsample_index.bam_index,
         bam_index,
     ])
-    String post_subsample_prefix = (
-        if (defined(subsample.sampled_bam))
+    String post_subsample_prefix = if (defined(subsample.sampled_bam))
         then prefix + ".subsampled"
         else prefix
-    )

     call picard.validate_bam after quickcheck { input:
         bam = post_subsample_bam,
@@ -235,7 +239,9 @@ workflow quality_check_standard {
         outfile_name = post_subsample_prefix + ".readlength.tsv",
     }
     call ngsderive.encoding after quickcheck { input:
-        ngs_files = [post_subsample_bam],
+        ngs_files = [
+            post_subsample_bam,
+        ],
         outfile_name = post_subsample_prefix + ".encoding.tsv",
         num_reads = -1,
     }
@@ -249,9 +255,7 @@ workflow quality_check_standard {
         prefix = post_subsample_prefix,
     }

-    call samtools.bam_to_fastq after quickcheck
-        after kraken_filter_validator
-    { input:
+    call samtools.bam_to_fastq after quickcheck after kraken_filter_validator { input:
         bam = post_subsample_bam,
         bitwise_filter = standard_filter,
         prefix = post_subsample_prefix,
@@ -267,14 +271,24 @@ workflow quality_check_standard {
     }

     call fq.fqlint { input:
-        read_one_fastq = select_first([bam_to_fastq.read_one_fastq_gz, "undefined"]),
-        read_two_fastq = select_first([bam_to_fastq.read_two_fastq_gz, "undefined"]),
+        read_one_fastq = select_first([
+            bam_to_fastq.read_one_fastq_gz,
+            "undefined",
+        ]),
+        read_two_fastq = select_first([
+            bam_to_fastq.read_two_fastq_gz,
+            "undefined",
+        ]),
     }
     call kraken2.kraken after fqlint { input:
-        read_one_fastq_gz
-            = select_first([bam_to_fastq.read_one_fastq_gz, "undefined"]),
-        read_two_fastq_gz
-            = select_first([bam_to_fastq.read_two_fastq_gz, "undefined"]),
+        read_one_fastq_gz = select_first([
+            bam_to_fastq.read_one_fastq_gz,
+            "undefined",
+        ]),
+        read_two_fastq_gz = select_first([
+            bam_to_fastq.read_two_fastq_gz,
+            "undefined",
+        ]),
         db = kraken_db,
         store_sequences = store_kraken_sequences,
         prefix = post_subsample_prefix,
@@ -282,23 +296,29 @@ workflow quality_check_standard {
     }
     if (run_fastp) {
         call fp.fastp after fqlint { input:
-            read_one_fastq
-                = select_first([bam_to_fastq.read_one_fastq_gz, "undefined"]),
-            read_two_fastq
-                = select_first([bam_to_fastq.read_two_fastq_gz, "undefined"]),
+            read_one_fastq = select_first([
+                bam_to_fastq.read_one_fastq_gz,
+                "undefined",
+            ]),
+            read_two_fastq = select_first([
+                bam_to_fastq.read_two_fastq_gz,
+                "undefined",
+            ]),
             output_fastq = false,
         }
     }
     if (run_librarian) {
         call libraran_tasks.librarian after fqlint { input:
-            read_one_fastq = select_first([bam_to_fastq.read_one_fastq_gz, "undefined"]),
+            read_one_fastq = select_first([
+                bam_to_fastq.read_one_fastq_gz,
+                "undefined",
+            ]),
         }
     }
     if (run_comparative_kraken) {
-        call samtools.bam_to_fastq as alt_filtered_fastq after quickcheck
-            after comparative_kraken_filter_validator
-        { input:
+        call samtools.bam_to_fastq as alt_filtered_fastq after quickcheck after comparative_kraken_filter_validator {
+            input:
             bam = post_subsample_bam,
             bitwise_filter = comparative_filter,
             prefix = post_subsample_prefix + ".alt_filtered",
@@ -315,16 +335,24 @@ workflow quality_check_standard {
             use_all_cores,
         }
         call fq.fqlint as alt_filtered_fqlint { input:
-            read_one_fastq
-                = select_first([alt_filtered_fastq.read_one_fastq_gz, "undefined"]),
-            read_two_fastq
-                = select_first([alt_filtered_fastq.read_two_fastq_gz, "undefined"]),
+            read_one_fastq = select_first([
+                alt_filtered_fastq.read_one_fastq_gz,
+                "undefined",
+            ]),
+            read_two_fastq = select_first([
+                alt_filtered_fastq.read_two_fastq_gz,
+                "undefined",
+            ]),
         }
         call kraken2.kraken as comparative_kraken after alt_filtered_fqlint { input:
-            read_one_fastq_gz
-                = select_first([alt_filtered_fastq.read_one_fastq_gz, "undefined"]),
-            read_two_fastq_gz
-                = select_first([alt_filtered_fastq.read_two_fastq_gz, "undefined"]),
+            read_one_fastq_gz = select_first([
+                alt_filtered_fastq.read_one_fastq_gz,
+                "undefined",
+            ]),
+            read_two_fastq_gz = select_first([
+                alt_filtered_fastq.read_two_fastq_gz,
+                "undefined",
+            ]),
             db = kraken_db,
             store_sequences = store_kraken_sequences,
             prefix = post_subsample_prefix + ".alt_filtered",
@@ -337,8 +365,8 @@ workflow quality_check_standard {
         bam_index = post_subsample_bam_index,
         prefix = post_subsample_prefix + ".whole_genome",
     }
-    scatter(coverage_pair in zip(coverage_beds, parse_input.labels)) {
-        call mosdepth.coverage as regions_coverage after quickcheck { input:
+    scatter (coverage_pair in zip(coverage_beds, parse_input.labels)) {
+        call mosdepth.coverage as regions_coverage after quickcheck { input:
             bam = post_subsample_bam,
             bam_index = post_subsample_bam_index,
             coverage_bed = coverage_pair.left,
@@ -350,19 +378,31 @@ workflow quality_check_standard {
     call ngsderive.junction_annotation after quickcheck { input:
         bam = post_subsample_bam,
         bam_index = post_subsample_bam_index,
-        gene_model = select_first([gtf, "undefined"]),
+        gene_model = select_first([
+            gtf,
+            "undefined",
+        ]),
         prefix = post_subsample_prefix,
     }
     call ngsderive.strandedness after quickcheck { input:
         bam = post_subsample_bam,
         bam_index = post_subsample_bam_index,
-        gene_model = select_first([gtf, "undefined"]),
+        gene_model = select_first([
+            gtf,
+            "undefined",
+        ]),
         outfile_name = post_subsample_prefix + ".strandedness.tsv",
     }
     call qualimap.rnaseq as qualimap_rnaseq { input:
-        bam = select_first([bam_to_fastq.collated_bam, "undefined"]),
+        bam = select_first([
+            bam_to_fastq.collated_bam,
+            "undefined",
+        ]),
         prefix = post_subsample_prefix + ".qualimap_rnaseq_results",
-        gtf = select_first([gtf, "undefined"]),
+        gtf = select_first([
+            gtf,
+            "undefined",
+        ]),
         name_sorted = true,
         paired_end = true, # matches default but prevents user from overriding
     }
@@ -434,17 +474,27 @@ workflow quality_check_standard {
         ],
         regions_coverage.summary,
         select_all(regions_coverage.region_dist),
-        select_first([markdups_post.mosdepth_region_summary, []]),
-        select_first([markdups_post.mosdepth_region_dist, []]),
-        (
-            if (mark_duplicates && optical_distance > 0)
-            then [markdups.mark_duplicates_metrics]
+        select_first([
+            markdups_post.mosdepth_region_summary,
+            [],
+        ]),
+        select_first([
+            markdups_post.mosdepth_region_dist,
+            [],
+        ]),
+        if (mark_duplicates && optical_distance > 0)
+            then [
+                markdups.mark_duplicates_metrics,
+            ]
             else []
-        ),
+        ,
     ]))

     call multiqc_tasks.multiqc { input:
-        files = flatten([multiqc_files, extra_multiqc_inputs]),
+        files = flatten([
+            multiqc_files,
+            extra_multiqc_inputs,
+        ]),
         config = multiqc_config,
         report_name = post_subsample_prefix + ".multiqc",
     }
@@ -483,7 +533,10 @@ workflow quality_check_standard {
         File? kraken_sequences = kraken.sequences
         File? comparative_kraken_sequences = comparative_kraken.sequences
         File? junctions = junction_annotation.junctions
-        Array[File] intermediate_files = select_first([optional_files, []])
+        Array[File] intermediate_files = select_first([
+            optional_files,
+            [],
+        ])
     }
 }
@@ -491,7 +544,7 @@ task parse_input {
     meta {
         description: "Parses and validates the `quality_check_standard` workflow's provided inputs"
         outputs: {
-            labels: "An array of labels to use on the result coverage files associated with each coverage BED"
+            labels: "An array of labels to use on the result coverage files associated with each coverage BED",
         }
     }
@@ -539,11 +592,9 @@ task parse_input {
     >>>

     output {
-        Array[String] labels = (
-            if (coverage_beds_len > 0)
+        Array[String] labels = if (coverage_beds_len > 0)
             then read_lines("labels.txt")
             else []
-        )
     }

     runtime {
diff --git a/workflows/reference/bwa-db-build.wdl b/workflows/reference/bwa-db-build.wdl
index 3aefef4f1..579385f1c 100644
--- a/workflows/reference/bwa-db-build.wdl
+++ b/workflows/reference/bwa-db-build.wdl
@@ -40,7 +40,7 @@ workflow bwa_db_build {
     }

     output {
-        File reference_fa = reference_download.downloaded_file
-        File bwa_db_tar_gz = build_bwa_db.bwa_db_tar_gz
+        File reference_fa = reference_download.downloaded_file
+        File bwa_db_tar_gz = build_bwa_db.bwa_db_tar_gz
     }
 }
diff --git a/workflows/reference/gatk-reference.wdl b/workflows/reference/gatk-reference.wdl
index 3b4dff835..2cf2e4087 100644
--- a/workflows/reference/gatk-reference.wdl
+++ b/workflows/reference/gatk-reference.wdl
@@ -88,16 +88,28 @@ workflow gatk_reference {

     if (defined(dbSNP_vcf_index_url) && defined(dbSNP_vcf_index_name)) {
         call util.download as dbsnp_index { input:
-            url = select_first([dbSNP_vcf_index_url, "undefined"]),
-            outfile_name = select_first([dbSNP_vcf_index_name, "undefined"]),
+            url = select_first([
+                dbSNP_vcf_index_url,
+                "undefined",
+            ]),
+            outfile_name = select_first([
+                dbSNP_vcf_index_name,
+                "undefined",
+            ]),
             disk_size_gb = dbSNP_vcf_index_disk_size_gb,
         }
     }

     if (defined(interval_list_url) && defined(interval_list_name)) {
         call util.download as intervals { input:
-            url = select_first([interval_list_url, "undefined"]),
-            outfile_name = select_first([interval_list_name, "undefined"]),
+            url = select_first([
+                interval_list_url,
+                "undefined",
+            ]),
+            outfile_name = select_first([
+                interval_list_name,
+                "undefined",
+            ]),
             disk_size_gb = interval_list_disk_size_gb,
         }
     }
diff --git a/workflows/reference/qc-reference.wdl b/workflows/reference/qc-reference.wdl
index 2d64b8901..28f67aebf 100644
--- a/workflows/reference/qc-reference.wdl
+++ b/workflows/reference/qc-reference.wdl
@@ -121,12 +121,12 @@ workflow qc_reference {
         }
     }

-    if (
-        (length(kraken_fastas) > 0)
-        || (length(kraken_fasta_urls) > 0)
-        || (length(kraken_libraries) > 0)
-    ) {
-        call kraken2.download_taxonomy { input: protein }
+    if ((length(kraken_fastas) > 0) || (length(kraken_fasta_urls) > 0) || (length(
+        kraken_libraries
+    ) > 0)) {
+        call kraken2.download_taxonomy { input:
+            protein,
+        }
     }

     scatter (lib in kraken_libraries) {
@@ -136,7 +136,10 @@ workflow qc_reference {
         }
     }

-    Array[File] custom_fastas = flatten([kraken_fastas, fastas_download.downloaded_file])
+    Array[File] custom_fastas = flatten([
+        kraken_fastas,
+        fastas_download.downloaded_file,
+    ])
     if (length(custom_fastas) > 0) {
         call kraken2.create_library_from_fastas { input:
             fastas_gz = custom_fastas,
@@ -145,9 +148,13 @@ workflow qc_reference {
     }

     Array[File] kraken_tarballs = flatten([
-        select_all([download_taxonomy.taxonomy]),
+        select_all([
+            download_taxonomy.taxonomy,
+        ]),
         download_library.library,
-        select_all([create_library_from_fastas.custom_library]),
+        select_all([
+            create_library_from_fastas.custom_library,
+        ]),
     ])
     if (length(kraken_tarballs) > 0) {
         call kraken2.build_db as kraken_build_db { input:
diff --git a/workflows/reference/star-db-build.wdl b/workflows/reference/star-db-build.wdl
index d3a99fbe2..d2d14b2a1 100644
--- a/workflows/reference/star-db-build.wdl
+++ b/workflows/reference/star-db-build.wdl
@@ -56,8 +56,8 @@ workflow star_db_build {
     }

     output {
-        File reference_fa = reference_download.downloaded_file
-        File gtf = gtf_download.downloaded_file
-        File star_db_tar_gz = build_star_db.star_db
+        File reference_fa = reference_download.downloaded_file
+        File gtf = gtf_download.downloaded_file
+        File star_db_tar_gz = build_star_db.star_db
     }
 }
diff --git a/workflows/rnaseq/rnaseq-core.wdl b/workflows/rnaseq/rnaseq-core.wdl
index b5d088317..819f8a110 100644
--- a/workflows/rnaseq/rnaseq-core.wdl
+++ b/workflows/rnaseq/rnaseq-core.wdl
@@ -144,9 +144,7 @@ workflow rnaseq_core {
             GC_AG_and_CT_GC_motif: 5,
             AT_AC_and_GT_AT_motif: 5,
         }
-        String prefix = sub(
-            basename(read_one_fastqs_gz[0]),
-            "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$",
+        String prefix = sub(basename(read_one_fastqs_gz[0]), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$",
             "" # Once replacing with capturing groups is supported, replace with group 3
         )
         String xenocp_aligner = "star"
@@ -201,16 +199,12 @@ workflow rnaseq_core {
         }
     }

-    Array[File] chosen_r1s = (
-        if enable_read_trimming
+    Array[File] chosen_r1s = if enable_read_trimming
         then select_all(trim.read_one_fastq_gz)
         else read_one_fastqs_gz
-    )
-    Array[File] chosen_r2s = (
-        if enable_read_trimming
+    Array[File] chosen_r2s = if enable_read_trimming
         then select_all(trim.read_two_fastq_gz)
         else read_two_fastqs_gz
-    )

     call star.alignment after validate { input:
         read_one_fastqs_gz = chosen_r1s,
@@ -252,22 +246,19 @@ workflow rnaseq_core {
         gene_model = gtf,
     }

-    String htseq_strandedness = (
-        if (provided_strandedness != "")
+    String htseq_strandedness = if (provided_strandedness != "")
         then htseq_strandedness_mapping[provided_strandedness]
         else htseq_strandedness_mapping[ngsderive_strandedness.strandedness_string]
-    )

     call htseq.count as htseq_count { input:
         bam = alignment_post.processed_bam,
         gtf,
         strandedness = htseq_strandedness,
-        prefix = basename(alignment_post.processed_bam, "bam")
-            + (
-                if provided_strandedness == ""
-                then ngsderive_strandedness.strandedness_string
-                else provided_strandedness
-            ),
+        prefix = basename(alignment_post.processed_bam, "bam") + if provided_strandedness
+            == ""
+            then ngsderive_strandedness.strandedness_string
+            else provided_strandedness
+        ,
         pos_sorted = true,
     }
@@ -280,9 +271,13 @@ workflow rnaseq_core {
         File feature_counts = htseq_count.feature_counts
         File inferred_strandedness = ngsderive_strandedness.strandedness_file
         String inferred_strandedness_string = ngsderive_strandedness.strandedness_string
-        Array[File] fastp_reports = select_all(flatten([fastp.report, trim.report]))
-        Array[File] fastp_jsons = select_all(flatten(
-            [fastp.report_json, trim.report_json]
-        ))
+        Array[File] fastp_reports = select_all(flatten([
+            fastp.report,
+            trim.report,
+        ]))
+        Array[File] fastp_jsons = select_all(flatten([
+            fastp.report_json,
+            trim.report_json,
+        ]))
     }
 }
diff --git a/workflows/rnaseq/rnaseq-standard-fastq.wdl b/workflows/rnaseq/rnaseq-standard-fastq.wdl
index dee188b10..29ff3bfc6 100644
--- a/workflows/rnaseq/rnaseq-standard-fastq.wdl
+++ b/workflows/rnaseq/rnaseq-standard-fastq.wdl
@@ -73,9 +73,7 @@ workflow rnaseq_standard_fastq {
         Array[File] read_two_fastqs_gz
         Array[ReadGroup] read_groups
         File? contaminant_db
-        String prefix = sub(
-            basename(read_one_fastqs_gz[0]),
-            "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$",
+        String prefix = sub(basename(read_one_fastqs_gz[0]), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$",
             "" # Once replacing with capturing groups is supported, replace with group 3
         )
         String xenocp_aligner = "star"
@@ -100,7 +98,7 @@ workflow rnaseq_standard_fastq {
         }
     }

-    if (validate_input){
+    if (validate_input) {
         scatter (reads in zip(read_one_fastqs_gz, read_two_fastqs_gz)) {
             call fq.fqlint after parse_input { input:
                 read_one_fastq = reads.left,
@@ -123,12 +121,10 @@ workflow rnaseq_standard_fastq {
         subsample.subsampled_read1,
         read_one_fastqs_gz,
     ])
-    Array[File] selected_read_two_fastqs = select_all(
-        select_first([
-            subsample.subsampled_read2,
-            read_two_fastqs_gz,
-        ])
-    )
+    Array[File] selected_read_two_fastqs = select_all(select_first([
+        subsample.subsampled_read2,
+        read_two_fastqs_gz,
+    ]))

     call rnaseq_core_wf.rnaseq_core after fqlint { input:
         read_one_fastqs_gz = selected_read_one_fastqs,
diff --git a/workflows/rnaseq/rnaseq-standard.wdl b/workflows/rnaseq/rnaseq-standard.wdl
index edac26733..793446486 100755
--- a/workflows/rnaseq/rnaseq-standard.wdl
+++ b/workflows/rnaseq/rnaseq-standard.wdl
@@ -92,7 +92,10 @@ workflow rnaseq_standard {
             use_all_cores,
         }
     }
-    File selected_bam = select_first([subsample.sampled_bam, bam])
+    File selected_bam = select_first([
+        subsample.sampled_bam,
+        bam,
+    ])

     call read_group.get_read_groups after validate_input_bam { input:
         bam = selected_bam,
diff --git a/workflows/rnaseq/rnaseq-variant-calling.wdl b/workflows/rnaseq/rnaseq-variant-calling.wdl
index 8df2e61a1..ab7a8a445 100644
--- a/workflows/rnaseq/rnaseq-variant-calling.wdl
+++ b/workflows/rnaseq/rnaseq-variant-calling.wdl
@@ -54,7 +54,7 @@ workflow rnaseq_variant_calling {
         Int scatter_count = 6
     }

-    if (!bam_is_dup_marked){
+    if (!bam_is_dup_marked) {
         call picard.mark_duplicates { input:
             bam,
             create_bam = true,
@@ -62,8 +62,14 @@ workflow rnaseq_variant_calling {
     }

     call gatk.split_n_cigar_reads { input:
-        bam = select_first([mark_duplicates.duplicate_marked_bam, bam]),
-        bam_index = select_first([mark_duplicates.duplicate_marked_bam_index, bam_index]),
+        bam = select_first([
+            mark_duplicates.duplicate_marked_bam,
+            bam,
+        ]),
+        bam_index = select_first([
+            mark_duplicates.duplicate_marked_bam_index,
+            bam_index,
+        ]),
         fasta,
         fasta_index,
         dict,