Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions data_structures/flag_filter.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@
## In short, those are all flags corresponding to the quality of the read
## and them being `true` may indicate that the read is of low quality and
## should be excluded.

version 1.1

struct FlagFilter {
Expand Down Expand Up @@ -127,15 +126,15 @@ workflow validate_flag_filter {
}

call validate_string_is_12bit_int as validate_include_if_any { input:
number = flags.include_if_any
number = flags.include_if_any,
}
call validate_string_is_12bit_int as validate_include_if_all { input:
number = flags.include_if_all
number = flags.include_if_all,
}
call validate_string_is_12bit_int as validate_exclude_if_any { input:
number = flags.exclude_if_any
number = flags.exclude_if_any,
}
call validate_string_is_12bit_int as validate_exclude_if_all { input:
number = flags.exclude_if_all
number = flags.exclude_if_all,
}
}
31 changes: 22 additions & 9 deletions data_structures/read_group.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
## }
## }
## ```

version 1.1

#@ except: SnakeCase
Expand Down Expand Up @@ -99,8 +98,7 @@ workflow read_group_to_string {
}

output {
String validated_read_group
= inner_read_group_to_string.stringified_read_group
String validated_read_group = inner_read_group_to_string.stringified_read_group
}
}

Expand All @@ -109,7 +107,7 @@ task get_read_groups {
description: "Gets read group information from a BAM file and writes it out as JSON which is converted to a WDL struct."
warning: "This task will uppercase any lowercase `PL` values it finds, as is required by the [SAM specification](https://samtools.github.io/hts-specs/SAMv1.pdf)."
outputs: {
read_groups: "An array of `ReadGroup` structs containing read group information."
read_groups: "An array of `ReadGroup` structs containing read group information.",
}
}

Expand Down Expand Up @@ -167,8 +165,18 @@ task validate_read_group {
String sample_pattern = "sample.?"
String restrictive_pattern = "\\ " # Disallow spaces
Array[String] platforms = [
"CAPILLARY", "DNBSEQ", "ELEMENT", "HELICOS", "ILLUMINA", "IONTORRENT", "LS454",
"ONT", "PACBIO", "SINGULAR", "SOLID", "ULTIMA",
"CAPILLARY",
"DNBSEQ",
"ELEMENT",
"HELICOS",
"ILLUMINA",
"IONTORRENT",
"LS454",
"ONT",
"PACBIO",
"SINGULAR",
"SOLID",
"ULTIMA",
]

command <<<
Expand Down Expand Up @@ -268,7 +276,10 @@ task validate_read_group {
fi
fi
if [ "$(echo "~{sep(" ", required_fields)}" | grep -Ewc "KS")" -eq 1 ]; then
if [ -z "~{if defined(read_group.KS) then read_group.KS else ""}" ]; then
if [ -z "~{if defined(read_group.KS)
then read_group.KS
else ""
}" ]; then
>&2 echo "KS is required"
exit_code=1
fi
Expand Down Expand Up @@ -366,7 +377,7 @@ task inner_read_group_to_string {
description: "Converts a `ReadGroup` struct to a `String` **without any validation**."
warning: "Please use the `read_group_to_string` workflow, which has validation of the `ReadGroup` contents."
outputs: {
stringified_read_group: "Input `ReadGroup` as a string"
stringified_read_group: "Input `ReadGroup` as a string",
}
}

Expand All @@ -383,7 +394,9 @@ task inner_read_group_to_string {
Boolean format_as_sam_record = false
}

String delimiter = if format_as_sam_record then "\\t" else " "
String delimiter = if format_as_sam_record
then "\\t"
else " "

command <<<
if ~{format_as_sam_record}; then
Expand Down
81 changes: 54 additions & 27 deletions tools/arriba.wdl
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
## [Homepage](https://arriba.readthedocs.io/en/latest/)

version 1.1

task arriba {
Expand Down Expand Up @@ -138,14 +137,40 @@ task arriba {
File? protein_domains
File? wgs_svs
Array[String] interesting_contigs = [
"1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14",
"15", "16", "17", "18", "19", "20", "21", "22", "X", "Y", "AC_*", "NC_*",
"1",
"2",
"3",
"4",
"5",
"6",
"7",
"8",
"9",
"10",
"11",
"12",
"13",
"14",
"15",
"16",
"17",
"18",
"19",
"20",
"21",
"22",
"X",
"Y",
"AC_*",
"NC_*",
]
Array[String] viral_contigs = [
"AC_*",
"NC_*",
]
Array[String] viral_contigs = ["AC_*", "NC_*"]
Array[String] disable_filters = []
#@ except: LineWidth
String feature_name
= "gene_name=gene_name|gene_id,gene_id=gene_id,transcript_id=transcript_id,feature_exon=exon,feature_CDS=CDS"
String feature_name = "gene_name=gene_name|gene_id,gene_id=gene_id,transcript_id=transcript_id,feature_exon=exon,feature_CDS=CDS"
String prefix = basename(bam, ".bam") + ".fusions"
String strandedness = "auto"
Boolean mark_duplicates = true
Expand Down Expand Up @@ -176,10 +201,8 @@ task arriba {
}

Int bam_size_gb = ceil(size(bam, "GB"))
Int disk_size_gb = bam_size_gb
+ ceil(size(gtf, "GB"))
+ ceil(size(reference_fasta_gz, "GB"))
+ modify_disk_size_gb
Int disk_size_gb = bam_size_gb + ceil(size(gtf, "GB")) + ceil(size(reference_fasta_gz,
"GB")) + modify_disk_size_gb
Comment on lines +204 to +205
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This isn't that bad, but I'd almost prefer to treat addition chains the same way as arrays and put a line break after each operand. The current line break makes sense to me, since it is the first available break point that doesn't exceed the line length, but it feels weird to break in the middle of a `size` call that is nested inside a `ceil`, partway through the `+` chain.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe the length of the chain should be considered? Something like: "if there are more than N (3? 4?) binary operators in a chain, line break on each one"?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this comment is superseded by the "priority list" and "clauses" discussion we've had.

Int memory_gb = bam_size_gb + modify_memory_gb

command <<<
Expand All @@ -198,21 +221,18 @@ task arriba {
~{"-d '" + wgs_svs + "'"} \
-D ~{max_genomic_breakpoint_distance} \
-s "~{strandedness}" \
~{(
if length(interesting_contigs) > 0
~{if length(interesting_contigs) > 0
then "-i " + sep(",", quote(interesting_contigs))
else ""
)} \
~{(
if length(viral_contigs) > 0
} \
~{if length(viral_contigs) > 0
then "-v " + sep(",", quote(viral_contigs))
else ""
)} \
~{(
if length(disable_filters) > 0
} \
~{if length(disable_filters) > 0
then "-f " + sep(",", quote(disable_filters))
else ""
)} \
} \
-E ~{max_e_value} \
-S ~{min_supporting_reads} \
-m ~{max_mismappers} \
Expand All @@ -232,9 +252,18 @@ task arriba {
-l ~{max_itd_length} \
-z ~{min_itd_allele_fraction} \
-Z ~{min_itd_supporting_reads} \
~{if mark_duplicates then "" else "-u"} \
~{if report_additional_columns then "-X" else ""} \
~{if fill_gaps then "-I" else ""}
~{if mark_duplicates
then ""
else "-u"
} \
~{if report_additional_columns
then "-X"
else ""
} \
~{if fill_gaps
then "-I"
else ""
}
>>>

output {
Expand All @@ -255,7 +284,7 @@ task arriba_tsv_to_vcf {
meta {
description: "Convert Arriba TSV format fusions to VCF format."
outputs: {
fusions_vcf: "Output file of fusions in VCF format"
fusions_vcf: "Output file of fusions in VCF format",
}
}

Expand All @@ -274,9 +303,7 @@ task arriba_tsv_to_vcf {
}

Int input_size_gb = ceil(size(fusions, "GB"))
Int disk_size_gb = ceil(input_size_gb)
+ (ceil(size(reference_fasta, "GB")) * 3)
+ modify_disk_size_gb
Int disk_size_gb = ceil(input_size_gb) + (ceil(size(reference_fasta, "GB")) * 3) + modify_disk_size_gb

command <<<
set -euo pipefail
Expand Down Expand Up @@ -356,7 +383,7 @@ task arriba_annotate_exon_numbers {
meta {
description: "Annotate fusions with exon numbers."
outputs: {
fusion_tsv: "TSV file with fusions annotated with exon numbers"
fusion_tsv: "TSV file with fusions annotated with exon numbers",
}
}

Expand Down
Loading
Loading