diff --git a/.github/workflows/cd.yaml b/.github/workflows/cd.yaml index 7c65d7abd..e2d99494f 100644 --- a/.github/workflows/cd.yaml +++ b/.github/workflows/cd.yaml @@ -4,6 +4,7 @@ on: push: branches: - main + - staging/sprocket-doc jobs: gh-pages: @@ -17,11 +18,11 @@ jobs: run: rustup update stable && rustup default stable - name: Build Sprocket run: | - cargo install sprocket --locked + cargo install sprocket --locked --git https://github.com/Serial-ATA/sprocket.git --branch doc-tables - name: Build Docs run: | cd $GITHUB_WORKSPACE/workflows - sprocket dev doc -v --homepage assets/DOCS.md --prioritize-workflows-view . + sprocket dev doc --homepage assets/DOCS.md --with-doc-comments . cp -r assets docs/ - name: Deploy uses: peaceiris/actions-gh-pages@v4 diff --git a/data_structures/flag_filter.wdl b/data_structures/flag_filter.wdl index 3f1f0826e..c6b97cbf1 100644 --- a/data_structures/flag_filter.wdl +++ b/data_structures/flag_filter.wdl @@ -1,6 +1,7 @@ -## # FlagFilter -## +version 1.1 + ## A struct to represent the filtering flags used in various `samtools` commands. +## ## The order of precedence is `include_if_all`, `exclude_if_any`, `include_if_any`, ## and `exclude_if_all`. ## These four fields correspond to the samtools flags @@ -58,14 +59,15 @@ ## In short, those are all flags corresponding to the quality of the read ## and them being `true` may indicate that the read is of low quality and ## should be excluded. 
- -version 1.1 - struct FlagFilter { - String include_if_all # samtools -f - String exclude_if_any # samtools -F - String include_if_any # samtools --rf - String exclude_if_all # samtools -G + ## Corresponds to `samtools -f` + String include_if_all + ## Corresponds to `samtools -F` + String exclude_if_any + ## Corresponds to `samtools --rf` + String include_if_any + ## Corresponds to `samtools -G` + String exclude_if_all } task validate_string_is_12bit_int { diff --git a/data_structures/read_group.wdl b/data_structures/read_group.wdl index eadb9edc1..91a28a8da 100644 --- a/data_structures/read_group.wdl +++ b/data_structures/read_group.wdl @@ -1,32 +1,7 @@ -## Read groups are defined in the SAM spec -## - `ID`: Read group identifier. Each Read Group must have a unique ID. -## The value of ID is used in the RG tags of alignment records. -## - `BC`: Barcode sequence identifying the sample or library. This value is the -## expected barcode bases as read by the sequencing machine in the absence -## of errors. If there are several barcodes for the sample/library -## (e.g., one on each end of the template), the recommended implementation -## concatenates all the barcodes separating them with hyphens (`-`). -## - `CN`: Name of sequencing center producing the read. -## - `DS`: Description. -## - `DT`: Date the run was produced (ISO8601 date or date/time). -## - `FO`: Flow order. The array of nucleotide bases that correspond to the nucleotides -## used for each flow of each read. Multi-base flows are encoded in IUPAC format, -## and non-nucleotide flows by various other characters. -## Format: `/\\*|[ACMGRSVTWYHKDBN]+/` -## - `KS`: The array of nucleotide bases that correspond to the key sequence of each read. -## - `LB`: Library. -## - `PG`: Programs used for processing the read group. -## - `PI`: Predicted median insert size, rounded to the nearest integer. -## - `PL`: Platform/technology used to produce the reads. 
-## Valid values: CAPILLARY, DNBSEQ (MGI/BGI), ELEMENT, HELICOS, ILLUMINA, IONTORRENT, -## LS454, ONT (Oxford Nanopore), PACBIO (Pacific Biosciences), SINGULAR, SOLID, -## and ULTIMA. This field should be omitted when the technology is not in this list -## (though the PM field may still be present in this case) or is unknown. -## - `PM`: Platform model. Free-form text providing further details of the -## platform/technology used. -## - `PU`: Platform unit (e.g., flowcell-barcode.lane for Illumina or slide -## for SOLiD). Unique identifier. -## - `SM`: Sample. Use pool name where a pool is being sequenced. +version 1.1 + +## Read groups are defined in the SAM specification. This struct +## provides utility for constructing, validating, and parsing read groups. ## ## An example input JSON entry for `read_group` might look like this: ## ```json @@ -40,25 +15,50 @@ ## } ## } ## ``` - -version 1.1 - #@ except: SnakeCase struct ReadGroup { + ## `ID`: Read group identifier. Each Read Group must have a unique ID. + ## The value of ID is used in the RG tags of alignment records. String ID - String? BC + ## `CN`: Name of sequencing center producing the read. String? CN + ## `DS`: Description. String? DS + ## `DT`: Date the run was produced (ISO8601 date or date/time). String? DT + ## `FO`: Flow order. The array of nucleotide bases that correspond to the nucleotides + ## used for each flow of each read. Multi-base flows are encoded in IUPAC format, + ## and non-nucleotide flows by various other characters. + ## Format: `/\\*|[ACMGRSVTWYHKDBN]+/` String? FO + ## `KS`: The array of nucleotide bases that correspond to the key sequence of each read. String? KS + ## `LB`: Library. String? LB + ## `PG`: Programs used for processing the read group. String? PG + ## `PI`: Predicted median insert size, rounded to the nearest integer. Int? PI + ## `PL`: Platform/technology used to produce the reads. 
+ ## Valid values: CAPILLARY, DNBSEQ (MGI/BGI), ELEMENT, HELICOS, ILLUMINA, IONTORRENT, + ## LS454, ONT (Oxford Nanopore), PACBIO (Pacific Biosciences), SINGULAR, SOLID, + ## and ULTIMA. This field should be omitted when the technology is not in this list + ## (though the PM field may still be present in this case) or is unknown. String? PL + ## `PM`: Platform model. Free-form text providing further details of the + ## platform/technology used. String? PM + ## `PU`: Platform unit (e.g., flowcell-barcode.lane for Illumina or slide + ## for SOLiD). Unique identifier. String? PU + ## `SM`: Sample. Use pool name where a pool is being sequenced. String? SM + ## `BC`: Barcode sequence identifying the sample or library. This value is the + ## expected barcode bases as read by the sequencing machine in the absence + ## of errors. If there are several barcodes for the sample/library + ## (e.g., one on each end of the template), the recommended implementation + ## concatenates all the barcodes separating them with hyphens (`-`). + String? BC } workflow read_group_to_string { diff --git a/data_structures/strandedness.wdl b/data_structures/strandedness.wdl new file mode 100644 index 000000000..fccfe9a73 --- /dev/null +++ b/data_structures/strandedness.wdl @@ -0,0 +1,16 @@ +version 1.3 + +## Possible strandedness protocols used during RNA-Seq library prep. +enum Strandedness { + ## The protocol is unknown or otherwise unspecified. + ## + ## Some tooling can automatically derive a suspected strandedness protocol, + ## if one is not specified. + Unspecified, + ## The RNA-Seq library was prepped with an unstranded protocol. + Unstranded, + ## The RNA-Seq library was prepped with a Stranded-Reverse protocol. + StrandedReverse, + ## The RNA-Seq library was prepped with a Stranded-Forward protocol. + StrandedForward, +}