Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 5 additions & 14 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,30 +1,21 @@
# task_template x.y.z
# task_spatial_segmentation x.y.z

## BREAKING CHANGES

<!-- * Restructured `src` directory (PR #3). -->

## NEW FUNCTIONALITY

* Added `control_methods/true_labels` component (PR #5).

* Added `methods/logistic_regression` component (PR #5).

* Added `metrics/accuracy` component (PR #5).
* ...

## MAJOR CHANGES

* Updated `api` files (PR #5).

* Updated configs, components and CI to the latest Viash version (PR #8).

* Updated to Viash 0.9.4 (PR #12).

* Use dependencies in `openproblems-bio/openproblems` (PR #12).
* ...

## MINOR CHANGES

* Updated `README.md` (PR #5).
* ...

## BUGFIXES

* ...
8 changes: 4 additions & 4 deletions scripts/create_resources/resources.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ cd "$REPO_ROOT"

# remove this when you have implemented the script
echo "TODO: once the 'process_datasets' workflow is implemented, update this script to use it."
echo " Step 1: replace 'task_template' with the name of the task in the following command."
echo " Step 1: replace 'task_spatial_segmentation' with the name of the task in the following command."
echo " Step 2: replace the rename keys parameters to fit your process_dataset inputs"
echo " Step 3: replace the settings parameter to fit your process_dataset outputs"
echo " Step 4: remove this message"
Expand All @@ -19,10 +19,10 @@ input_states: s3://openproblems-data/resources/datasets/**/state.yaml
rename_keys: 'input_spatial_unlabelled:output_spatial_unlabelled,input_spatial_solution:output_spatial_solution,input_scrnaseq_reference:output_scrnaseq_reference'
output_state: '$id/state.yaml'
settings: '{"output_spatial_unlabelled": "$id/output_spatial_unlabelled.zarr", "output_spatial_solution": "$id/output_spatial_solution.zarr", "output_scrnaseq": "$id/output_scrnaseq.h5ad"}'
publish_dir: s3://openproblems-data/resources/task_template/datasets/
publish_dir: s3://openproblems-data/resources/task_spatial_segmentation/datasets/
HERE

tw launch https://github.com/openproblems-bio/task_template.git \
tw launch https://github.com/openproblems-bio/task_spatial_segmentation.git \
--revision build/main \
--pull-latest \
--main-script target/nextflow/workflows/process_datasets/main.nf \
Expand All @@ -31,4 +31,4 @@ tw launch https://github.com/openproblems-bio/task_template.git \
--params-file /tmp/params.yaml \
--entry-name auto \
--config common/nextflow_helpers/labels_tw.config \
--labels task_template,process_datasets
--labels task_spatial_segmentation,process_datasets
4 changes: 2 additions & 2 deletions scripts/run_benchmark/run_full_local.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ cd "$REPO_ROOT"

# remove this when you have implemented the script
echo "TODO: once the 'run_benchmark' workflow has been implemented, update this script to use it."
echo " Step 1: replace 'task_template' with the name of the task in the following command."
echo " Step 1: replace 'task_spatial_segmentation' with the name of the task in the following command."
echo " Step 2: replace the rename keys parameters to fit your run_benchmark inputs"
echo " Step 3: replace the settings parameter to fit your run_benchmark outputs"
echo " Step 4: remove this message"
Expand All @@ -37,7 +37,7 @@ publish_dir: "$publish_dir"
HERE

# run the benchmark
nextflow run openproblems-bio/task_template \
nextflow run openproblems-bio/task_spatial_segmentation \
-r build/main \
-main-script target/nextflow/workflows/run_benchmark/main.nf \
-profile docker \
Expand Down
10 changes: 5 additions & 5 deletions scripts/run_benchmark/run_full_seqeracloud.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ cd "$REPO_ROOT"

# remove this when you have implemented the script
echo "TODO: once the 'run_benchmark' workflow has been implemented, update this script to use it."
echo " Step 1: replace 'task_template' with the name of the task in the following command."
echo " Step 1: replace 'task_spatial_segmentation' with the name of the task in the following command."
echo " Step 2: replace the rename keys parameters to fit your run_benchmark inputs"
echo " Step 3: replace the settings parameter to fit your run_benchmark outputs"
echo " Step 4: remove this message"
Expand All @@ -18,17 +18,17 @@ set -e

# generate a unique id
RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)"
publish_dir="s3://openproblems-data/resources/task_template/results/${RUN_ID}"
publish_dir="s3://openproblems-data/resources/task_spatial_segmentation/results/${RUN_ID}"

# write the parameters to file
cat > /tmp/params.yaml << HERE
input_states: s3://openproblems-data/resources/task_template/datasets/**/state.yaml
input_states: s3://openproblems-data/resources/task_spatial_segmentation/datasets/**/state.yaml
rename_keys: 'input_spatial_unlabelled:output_spatial_unlabelled,input_spatial_solution:output_spatial_solution,input_scrnaseq_reference:output_scrnaseq_reference'
output_state: "state.yaml"
publish_dir: "$publish_dir"
HERE

tw launch https://github.com/openproblems-bio/task_template.git \
tw launch https://github.com/openproblems-bio/task_spatial_segmentation.git \
--revision build/main \
--pull-latest \
--main-script target/nextflow/workflows/run_benchmark/main.nf \
Expand All @@ -37,4 +37,4 @@ tw launch https://github.com/openproblems-bio/task_template.git \
--params-file /tmp/params.yaml \
--entry-name auto \
--config common/nextflow_helpers/labels_tw.config \
--labels task_template,full
--labels task_spatial_segmentation,full
30 changes: 11 additions & 19 deletions scripts/run_benchmark/run_test_seqeracloud.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,35 +6,27 @@ REPO_ROOT=$(git rev-parse --show-toplevel)
# ensure that the command below is run from the root of the repository
cd "$REPO_ROOT"

# remove this when you have implemented the script
echo "TODO: once the 'run_benchmark' workflow has been implemented, update this script to use it."
echo " Step 1: replace 'task_template' with the name of the task in the following command."
echo " Step 2: replace the rename keys parameters to fit your run_benchmark inputs"
echo " Step 3: replace the settings parameter to fit your run_benchmark outputs"
echo " Step 4: remove this message"
exit 1

set -e

resources_test_s3=s3://openproblems-data/resources_test/task_template
publish_dir_s3="s3://openproblems-nextflow/temp/results/$(date +%Y-%m-%d_%H-%M-%S)"
resources_test_s3=s3://openproblems-data/resources_test/task_spatial_segmentation
publish_dir_s3="s3://hca-op-spatial/temp/results/$(date +%Y-%m-%d_%H-%M-%S)"

# write the parameters to file
cat > /tmp/params.yaml << HERE
id: cxg_mouse_pancreas_atlas
input_train: $resources_test_s3/cxg_mouse_pancreas_atlas/train.h5ad
input_test: $resources_test_s3/cxg_mouse_pancreas_atlas/test.h5ad
input_solution: $resources_test_s3/cxg_mouse_pancreas_atlas/solution.h5ad
id: mouse_brain_combined
input_spatial_unlabelled: $resources_test_s3/mouse_brain_combined/spatial_unlabelled.zarr
input_spatial_solution: $resources_test_s3/mouse_brain_combined/spatial_solution.zarr
input_scrnaseq_reference: $resources_test_s3/mouse_brain_combined/scrnaseq_reference.h5ad
output_state: "state.yaml"
publish_dir: $publish_dir_s3
HERE

tw launch https://github.com/openproblems-bio/task_template.git \
tw launch https://github.com/openproblems-bio/task_spatial_segmentation.git \
--revision build/main \
--pull-latest \
--main-script target/nextflow/workflows/run_benchmark/main.nf \
--workspace 53907369739130 \
--compute-env 6TeIFgV5OY4pJCk8I0bfOh \
--workspace 8386213183400 \
--compute-env 7Odt43ln9XureGja6Frdm7 \
--params-file /tmp/params.yaml \
--config common/nextflow_helpers/labels_tw.config \
--labels task_template,test
--config src/base/labels_tw.config \
--labels task_spatial_segmentation,test
155 changes: 155 additions & 0 deletions src/base/labels_tw.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
// copied from 'common/nextflow_helpers/labels_tw.config', but the queues in the gpu labels have been updated

// Decide how Nextflow should react to a failed task.
//
// Parameters:
//   task          the failing task; only its `attempt` and `exitStatus`
//                 properties are read
//   max_attempts  attempt number from which errors are ignored instead of
//                 retried (default: 3)
//
// Returns the error strategy as a string: either 'retry' or 'ignore'.
def exitStrat(task, max_attempts = 3) {
    println "Determining exit strategy for task (attempt '${task.attempt}', exit status '${task.exitStatus}')"

    // if the component already used up its attempts, ignore the error so the
    // workflow can continue. It's important 'ignore' is returned even if
    // maxRetries is set to the same value, otherwise the workflow will stop.
    // (Uses max_attempts rather than a hard-coded 3 so the parameter is honoured.)
    if (task.attempt >= max_attempts) {
        return 'ignore'
    }

    def status = task.exitStatus

    // a missing or non-numeric exit status is retried; this is checked before
    // the numeric comparisons below so they only ever see a numeric value
    if (status == null || !(status.toString().isNumber())) {
        return 'retry'
    }

    // when an aws spot instance is reclaimed, nextflow seems to use exit code 2147483647
    // throwing in some extra conditions just in case
    if (status <= -1 || status > 2100000000) {
        return 'retry'
    }

    // if component failed, retry once
    if (status == 1 && task.attempt < 2) {
        return 'retry'
    }

    // if component ran out of memory, retry with more memory and disk
    // (the memory/disk labels scale with task.attempt)
    if (status in [137, 139] && task.attempt < max_attempts) {
        return 'retry'
    }

    // return 'ignore' for all other cases to ignore the error,
    // otherwise the workflow will stop
    return 'ignore'
}

// AWS Batch settings.
aws {
batch {
maxTransferAttempts = 3
delayBetweenAttempts = '5 sec'
// NOTE(review): aws.batch.maxSpotAttempts is assigned again (= 5) at the end
// of this file; confirm which of the two values is intended.
maxSpotAttempts = 8
}
}

// Process-scope defaults plus per-label resource overrides.
process {
executor = 'awsbatch'

// Default disk space
disk = 50.GB

// The error strategy is delegated to exitStrat() (defined earlier in this file),
// which returns 'retry' or 'ignore' depending on the attempt number and exit status.
errorStrategy = { exitStrat(task) }
maxRetries = 3
// maxMemory is read by get_memory() (defined later in this file); while it is
// null, get_memory() returns the requested amount unchanged.
maxMemory = null

// Resource labels
// CPU labels: fixed core counts.
withLabel: lowcpu { cpus = 5 }
withLabel: midcpu { cpus = 15 }
withLabel: highcpu { cpus = 30 }
// Memory labels: memory and disk are multiplied by task.attempt, so each retry
// requests more than the previous attempt.
withLabel: lowmem {
memory = { get_memory( 20.GB * task.attempt ) }
disk = { 50.GB * task.attempt }
}
withLabel: midmem {
memory = { get_memory( 50.GB * task.attempt ) }
disk = { 100.GB * task.attempt }
}
withLabel: highmem {
memory = { get_memory( 100.GB * task.attempt ) }
disk = { 200.GB * task.attempt }
}
withLabel: veryhighmem {
memory = { get_memory( 200.GB * task.attempt ) }
disk = { 400.GB * task.attempt }
}
// Shared-memory labels: unless the container engine is singularity, pass
// --shm-size computed as a fraction (5% / 10% / 25%) of task.memory.mega.
// NOTE(review): the value is passed as a bare number without a unit suffix;
// confirm how the container engine / executor interprets it.
withLabel: lowsharedmem {
containerOptions = { workflow.containerEngine != 'singularity' ? "--shm-size ${String.format("%.0f",task.memory.mega * 0.05)}" : ""}
}
withLabel: midsharedmem {
containerOptions = { workflow.containerEngine != 'singularity' ? "--shm-size ${String.format("%.0f",task.memory.mega * 0.1)}" : ""}
}
withLabel: highsharedmem {
containerOptions = { workflow.containerEngine != 'singularity' ? "--shm-size ${String.format("%.0f",task.memory.mega * 0.25)}" : ""}
}
// GPU labels: fixed cpus / accelerator / memory and a dedicated queue.
// containerOptions enables GPU access: '--nv' for singularity, '--gpus all'
// for docker, null for any other engine.
withLabel: gpu {
// assuming g6.8xlarge
cpus = 32
accelerator = 1
memory = 100.GB
// queue = "TowerForge-YxBvZ5IJWipqLJBWlIx34-work" // spot (less expensive, might need to wait longer)
queue = "TowerForge-1DIo1otpXKvOF1jgVVKo8U" // ondemand (more expensive)
containerOptions = { workflow.containerEngine == "singularity" ? '--nv':
( workflow.containerEngine == "docker" ? '--gpus all': null ) }
}
withLabel: midgpu {
// assuming g6.8xlarge
// NOTE(review): this label requests 4 accelerators while the comment above
// names the same instance type as the single-accelerator 'gpu' label;
// confirm the intended instance type.
cpus = 32
accelerator = 4
memory = 100.GB
// queue = "TowerForge-YxBvZ5IJWipqLJBWlIx34-work" // spot (less expensive, might need to wait longer)
queue = "TowerForge-1DIo1otpXKvOF1jgVVKo8U" // ondemand (more expensive)
containerOptions = { workflow.containerEngine == "singularity" ? '--nv':
( workflow.containerEngine == "docker" ? '--gpus all': null ) }
}
withLabel: highgpu {
// assuming g6.16xlarge
// NOTE(review): this label requests 8 accelerators; confirm that the
// instance type named above provides that many.
cpus = 64
accelerator = 8
memory = 200.GB
// queue = "TowerForge-YxBvZ5IJWipqLJBWlIx34-work" // spot (less expensive, might need to wait longer)
queue = "TowerForge-1DIo1otpXKvOF1jgVVKo8U" // ondemand (more expensive)
containerOptions = { workflow.containerEngine == "singularity" ? '--nv':
( workflow.containerEngine == "docker" ? '--gpus all': null ) }
}
withLabel: biggpu {
// assuming p5.4xlarge
cpus = 16
accelerator = 1
memory = 200.GB
queue = "TowerForge-jvrqgsfAj9Zm3kua7j07P-work" // spot (less expensive, might need to wait longer)
containerOptions = { workflow.containerEngine == "singularity" ? '--nv':
( workflow.containerEngine == "docker" ? '--gpus all': null ) }
}

// make sure publishstates gets enough disk space and memory
// (applies to every process whose name matches the regex '.*publishStatesProc')
withName:'.*publishStatesProc' {
memory = '16GB'
disk = '100GB'
}
}

// Cap a requested memory amount at process.maxMemory.
//
// Parameters:
//   to_compare  the requested memory amount (the labels in this file pass
//               expressions such as `20.GB * task.attempt`)
//
// Returns:
//   - to_compare unchanged when process.maxMemory is unset/null/empty
//   - process.maxMemory when the current attempt equals process.maxRetries
//   - process.maxMemory (as a MemoryUnit) when to_compare exceeds it
//   - to_compare otherwise
//
// Any exception raised while evaluating the limits is reported on stdout and
// terminates the JVM with exit code 1.
def get_memory(to_compare) {
    // no ceiling configured: hand back the request untouched
    if (!process.containsKey("maxMemory") || !process.maxMemory) {
        return to_compare
    }

    try {
        // NOTE(review): `task` is not a parameter of this function; confirm that
        // it resolves in this scope. If it does not, the lookup throws and the
        // catch block below terminates the run.
        if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
            return process.maxMemory
        }
        // compareTo only guarantees the sign of its result, so test for > 0
        else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
            // the original returned the undefined name `max_memory` here, which
            // always threw and fell through to the catch block
            return process.maxMemory as nextflow.util.MemoryUnit
        }
        else {
            return to_compare
        }
    } catch (all) {
        println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
        System.exit(1)
    }
}

// set tracing file
// Tracing is enabled with overwrite turned on, and the trace is written to
// trace.txt under params.publish_dir.
trace {
enabled = true
overwrite = true
file = "${params.publish_dir}/trace.txt"
}

// Spot-attempt settings for the AWS and Google Batch scopes.
// NOTE(review): aws.batch.maxSpotAttempts is also set to 8 inside the
// aws { batch { ... } } block earlier in this file; confirm which value is intended.
aws.batch.maxSpotAttempts = 5
google.batch.maxSpotAttempts = 5
Loading