From ad2af76877e535e33d331d42065d3e681919ab5d Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt <rcannood@gmail.com>
Date: Wed, 13 May 2026 13:17:31 +0200
Subject: [PATCH 1/2] update scripts

---
 CHANGELOG.md                                  |  19 +--
 scripts/create_resources/resources.sh         |   8 +-
 scripts/run_benchmark/run_full_local.sh       |   4 +-
 scripts/run_benchmark/run_full_seqeracloud.sh |  10 +-
 scripts/run_benchmark/run_test_seqeracloud.sh |  30 ++--
 src/base/labels_tw.config                     | 158 ++++++++++++++++++
 6 files changed, 185 insertions(+), 44 deletions(-)
 create mode 100644 src/base/labels_tw.config

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5962f06..7158473 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,4 @@
-# task_template x.y.z
+# task_spatial_segmentation x.y.z
 
 ## BREAKING CHANGES
 
@@ -6,25 +6,16 @@
 
 ## NEW FUNCTIONALITY
 
-* Added `control_methods/true_labels` component (PR #5).
-
-* Added `methods/logistic_regression` component (PR #5).
-
-* Added `metrics/accuracy` component (PR #5).
+* ...
 
 ## MAJOR CHANGES
 
-* Updated `api` files (PR #5).
-
-* Updated configs, components and CI to the latest Viash version (PR #8).
-
-* Updated to Viash 0.9.4 (PR #12).
-
-* Use dependencies in `openproblems-bio/openproblems` (PR #12).
+* ...
 
 ## MINOR CHANGES
 
-* Updated `README.md` (PR #5).
+* ...
 
 ## BUGFIXES
 
+* ...
diff --git a/scripts/create_resources/resources.sh b/scripts/create_resources/resources.sh
index 4a921f8..969ee5b 100755
--- a/scripts/create_resources/resources.sh
+++ b/scripts/create_resources/resources.sh
@@ -8,7 +8,7 @@ cd "$REPO_ROOT"
 
 # remove this when you have implemented the script
 echo "TODO: once the 'process_datasets' workflow is implemented, update this script to use it."
-echo "  Step 1: replace 'task_template' with the name of the task in the following command."
+echo "  Step 1: replace 'task_spatial_segmentation' with the name of the task in the following command."
 echo "  Step 2: replace the rename keys parameters to fit your process_dataset inputs"
 echo "  Step 3: replace the settings parameter to fit your process_dataset outputs"
 echo "  Step 4: remove this message"
@@ -19,10 +19,10 @@ input_states: s3://openproblems-data/resources/datasets/**/state.yaml
 rename_keys: 'input_spatial_unlabelled:output_spatial_unlabelled,input_spatial_solution:output_spatial_solution,input_scrnaseq_reference:output_scrnaseq_reference'
 output_state: '$id/state.yaml'
 settings: '{"output_spatial_unlabelled": "$id/output_spatial_unlabelled.zarr", "output_spatial_solution": "$id/output_spatial_solution.zarr", "output_scrnaseq": "$id/output_scrnaseq.h5ad"}'
-publish_dir: s3://openproblems-data/resources/task_template/datasets/
+publish_dir: s3://openproblems-data/resources/task_spatial_segmentation/datasets/
 HERE
 
-tw launch https://github.com/openproblems-bio/task_template.git \
+tw launch https://github.com/openproblems-bio/task_spatial_segmentation.git \
   --revision build/main \
   --pull-latest \
   --main-script target/nextflow/workflows/process_datasets/main.nf \
@@ -31,4 +31,4 @@ tw launch https://github.com/openproblems-bio/task_template.git \
   --params-file /tmp/params.yaml \
   --entry-name auto \
   --config common/nextflow_helpers/labels_tw.config \
-  --labels task_template,process_datasets
+  --labels task_spatial_segmentation,process_datasets
diff --git a/scripts/run_benchmark/run_full_local.sh b/scripts/run_benchmark/run_full_local.sh
index 808df7f..4dac141 100755
--- a/scripts/run_benchmark/run_full_local.sh
+++ b/scripts/run_benchmark/run_full_local.sh
@@ -13,7 +13,7 @@ cd "$REPO_ROOT"
 
 # remove this when you have implemented the script
 echo "TODO: once the 'run_benchmark' workflow has been implemented, update this script to use it."
-echo "  Step 1: replace 'task_template' with the name of the task in the following command."
+echo "  Step 1: replace 'task_spatial_segmentation' with the name of the task in the following command."
 echo "  Step 2: replace the rename keys parameters to fit your run_benchmark inputs"
 echo "  Step 3: replace the settings parameter to fit your run_benchmark outputs"
 echo "  Step 4: remove this message"
@@ -37,7 +37,7 @@ publish_dir: "$publish_dir"
 HERE
 
 # run the benchmark
-nextflow run openproblems-bio/task_template \
+nextflow run openproblems-bio/task_spatial_segmentation \
   -r build/main \
   -main-script target/nextflow/workflows/run_benchmark/main.nf \
   -profile docker \
diff --git a/scripts/run_benchmark/run_full_seqeracloud.sh b/scripts/run_benchmark/run_full_seqeracloud.sh
index 745aa77..50ae775 100755
--- a/scripts/run_benchmark/run_full_seqeracloud.sh
+++ b/scripts/run_benchmark/run_full_seqeracloud.sh
@@ -8,7 +8,7 @@ cd "$REPO_ROOT"
 
 # remove this when you have implemented the script
 echo "TODO: once the 'run_benchmark' workflow has been implemented, update this script to use it."
-echo "  Step 1: replace 'task_template' with the name of the task in the following command."
+echo "  Step 1: replace 'task_spatial_segmentation' with the name of the task in the following command."
 echo "  Step 2: replace the rename keys parameters to fit your run_benchmark inputs"
 echo "  Step 3: replace the settings parameter to fit your run_benchmark outputs"
 echo "  Step 4: remove this message"
@@ -18,17 +18,17 @@ set -e
 
 # generate a unique id
 RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)"
-publish_dir="s3://openproblems-data/resources/task_template/results/${RUN_ID}"
+publish_dir="s3://openproblems-data/resources/task_spatial_segmentation/results/${RUN_ID}"
 
 # write the parameters to file
 cat > /tmp/params.yaml << HERE
-input_states: s3://openproblems-data/resources/task_template/datasets/**/state.yaml
+input_states: s3://openproblems-data/resources/task_spatial_segmentation/datasets/**/state.yaml
 rename_keys: 'input_spatial_unlabelled:output_spatial_unlabelled,input_spatial_solution:output_spatial_solution,input_scrnaseq_reference:output_scrnaseq_reference'
 output_state: "state.yaml"
 publish_dir: "$publish_dir"
 HERE
 
-tw launch https://github.com/openproblems-bio/task_template.git \
+tw launch https://github.com/openproblems-bio/task_spatial_segmentation.git \
   --revision build/main \
   --pull-latest \
   --main-script target/nextflow/workflows/run_benchmark/main.nf \
@@ -37,4 +37,4 @@ tw launch https://github.com/openproblems-bio/task_template.git \
   --params-file /tmp/params.yaml \
   --entry-name auto \
   --config common/nextflow_helpers/labels_tw.config \
-  --labels task_template,full
\ No newline at end of file
+  --labels task_spatial_segmentation,full
\ No newline at end of file
diff --git a/scripts/run_benchmark/run_test_seqeracloud.sh b/scripts/run_benchmark/run_test_seqeracloud.sh
index bc9c619..44f14c5 100755
--- a/scripts/run_benchmark/run_test_seqeracloud.sh
+++ b/scripts/run_benchmark/run_test_seqeracloud.sh
@@ -6,35 +6,27 @@ REPO_ROOT=$(git rev-parse --show-toplevel)
 # ensure that the command below is run from the root of the repository
 cd "$REPO_ROOT"
 
-# remove this when you have implemented the script
-echo "TODO: once the 'run_benchmark' workflow has been implemented, update this script to use it."
-echo "  Step 1: replace 'task_template' with the name of the task in the following command."
-echo "  Step 2: replace the rename keys parameters to fit your run_benchmark inputs"
-echo "  Step 3: replace the settings parameter to fit your run_benchmark outputs"
-echo "  Step 4: remove this message"
-exit 1
-
 set -e
 
-resources_test_s3=s3://openproblems-data/resources_test/task_template
-publish_dir_s3="s3://openproblems-nextflow/temp/results/$(date +%Y-%m-%d_%H-%M-%S)"
+resources_test_s3=s3://openproblems-data/resources_test/task_spatial_segmentation
+publish_dir_s3="s3://hca-op-spatial/temp/results/$(date +%Y-%m-%d_%H-%M-%S)"
 
 # write the parameters to file
 cat > /tmp/params.yaml << HERE
-id: cxg_mouse_pancreas_atlas
-input_train: $resources_test_s3/cxg_mouse_pancreas_atlas/train.h5ad
-input_test: $resources_test_s3/cxg_mouse_pancreas_atlas/test.h5ad
-input_solution: $resources_test_s3/cxg_mouse_pancreas_atlas/solution.h5ad
+id: mouse_brain_combined
+input_spatial_unlabelled: $resources_test_s3/mouse_brain_combined/spatial_unlabelled.zarr
+input_spatial_solution: $resources_test_s3/mouse_brain_combined/spatial_solution.zarr
+input_scrnaseq_reference: $resources_test_s3/mouse_brain_combined/scrnaseq_reference.h5ad
 output_state: "state.yaml"
 publish_dir: $publish_dir_s3
 HERE
 
-tw launch https://github.com/openproblems-bio/task_template.git \
+tw launch https://github.com/openproblems-bio/task_spatial_segmentation.git \
   --revision build/main \
   --pull-latest \
   --main-script target/nextflow/workflows/run_benchmark/main.nf \
-  --workspace 53907369739130 \
-  --compute-env 6TeIFgV5OY4pJCk8I0bfOh \
+  --workspace 8386213183400 \
+  --compute-env 7Odt43ln9XureGja6Frdm7 \
   --params-file /tmp/params.yaml \
-  --config common/nextflow_helpers/labels_tw.config \
-  --labels task_template,test
+  --config src/base/labels_tw.config \
+  --labels task_spatial_segmentation,test
diff --git a/src/base/labels_tw.config b/src/base/labels_tw.config
new file mode 100644
index 0000000..595e63e
--- /dev/null
+++ b/src/base/labels_tw.config
@@ -0,0 +1,158 @@
+// copied from 'common/nextflow_helpers/labels_tw.config', but the queues in the gpu labels have been updated
+
+def exitStrat(task, max_attempts = 3) {
+  // Returns the Nextflow errorStrategy ('retry' or 'ignore') for a failed task attempt.
+  println "Determining exit strategy for task (attempt '${task.attempt}', exit status '${task.exitStatus}')"
+
+  // once max_attempts is reached, ignore the error so the workflow can continue;
+  // 'ignore' must be returned even if maxRetries is set, otherwise the workflow will stop
+  if (task.attempt >= max_attempts) {
+    return 'ignore'
+  }
+  // when an aws spot instance is reclaimed, nextflow seems to use exit code 2147483647
+  // throwing in some extra conditions just in case
+  if (task.exitStatus == null || task.exitStatus <= -1 || task.exitStatus > 2100000000 || !(task.exitStatus.toString().isNumber())) {
+    return 'retry'
+  }
+  // if component failed, retry once
+  if (task.exitStatus == 1 && task.attempt < 2) {
+    return 'retry'
+  }
+  // if component ran out of memory, retry with more memory and disk
+  if (task.exitStatus in [137, 139] && task.attempt < max_attempts) {
+    return 'retry'
+  }
+  // return 'ignore' for all other cases to ignore the error,
+  // otherwise the workflow will stop
+  return 'ignore'
+}
+
+aws { // AWS Batch transfer and spot-retry settings
+  batch {
+    maxTransferAttempts = 3
+    delayBetweenAttempts = '5 sec'
+    maxSpotAttempts = 8 // NOTE(review): reassigned by 'aws.batch.maxSpotAttempts = 5' at the end of this file - confirm which value is intended
+  }
+}
+
+process {
+  executor = 'awsbatch'
+
+  // Default disk space
+  disk = 50.GB
+
+  // Failed tasks are routed through exitStrat(), which returns 'retry' or 'ignore'
+  // depending on the exit status and attempt number; maxRetries bounds the retries
+  errorStrategy = { exitStrat(task) }
+  maxRetries = 3
+  maxMemory = null // null means get_memory() applies no cap
+
+  // Resource labels; memory and disk requests scale with task.attempt on retries
+  withLabel: lowcpu { cpus = 5 }
+  withLabel: midcpu { cpus = 15 }
+  withLabel: highcpu { cpus = 30 }
+  withLabel: lowmem {
+    memory = { get_memory( 20.GB * task.attempt ) }
+    disk = { 50.GB * task.attempt }
+  }
+  withLabel: midmem {
+    memory = { get_memory( 50.GB * task.attempt ) }
+    disk = { 100.GB * task.attempt }
+  }
+  withLabel: highmem {
+    memory = { get_memory( 100.GB * task.attempt ) }
+    disk = { 200.GB * task.attempt }
+  }
+  withLabel: veryhighmem {
+    memory = { get_memory( 200.GB * task.attempt ) }
+    disk = { 400.GB * task.attempt }
+  }
+  withLabel: lowsharedmem { // NOTE(review): the *sharedmem labels pass a bare number (task.memory.mega * fraction) to --shm-size; confirm which unit the container engine/executor assumes
+    containerOptions = { workflow.containerEngine != 'singularity' ? "--shm-size ${String.format("%.0f",task.memory.mega * 0.05)}" : ""}
+  }
+  withLabel: midsharedmem {
+    containerOptions = { workflow.containerEngine != 'singularity' ? "--shm-size ${String.format("%.0f",task.memory.mega * 0.1)}" : ""}
+  }
+  withLabel: highsharedmem {
+    containerOptions = { workflow.containerEngine != 'singularity' ? "--shm-size ${String.format("%.0f",task.memory.mega * 0.25)}" : ""}
+  }
+  withLabel: gpu {
+    // assuming g6.8xlarge
+    cpus = 32
+    accelerator = 1
+    memory = 100.GB
+    queue = "TowerForge-YxBvZ5IJWipqLJBWlIx34-work"
+    // ondemand:
+    // queue = "TowerForge-1DIo1otpXKvOF1jgVVKo8U-work"
+    containerOptions = { workflow.containerEngine == "singularity" ? '--nv':
+       ( workflow.containerEngine == "docker" ? '--gpus all': null ) }
+  }
+  withLabel: midgpu {
+    // assuming g6.8xlarge - NOTE(review): accelerator = 4 below; confirm this instance type offers 4 GPUs
+    cpus = 32
+    accelerator = 4
+    memory = 100.GB
+    queue = "TowerForge-YxBvZ5IJWipqLJBWlIx34-work"
+    // ondemand:
+    // queue = "TowerForge-1DIo1otpXKvOF1jgVVKo8U-work"
+    containerOptions = { workflow.containerEngine == "singularity" ? '--nv':
+       ( workflow.containerEngine == "docker" ? '--gpus all': null ) }
+  }
+  withLabel: highgpu {
+    // assuming g6.16xlarge - NOTE(review): accelerator = 8 below; confirm this instance type offers 8 GPUs
+    cpus = 64
+    accelerator = 8
+    memory = 200.GB
+    queue = "TowerForge-YxBvZ5IJWipqLJBWlIx34-work"
+    // ondemand:
+    // queue = "TowerForge-1DIo1otpXKvOF1jgVVKo8U-work"
+    containerOptions = { workflow.containerEngine == "singularity" ? '--nv':
+       ( workflow.containerEngine == "docker" ? '--gpus all': null ) }
+  }
+  withLabel: biggpu {
+    // assuming p5.4xlarge
+    cpus = 16
+    accelerator = 1
+    memory = 200.GB
+    queue = "TowerForge-jvrqgsfAj9Zm3kua7j07P-work"
+    containerOptions = { workflow.containerEngine == "singularity" ? '--nv':
+       ( workflow.containerEngine == "docker" ? '--gpus all': null ) }
+  }
+
+  // make sure publishstates gets enough disk space and memory
+  withName:'.*publishStatesProc' {
+    memory = '16GB'
+    disk = '100GB'
+  }
+}
+
+def get_memory(to_compare) {
+  // Caps the requested memory at process.maxMemory; returns the request unchanged when no cap is set.
+  if (!process.containsKey("maxMemory") || !process.maxMemory) {
+    return to_compare
+  }
+  try {
+    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { // NOTE(review): 'task' is not a parameter of this helper - confirm it resolves here
+      return process.maxMemory
+    }
+    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
+      return process.maxMemory as nextflow.util.MemoryUnit
+    }
+    else {
+      return to_compare
+    }
+  } catch (all) {
+        println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
+        System.exit(1)
+  }
+}
+
+// set tracing file
+trace {
+    enabled = true
+    overwrite = true
+    file = "${params.publish_dir}/trace.txt"
+}
+
+aws.batch.maxSpotAttempts = 5
+google.batch.maxSpotAttempts = 5

From 0cd3daf3fa0556645d6ffcc9887591d18c965360 Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt <rcannood@gmail.com>
Date: Wed, 13 May 2026 13:39:30 +0200
Subject: [PATCH 2/2] update config

---
 src/base/labels_tw.config | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/src/base/labels_tw.config b/src/base/labels_tw.config
index 595e63e..3a03eb1 100644
--- a/src/base/labels_tw.config
+++ b/src/base/labels_tw.config
@@ -81,9 +81,8 @@ process {
     cpus = 32
     accelerator = 1
     memory = 100.GB
-    queue = "TowerForge-YxBvZ5IJWipqLJBWlIx34-work"
-    // ondemand:
-    // queue = "TowerForge-1DIo1otpXKvOF1jgVVKo8U-work"
+    // queue = "TowerForge-YxBvZ5IJWipqLJBWlIx34-work" // spot (less expensive, might need to wait longer)
+    queue = "TowerForge-1DIo1otpXKvOF1jgVVKo8U-work" // ondemand (more expensive)
     containerOptions = { workflow.containerEngine == "singularity" ? '--nv':
        ( workflow.containerEngine == "docker" ? '--gpus all': null ) }
   }
@@ -92,9 +91,8 @@ process {
     cpus = 32
     accelerator = 4
     memory = 100.GB
-    queue = "TowerForge-YxBvZ5IJWipqLJBWlIx34-work"
-    // ondemand:
-    // queue = "TowerForge-1DIo1otpXKvOF1jgVVKo8U-work"
+    // queue = "TowerForge-YxBvZ5IJWipqLJBWlIx34-work" // spot (less expensive, might need to wait longer)
+    queue = "TowerForge-1DIo1otpXKvOF1jgVVKo8U-work" // ondemand (more expensive)
     containerOptions = { workflow.containerEngine == "singularity" ? '--nv':
        ( workflow.containerEngine == "docker" ? '--gpus all': null ) }
   }
@@ -103,9 +101,8 @@ process {
     cpus = 64
     accelerator = 8
     memory = 200.GB
-    queue = "TowerForge-YxBvZ5IJWipqLJBWlIx34-work"
-    // ondemand:
-    // queue = "TowerForge-1DIo1otpXKvOF1jgVVKo8U-work"
+    // queue = "TowerForge-YxBvZ5IJWipqLJBWlIx34-work" // spot (less expensive, might need to wait longer)
+    queue = "TowerForge-1DIo1otpXKvOF1jgVVKo8U-work" // ondemand (more expensive)
     containerOptions = { workflow.containerEngine == "singularity" ? '--nv':
        ( workflow.containerEngine == "docker" ? '--gpus all': null ) }
   }
@@ -114,7 +111,7 @@ process {
     cpus = 16
     accelerator = 1
     memory = 200.GB
-    queue = "TowerForge-jvrqgsfAj9Zm3kua7j07P-work"
+    queue = "TowerForge-jvrqgsfAj9Zm3kua7j07P-work" // spot (less expensive, might need to wait longer)
     containerOptions = { workflow.containerEngine == "singularity" ? '--nv':
        ( workflow.containerEngine == "docker" ? '--gpus all': null ) }
   }