From c148b76299b0258b0934688c4237863f2329622f Mon Sep 17 00:00:00 2001
From: dariarom94 <romanovskaiadaria@gmail.com>
Date: Tue, 12 May 2026 09:47:29 +0200
Subject: [PATCH 1/2] add tacco

---
 .../tacco/config.vsh.yaml                     | 82 +++++++++++++++++++
 .../tacco/script.py                           | 58 +++++++++++++
 src/workflows/run_benchmark/config.vsh.yaml   |  1 +
 src/workflows/run_benchmark/main.nf           | 13 +++
 4 files changed, 154 insertions(+)
 create mode 100644 src/methods_cell_type_annotation/tacco/config.vsh.yaml
 create mode 100644 src/methods_cell_type_annotation/tacco/script.py

diff --git a/src/methods_cell_type_annotation/tacco/config.vsh.yaml b/src/methods_cell_type_annotation/tacco/config.vsh.yaml
new file mode 100644
index 0000000..dd08f5c
--- /dev/null
+++ b/src/methods_cell_type_annotation/tacco/config.vsh.yaml
@@ -0,0 +1,82 @@
+name: tacco
+namespace: methods_cell_type_annotation
+label: "TACCO"
+summary: "Assign cell types to segmented cells via optimal transport against a scRNA-seq reference."
+description: |
+  TACCO (Transfer of Annotations to Cells and their COmbinations) takes a
+  processed segmentation prediction (cells with per-cell gene expression) and
+  assigns each cell a cell type by comparing its expression profile to a
+  scRNA-seq reference via optimal transport. It runs as a post-processing step
+  after any segmentation method and writes a cell_type per cell_id to an h5ad file.
+links:
+  documentation: "https://simonwm.github.io/tacco/"
+  repository: "https://github.com/simonwm/tacco"
+references:
+  doi: "10.1038/s41587-023-01657-3"
+
+
+arguments:
+  - name: --input_processed_prediction
+    __merge__: /src/api/file_processed_prediction.yaml
+    direction: input
+    required: true
+
+  - name: --input_scrnaseq_reference
+    __merge__: /src/api/file_scrnaseq_reference.yaml
+    direction: input
+    required: true
+
+  - name: --output
+    type: file
+    label: "Cell type annotation"
+    summary: "AnnData with predicted cell type labels in obs."
+    description: "An h5ad file containing obs with cell_type and cell_id columns, plus uns metadata (dataset_id, method_id)."
+    direction: output
+    required: true
+    default: output.h5ad
+    example: output.h5ad
+    info:
+      format:
+        type: h5ad
+        obs:
+          - type: string
+            name: cell_type
+            description: Predicted cell type label
+            required: true
+          - type: string
+            name: cell_id
+            description: Cell ID matching the segmentation table
+            required: true
+        uns:
+          - type: string
+            name: dataset_id
+            required: true
+          - type: string
+            name: method_id
+            required: true
+
+test_resources:
+  - type: python_script
+    path: /common/component_tests/run_and_check_output.py
+  - path: /resources_test/task_spatial_segmentation/mouse_brain_combined
+    dest: resources_test/task_spatial_segmentation/mouse_brain_combined
+
+resources:
+  - type: python_script
+    path: script.py
+
+engines:
+  - type: docker
+    image: openproblems/base_python:1
+    setup:
+      - type: python
+        pypi: [tacco]
+    __merge__:
+      - /src/base/setup_spatialdata_partial.yaml
+  - type: native
+
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [midtime, midcpu, highmem]
\ No newline at end of file
diff --git a/src/methods_cell_type_annotation/tacco/script.py b/src/methods_cell_type_annotation/tacco/script.py
new file mode 100644
index 0000000..b2d8d0b
--- /dev/null
+++ b/src/methods_cell_type_annotation/tacco/script.py
@@ -0,0 +1,58 @@
+import anndata as ad
+import numpy as np
+import spatialdata as sd
+import tacco
+
+## VIASH START
+par = {
+    'input_processed_prediction': 'resources_test/task_spatial_segmentation/mouse_brain_combined/processed_prediction.zarr',
+    'input_scrnaseq_reference': 'resources_test/task_spatial_segmentation/mouse_brain_combined/scrnaseq_reference.h5ad',
+    'output': 'output.h5ad',
+}
+meta = {
+    'name': 'tacco',
+}
+## VIASH END
+# Label every cell of the prediction table with its top-scoring reference cell type and write the labels to h5ad.
+print('Reading inputs', flush=True)
+sdata_pred = sd.read_zarr(par['input_processed_prediction'])
+adata_sc = ad.read_h5ad(par['input_scrnaseq_reference'])
+# Cell table of the prediction: supplies var_names, layers['counts'], obs['cell_id'] and uns below
+table = sdata_pred.tables['table']
+
+if table.n_obs == 0:
+    print('No cells detected in prediction — skipping annotation', flush=True)
+    cell_types = []
+else:
+    # Index the reference by var['feature_name'] when that column exists, keeping the first of any duplicates
+    if 'feature_name' in adata_sc.var.columns:
+        adata_sc.var_names = adata_sc.var['feature_name'].values
+        adata_sc = adata_sc[:, ~adata_sc.var_names.duplicated()].copy()
+    # Both inputs are read from layers['counts'] below; fail early with a clear message instead of a KeyError
+    if 'counts' not in table.layers:
+        raise ValueError("Processed prediction table is missing the 'counts' layer.")
+    if 'counts' not in adata_sc.layers:
+        raise ValueError("scRNA-seq reference is missing the 'counts' layer.")
+    # Shared genes, sorted so both subsets end up with the same column order
+    common_genes = sorted(set(table.var_names) & set(adata_sc.var_names))
+    if len(common_genes) == 0:
+        raise ValueError('No common genes between prediction cells and scRNA-seq reference.')
+    print(f'Using {len(common_genes)} common genes', flush=True)
+    # Subset copies with the 'counts' layer placed in X (presumably where annotate() reads counts by default — confirm)
+    adata_sp_sub = table[:, common_genes].copy()
+    adata_sp_sub.X = adata_sp_sub.layers['counts']
+    adata_sc_sub = adata_sc[:, common_genes].copy()
+    adata_sc_sub.X = adata_sc_sub.layers['counts']
+
+    print('Running TACCO annotation', flush=True)
+    cell_type_annotation = tacco.tl.annotate(
+        adata=adata_sp_sub, reference=adata_sc_sub, annotation_key='cell_type',
+    )
+    best_type_idx = np.argmax(cell_type_annotation.values, axis=1)  # highest-scoring column per row
+    cell_types = cell_type_annotation.columns[best_type_idx].tolist()
+
+print('Writing output', flush=True)
+output = ad.AnnData(obs={'cell_type': cell_types, 'cell_id': table.obs['cell_id'].values})
+output.uns['dataset_id'] = table.uns['dataset_id']
+output.uns['method_id'] = table.uns['method_id']  # copied from the prediction table; meta['name'] is not used
+output.write_h5ad(par['output'], compression='gzip')
\ No newline at end of file
diff --git a/src/workflows/run_benchmark/config.vsh.yaml b/src/workflows/run_benchmark/config.vsh.yaml
index 67bf53c..4bcb7cb 100644
--- a/src/workflows/run_benchmark/config.vsh.yaml
+++ b/src/workflows/run_benchmark/config.vsh.yaml
@@ -65,6 +65,7 @@ dependencies:
   - name: control_methods/empty_labels
   - name: control_methods/random_voronoi
   - name: methods/cellpose
+  - name: methods_cell_type_annotation/tacco
   - name: metrics/ari
   - name: data_processors/process_prediction
 
diff --git a/src/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf
index a61478f..417417a 100644
--- a/src/workflows/run_benchmark/main.nf
+++ b/src/workflows/run_benchmark/main.nf
@@ -95,6 +95,19 @@ workflow run_wf {
       }
     )
 
+    // annotate segmented cells with cell types
+    | tacco.run(
+      fromState: [
+        input_processed_prediction: "input_prediction",
+        input_scrnaseq_reference: "input_scrnaseq_reference"
+      ],
+      toState: { id, output, state ->
+        state + [
+          cell_type_annotation: output.output
+        ]
+      }
+    )
+
     // run all metrics
     | runEach(
       components: metrics,

From 8abe4fcfa59d91c5d171270441dd056d2d50d1a4 Mon Sep 17 00:00:00 2001
From: dariarom94 <romanovskaiadaria@gmail.com>
Date: Thu, 14 May 2026 13:41:04 +0200
Subject: [PATCH 2/2] move tacco to data processors

---
 .../cell_type_annotation_tacco}/config.vsh.yaml               | 4 ++--
 .../cell_type_annotation_tacco}/script.py                     | 0
 src/workflows/run_benchmark/config.vsh.yaml                   | 2 +-
 src/workflows/run_benchmark/main.nf                           | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)
 rename src/{methods_cell_type_annotation/tacco => data_processors/cell_type_annotation_tacco}/config.vsh.yaml (97%)
 rename src/{methods_cell_type_annotation/tacco => data_processors/cell_type_annotation_tacco}/script.py (100%)

diff --git a/src/methods_cell_type_annotation/tacco/config.vsh.yaml b/src/data_processors/cell_type_annotation_tacco/config.vsh.yaml
similarity index 97%
rename from src/methods_cell_type_annotation/tacco/config.vsh.yaml
rename to src/data_processors/cell_type_annotation_tacco/config.vsh.yaml
index dd08f5c..695b4a8 100644
--- a/src/methods_cell_type_annotation/tacco/config.vsh.yaml
+++ b/src/data_processors/cell_type_annotation_tacco/config.vsh.yaml
@@ -1,5 +1,5 @@
-name: tacco
-namespace: methods_cell_type_annotation
+name: cell_type_annotation_tacco
+namespace: data_processors
 label: "TACCO"
 summary: "Assign cell types to segmented cells via optimal transport against a scRNA-seq reference."
 description: |
diff --git a/src/methods_cell_type_annotation/tacco/script.py b/src/data_processors/cell_type_annotation_tacco/script.py
similarity index 100%
rename from src/methods_cell_type_annotation/tacco/script.py
rename to src/data_processors/cell_type_annotation_tacco/script.py
diff --git a/src/workflows/run_benchmark/config.vsh.yaml b/src/workflows/run_benchmark/config.vsh.yaml
index 4bcb7cb..10c513a 100644
--- a/src/workflows/run_benchmark/config.vsh.yaml
+++ b/src/workflows/run_benchmark/config.vsh.yaml
@@ -65,7 +65,7 @@ dependencies:
   - name: control_methods/empty_labels
   - name: control_methods/random_voronoi
   - name: methods/cellpose
-  - name: methods_cell_type_annotation/tacco
+  - name: data_processors/cell_type_annotation_tacco
   - name: metrics/ari
   - name: data_processors/process_prediction
 
diff --git a/src/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf
index 417417a..75043f2 100644
--- a/src/workflows/run_benchmark/main.nf
+++ b/src/workflows/run_benchmark/main.nf
@@ -96,7 +96,7 @@ workflow run_wf {
     )
 
     // annotate segmented cells with cell types
-    | tacco.run(
+    | cell_type_annotation_tacco.run(
       fromState: [
         input_processed_prediction: "input_prediction",
         input_scrnaseq_reference: "input_scrnaseq_reference"