From c148b76299b0258b0934688c4237863f2329622f Mon Sep 17 00:00:00 2001
From: dariarom94
Date: Tue, 12 May 2026 09:47:29 +0200
Subject: [PATCH 1/2] add tacco

---
Review notes (everything between the '---' line and the first 'diff --git'
is ignored by `git am`):
- script.py: the prediction table is now checked for a 'counts' layer, the
  same way the scRNA-seq reference already was, so a missing layer raises a
  descriptive ValueError instead of a bare KeyError.
- script.py: 'feature_name' is cast to str before it becomes var_names.
- script.py: the file now ends with a newline. The blob id on its 'index'
  line predates these edits.
- Line breaks and indentation were reconstructed; the leading whitespace of
  unchanged context lines in main.nf and the workflow config.vsh.yaml is a
  best guess. Regenerate with `git format-patch` if a hunk does not apply.
- TODO(review): main.nf maps the state key "input_prediction" onto
  --input_processed_prediction; confirm that key holds the processed
  prediction and not the raw segmentation output.

 .../tacco/config.vsh.yaml                     | 82 +++++++++++++++++++
 .../tacco/script.py                           | 64 ++++++++++++++
 src/workflows/run_benchmark/config.vsh.yaml   |  1 +
 src/workflows/run_benchmark/main.nf           | 13 +++
 4 files changed, 160 insertions(+)
 create mode 100644 src/methods_cell_type_annotation/tacco/config.vsh.yaml
 create mode 100644 src/methods_cell_type_annotation/tacco/script.py

diff --git a/src/methods_cell_type_annotation/tacco/config.vsh.yaml b/src/methods_cell_type_annotation/tacco/config.vsh.yaml
new file mode 100644
index 0000000..dd08f5c
--- /dev/null
+++ b/src/methods_cell_type_annotation/tacco/config.vsh.yaml
@@ -0,0 +1,82 @@
+name: tacco
+namespace: methods_cell_type_annotation
+label: "TACCO"
+summary: "Assign cell types to segmented cells via optimal transport against a scRNA-seq reference."
+description: |
+  TACCO (Transfer of Annotations to Cells and their Compartments) takes a
+  processed segmentation prediction (cells with per-cell gene expression) and
+  assigns a cell type to each cell by comparing its expression profile to a
+  scRNA-seq reference via optimal transport. It runs as a post-processing step
+  after any segmentation method and adds a cell_type column to the cell table.
+links:
+  documentation: "https://simonwm.github.io/tacco/"
+  repository: "https://github.com/simonwm/tacco"
+references:
+  doi: "10.1038/s41587-023-01657-3"
+
+
+arguments:
+  - name: --input_processed_prediction
+    __merge__: /src/api/file_processed_prediction.yaml
+    direction: input
+    required: true
+
+  - name: --input_scrnaseq_reference
+    __merge__: /src/api/file_scrnaseq_reference.yaml
+    direction: input
+    required: true
+
+  - name: --output
+    type: file
+    label: "Cell type annotation"
+    summary: "AnnData with predicted cell type labels in obs."
+    description: "An h5ad file containing obs with cell_type and cell_id columns, plus uns metadata (dataset_id, method_id)."
+    direction: output
+    required: true
+    default: output.h5ad
+    example: output.h5ad
+    info:
+      format:
+        type: h5ad
+        obs:
+          - type: string
+            name: cell_type
+            description: Predicted cell type label
+            required: true
+          - type: string
+            name: cell_id
+            description: Cell ID matching the segmentation table
+            required: true
+        uns:
+          - type: string
+            name: dataset_id
+            required: true
+          - type: string
+            name: method_id
+            required: true
+
+test_resources:
+  - type: python_script
+    path: /common/component_tests/run_and_check_output.py
+  - path: /resources_test/task_spatial_segmentation/mouse_brain_combined
+    dest: resources_test/task_spatial_segmentation/mouse_brain_combined
+
+resources:
+  - type: python_script
+    path: script.py
+
+engines:
+  - type: docker
+    image: openproblems/base_python:1
+    setup:
+      - type: python
+        pypi: [tacco]
+    __merge__:
+      - /src/base/setup_spatialdata_partial.yaml
+  - type: native
+
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [midtime, midcpu, highmem]
\ No newline at end of file
diff --git a/src/methods_cell_type_annotation/tacco/script.py b/src/methods_cell_type_annotation/tacco/script.py
new file mode 100644
index 0000000..b2d8d0b
--- /dev/null
+++ b/src/methods_cell_type_annotation/tacco/script.py
@@ -0,0 +1,64 @@
+import anndata as ad
+import numpy as np
+import spatialdata as sd
+import tacco
+
+## VIASH START
+par = {
+    'input_processed_prediction': 'resources_test/task_spatial_segmentation/mouse_brain_combined/processed_prediction.zarr',
+    'input_scrnaseq_reference': 'resources_test/task_spatial_segmentation/mouse_brain_combined/scrnaseq_reference.h5ad',
+    'output': 'output.h5ad',
+}
+meta = {
+    'name': 'tacco',
+}
+## VIASH END
+
+print('Reading inputs', flush=True)
+sdata_pred = sd.read_zarr(par['input_processed_prediction'])
+adata_sc = ad.read_h5ad(par['input_scrnaseq_reference'])
+
+table = sdata_pred.tables['table']
+
+if table.n_obs == 0:
+    print('No cells detected in prediction — skipping annotation', flush=True)
+    cell_types = []
+else:
+    # remap Ensembl IDs to gene symbols in-place if needed
+    if 'feature_name' in adata_sc.var.columns:
+        # cast to str so var_names stays a plain string index even if the column is categorical
+        adata_sc.var_names = adata_sc.var['feature_name'].astype(str).values
+        adata_sc = adata_sc[:, ~adata_sc.var_names.duplicated()].copy()
+
+    # both inputs are read from their 'counts' layer below; fail early with a clear message
+    if 'counts' not in adata_sc.layers:
+        raise ValueError("scRNA-seq reference is missing the 'counts' layer.")
+    if 'counts' not in table.layers:
+        raise ValueError("Processed prediction table is missing the 'counts' layer.")
+
+    common_genes = sorted(set(table.var_names) & set(adata_sc.var_names))
+    if not common_genes:
+        raise ValueError('No common genes between prediction cells and scRNA-seq reference.')
+    print(f'Using {len(common_genes)} common genes', flush=True)
+
+    adata_sp_sub = table[:, common_genes].copy()
+    adata_sp_sub.X = adata_sp_sub.layers['counts']
+    adata_sc_sub = adata_sc[:, common_genes].copy()
+    adata_sc_sub.X = adata_sc_sub.layers['counts']
+
+    print('Running TACCO annotation', flush=True)
+    cell_type_annotation = tacco.tl.annotate(
+        adata=adata_sp_sub,
+        reference=adata_sc_sub,
+        annotation_key='cell_type',
+    )
+    # keep, for every row, the column label with the largest value
+    best_type_idx = np.argmax(cell_type_annotation.values, axis=1)
+    cell_types = cell_type_annotation.columns[best_type_idx].tolist()
+
+print('Writing output', flush=True)
+output = ad.AnnData(obs={'cell_type': cell_types, 'cell_id': table.obs['cell_id'].values})
+# dataset_id and method_id are copied through from the prediction table
+output.uns['dataset_id'] = table.uns['dataset_id']
+output.uns['method_id'] = table.uns['method_id']
+output.write_h5ad(par['output'], compression='gzip')
diff --git a/src/workflows/run_benchmark/config.vsh.yaml b/src/workflows/run_benchmark/config.vsh.yaml
index 67bf53c..4bcb7cb 100644
--- a/src/workflows/run_benchmark/config.vsh.yaml
+++ b/src/workflows/run_benchmark/config.vsh.yaml
@@ -65,6 +65,7 @@ dependencies:
   - name: control_methods/empty_labels
   - name: control_methods/random_voronoi
   - name: methods/cellpose
+  - name: methods_cell_type_annotation/tacco
   - name: metrics/ari
   - name: data_processors/process_prediction
 
diff --git a/src/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf
index a61478f..417417a 100644
--- a/src/workflows/run_benchmark/main.nf
+++ b/src/workflows/run_benchmark/main.nf
@@ -95,6 +95,19 @@ workflow run_wf {
       }
     )
 
+    // annotate segmented cells with cell types
+    | tacco.run(
+      fromState: [
+        input_processed_prediction: "input_prediction",
+        input_scrnaseq_reference: "input_scrnaseq_reference"
+      ],
+      toState: { id, output, state ->
+        state + [
+          cell_type_annotation: output.output
+        ]
+      }
+    )
+
     // run all metrics
     | runEach(
       components: metrics,

From 8abe4fcfa59d91c5d171270441dd056d2d50d1a4 Mon Sep 17 00:00:00 2001
From: dariarom94
Date: Thu, 14 May 2026 13:41:04 +0200
Subject: [PATCH 2/2] move tacco to data processors

---
 .../cell_type_annotation_tacco}/config.vsh.yaml | 4 ++--
 .../cell_type_annotation_tacco}/script.py       | 0
 src/workflows/run_benchmark/config.vsh.yaml     | 2 +-
 src/workflows/run_benchmark/main.nf             | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)
 rename src/{methods_cell_type_annotation/tacco => data_processors/cell_type_annotation_tacco}/config.vsh.yaml (97%)
 rename src/{methods_cell_type_annotation/tacco => data_processors/cell_type_annotation_tacco}/script.py (100%)

diff --git a/src/methods_cell_type_annotation/tacco/config.vsh.yaml b/src/data_processors/cell_type_annotation_tacco/config.vsh.yaml
similarity index 97%
rename from src/methods_cell_type_annotation/tacco/config.vsh.yaml
rename to src/data_processors/cell_type_annotation_tacco/config.vsh.yaml
index dd08f5c..695b4a8 100644
--- a/src/methods_cell_type_annotation/tacco/config.vsh.yaml
+++ b/src/data_processors/cell_type_annotation_tacco/config.vsh.yaml
@@ -1,5 +1,5 @@
-name: tacco
-namespace: methods_cell_type_annotation
+name: cell_type_annotation_tacco
+namespace: data_processors
 label: "TACCO"
 summary: "Assign cell types to segmented cells via optimal transport against a scRNA-seq reference."
 description: |
diff --git a/src/methods_cell_type_annotation/tacco/script.py b/src/data_processors/cell_type_annotation_tacco/script.py
similarity index 100%
rename from src/methods_cell_type_annotation/tacco/script.py
rename to src/data_processors/cell_type_annotation_tacco/script.py
diff --git a/src/workflows/run_benchmark/config.vsh.yaml b/src/workflows/run_benchmark/config.vsh.yaml
index 4bcb7cb..10c513a 100644
--- a/src/workflows/run_benchmark/config.vsh.yaml
+++ b/src/workflows/run_benchmark/config.vsh.yaml
@@ -65,7 +65,7 @@ dependencies:
   - name: control_methods/empty_labels
   - name: control_methods/random_voronoi
   - name: methods/cellpose
-  - name: methods_cell_type_annotation/tacco
+  - name: data_processors/cell_type_annotation_tacco
   - name: metrics/ari
   - name: data_processors/process_prediction
 
diff --git a/src/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf
index 417417a..75043f2 100644
--- a/src/workflows/run_benchmark/main.nf
+++ b/src/workflows/run_benchmark/main.nf
@@ -96,7 +96,7 @@ workflow run_wf {
     )
 
     // annotate segmented cells with cell types
-    | tacco.run(
+    | cell_type_annotation_tacco.run(
       fromState: [
         input_processed_prediction: "input_prediction",
         input_scrnaseq_reference: "input_scrnaseq_reference"