Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 82 additions & 0 deletions src/data_processors/cell_type_annotation_tacco/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Viash component config for the TACCO cell type annotation data processor.
# The component reads a processed segmentation prediction and a scRNA-seq
# reference and writes an h5ad file with one predicted cell type per cell.
name: cell_type_annotation_tacco
namespace: data_processors
label: "TACCO"
summary: "Assign cell types to segmented cells via optimal transport against a scRNA-seq reference."
description: |
TACCO (Transfer of Annotations to Cells and their Compartments) takes a
processed segmentation prediction (cells with per-cell gene expression) and
assigns a cell type to each cell by comparing its expression profile to a
scRNA-seq reference via optimal transport. It runs as a post-processing step
after any segmentation method and adds a cell_type column to the cell table.
links:
documentation: "https://simonwm.github.io/tacco/"
repository: "https://github.com/simonwm/tacco"
references:
doi: "10.1038/s41587-023-01657-3"


# Component arguments. The two inputs pull their file schema from the shared
# API definitions referenced by __merge__ (schemas not visible here).
arguments:
- name: --input_processed_prediction
__merge__: /src/api/file_processed_prediction.yaml
direction: input
required: true

- name: --input_scrnaseq_reference
__merge__: /src/api/file_scrnaseq_reference.yaml
direction: input
required: true

# Output file. The obs columns and uns keys declared under info.format are the
# ones script.py writes: obs cell_type / cell_id and uns dataset_id / method_id.
- name: --output
type: file
label: "Cell type annotation"
summary: "AnnData with predicted cell type labels in obs."
description: "An h5ad file containing obs with cell_type and cell_id columns, plus uns metadata (dataset_id, method_id)."
direction: output
required: true
default: output.h5ad
example: output.h5ad
info:
format:
type: h5ad
obs:
- type: string
name: cell_type
description: Predicted cell type label
required: true
- type: string
name: cell_id
description: Cell ID matching the segmentation table
required: true
uns:
- type: string
name: dataset_id
required: true
- type: string
name: method_id
required: true

# Test setup: the shared run-and-check script plus the mouse brain test
# resources, which contain the default inputs used by script.py.
test_resources:
- type: python_script
path: /common/component_tests/run_and_check_output.py
- path: /resources_test/task_spatial_segmentation/mouse_brain_combined
dest: resources_test/task_spatial_segmentation/mouse_brain_combined

# The component implementation.
resources:
- type: python_script
path: script.py

# Execution engines: a Docker image with tacco installed from PyPI on top of
# the base Python image, merged with the shared spatialdata setup partial
# (contents not visible here), and a native engine.
engines:
- type: docker
image: openproblems/base_python:1
setup:
- type: python
pypi: [tacco]
__merge__:
- /src/base/setup_spatialdata_partial.yaml
- type: native

# Runners: standalone executable and Nextflow; the labels request resource
# classes for the Nextflow runner.
runners:
- type: executable
- type: nextflow
directives:
label: [midtime, midcpu, highmem]
58 changes: 58 additions & 0 deletions src/data_processors/cell_type_annotation_tacco/script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import anndata as ad
import numpy as np
import spatialdata as sd
import tacco

## VIASH START
# Default parameters used when the script is run directly; the region between
# the VIASH START/END markers is presumably replaced by viash at build time.
par = {
    'input_processed_prediction': 'resources_test/task_spatial_segmentation/mouse_brain_combined/processed_prediction.zarr',
    'input_scrnaseq_reference': 'resources_test/task_spatial_segmentation/mouse_brain_combined/scrnaseq_reference.h5ad',
    'output': 'output.h5ad',
}
meta = {
    'name': 'tacco',
}
## VIASH END

# Annotate each segmented cell with a cell type by running TACCO against a
# scRNA-seq reference, then write the labels (plus cell ids and the dataset /
# method ids copied from the prediction table) to an h5ad file.

print('Reading inputs', flush=True)
sdata_pred = sd.read_zarr(par['input_processed_prediction'])
adata_sc = ad.read_h5ad(par['input_scrnaseq_reference'])

table = sdata_pred.tables['table']

if table.n_obs == 0:
    # Nothing to annotate: still emit a (zero-row) output file below.
    print('No cells detected in prediction — skipping annotation', flush=True)
    cell_types = []
else:
    # Use the 'feature_name' column as reference gene names when present.
    # Cast to str so a categorical column does not produce a non-string index.
    if 'feature_name' in adata_sc.var.columns:
        adata_sc.var_names = adata_sc.var['feature_name'].astype(str).to_numpy()
    # Keep only the first occurrence of each gene name in the reference.
    adata_sc = adata_sc[:, ~adata_sc.var_names.duplicated()].copy()

    # Both inputs must provide raw counts; fail with a clear message instead
    # of a bare KeyError further down.
    if 'counts' not in adata_sc.layers:
        raise ValueError("scRNA-seq reference is missing the 'counts' layer.")
    if 'counts' not in table.layers:
        raise ValueError("Processed prediction table is missing the 'counts' layer.")

    # Restrict both datasets to their shared genes, in a deterministic order.
    common_genes = sorted(set(table.var_names) & set(adata_sc.var_names))
    if not common_genes:
        raise ValueError('No common genes between prediction cells and scRNA-seq reference.')
    print(f'Using {len(common_genes)} common genes', flush=True)

    # Work on copies so X can be replaced by the counts layer without touching
    # the objects that were read from disk.
    adata_sp_sub = table[:, common_genes].copy()
    adata_sp_sub.X = adata_sp_sub.layers['counts']
    adata_sc_sub = adata_sc[:, common_genes].copy()
    adata_sc_sub.X = adata_sc_sub.layers['counts']

    print('Running TACCO annotation', flush=True)
    # The result is used as a cells x cell-types table of scores.
    cell_type_annotation = tacco.tl.annotate(
        adata=adata_sp_sub,
        reference=adata_sc_sub,
        annotation_key='cell_type',
    )
    # Pick, per cell, the cell type (column) with the highest score.
    best_type_idx = np.argmax(cell_type_annotation.to_numpy(), axis=1)
    cell_types = cell_type_annotation.columns[best_type_idx].tolist()

print('Writing output', flush=True)
output = ad.AnnData(obs={'cell_type': cell_types, 'cell_id': table.obs['cell_id'].values})
output.uns['dataset_id'] = table.uns['dataset_id']
output.uns['method_id'] = table.uns['method_id']
output.write_h5ad(par['output'], compression='gzip')
1 change: 1 addition & 0 deletions src/workflows/run_benchmark/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ dependencies:
- name: control_methods/empty_labels
- name: control_methods/random_voronoi
- name: methods/cellpose
- name: data_processors/cell_type_annotation_tacco
- name: metrics/ari
- name: data_processors/process_prediction

Expand Down
13 changes: 13 additions & 0 deletions src/workflows/run_benchmark/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,19 @@ workflow run_wf {
}
)

// annotate segmented cells with cell types
| cell_type_annotation_tacco.run(
fromState: [
input_processed_prediction: "input_prediction",
input_scrnaseq_reference: "input_scrnaseq_reference"
],
toState: { id, output, state ->
state + [
cell_type_annotation: output.output
]
}
)

// run all metrics
| runEach(
components: metrics,
Expand Down
Loading