Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 91 additions & 0 deletions src/methods/proseg/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
__merge__: /src/api/comp_method.yaml

name: proseg
label: "Proseg"

# summary and description are generated by claude sonnet 4.6 based on the paper, github repo and documentation
summary: Infers cell boundaries from transcript spatial distributions using a Cellular Potts model–inspired probabilistic framework.
description: |
Proseg assigns a probability to every configuration of cell labels on a voxel lattice, driven by a
spatial transcript count model that favors transcripts being contained within a single cell, a cell
compactness prior, and optional diffusion terms to account for transcripts that leak outside their
true cell of origin. The model is fit using a Markov chain Monte Carlo sampler that alternates between
sampling cell assignments for each voxel and sampling cell-specific expression parameters. A burn-in
phase at a coarser voxel resolution speeds up initialization before the final-resolution sampling
begins. Because the method is purely transcript-driven it does not require a morphology image, though
an optional nuclear-segmentation prior can be incorporated. Cell boundaries are reported as polygon
shapes and are rasterized to a label image for downstream analysis.

links:
documentation: "https://github.com/dcjones/proseg"
repository: "https://github.com/dcjones/proseg"
references:
doi: "10.1038/s41592-025-02697-0"

arguments:
- name: --voxel_layers
type: integer
required: false
description: "Number of layers on the z-axis to model 3D cells."
default: 4
info:
test_default: 1

- name: --samples
type: integer
required: false
description: "Run the sampler for this many iterations."
default: 200
info:
test_default: 10

- name: --burnin_samples
type: integer
required: false
description: "Run the sampler for a preliminary N samples at a lower resolution."
default: 200
info:
test_default: 10

- name: --voxel_size
type: double
required: false
description: "Voxel size in microns on the x/y axis."
default: 1.0
info:
test_default: 2.0

- name: --burnin_voxel_size
type: double
required: false
description: "Larger voxel size for the burn-in phase. Must be an integer multiple of voxel_size."
default: 2.0
info:
test_default: 4.0

resources:
- type: python_script
path: script.py

engines:
- type: docker
image: openproblems/base_python:1
__merge__:
- /src/base/setup_spatialdata_partial.yaml
setup:
- type: docker
env:
- PATH="/root/.cargo/bin:${PATH}"
run:
- curl https://sh.rustup.rs -sSf | sh -s -- -y
- echo 'source $HOME/.cargo/env' >> $HOME/.bashrc
- cargo install proseg
- type: python
pypi: [rasterio]
- type: native

runners:
- type: executable
- type: nextflow
directives:
label: [ hightime, highcpu, midmem ]
164 changes: 164 additions & 0 deletions src/methods/proseg/script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
import os
import shutil
import subprocess
from pathlib import Path
import numpy as np
import pandas as pd
import xarray as xr
import anndata as ad
import spatialdata as sd
from shapely.affinity import affine_transform as shapely_affine_transform
from spatialdata.models import Labels2DModel
from spatialdata.transformations import get_transformation
from rasterio.features import rasterize as rio_rasterize

## VIASH START
# The following code has been auto-generated by Viash.
par = {
'input': r'resources_test/task_spatial_segmentation/mouse_brain_combined/spatial_unlabelled.zarr',
'output': r'resources_test/task_spatial_segmentation/mouse_brain_combined/prediction.zarr',
'voxel_layers': int(r'4'),
'samples': int(r'200'),
'burnin_samples': int(r'200'),
'voxel_size': float(r'1.0'),
'burnin_voxel_size': float(r'2.0')
}
meta = {
'name': r'proseg',
'functionality_name': r'proseg',
'resources_dir': r'/private/tmp/viash_inject_proseg11113472565944867086',
'executable': r'/private/tmp/viash_inject_proseg11113472565944867086/proseg',
'config': r'/private/tmp/viash_inject_proseg11113472565944867086/.config.vsh.yaml',
'temp_dir': r'/var/folders/fq/ymt0vml175s4yvqxzbmlmpz80000gn/T/',
'cpus': int(r'123'),
'memory_b': int(r'123'),
'memory_kb': int(r'123'),
'memory_mb': int(r'123'),
'memory_gb': int(r'123'),
'memory_tb': int(r'123'),
'memory_pb': int(r'123'),
'memory_kib': int(r'123'),
'memory_mib': int(r'123'),
'memory_gib': int(r'123'),
'memory_tib': int(r'123'),
'memory_pib': int(r'123')
}
dep = {

}

## VIASH END

proseg_dir = Path(meta['temp_dir'] or '/tmp') / 'proseg'
proseg_dir.mkdir(parents=True, exist_ok=True)

print('Reading input', flush=True)
sdata = sd.read_zarr(par['input'])
image = sdata['morphology_mip']['scale0'].image.compute().to_numpy()
transformation = sdata['morphology_mip']['scale0'].image.transform.copy()
h, w = image.shape[-2:]

print('Exporting transcripts to CSV', flush=True)
transcripts_df = sdata['transcripts'].compute()
has_z = 'z' in transcripts_df.columns and transcripts_df['z'].notna().any()

# Proseg requires a prior cell-ID column. Build a simple grid-based prior so
# proseg can refine the segmentation from a meaningful starting state.
x_min, x_max = float(transcripts_df['x'].min()), float(transcripts_df['x'].max())
y_min, y_max = float(transcripts_df['y'].min()), float(transcripts_df['y'].max())
grid_size = 20.0 # microns per grid cell — roughly one cell diameter
x_bins = np.arange(x_min, x_max + grid_size, grid_size)
y_bins = np.arange(y_min, y_max + grid_size, grid_size)
x_idx = np.digitize(transcripts_df['x'].to_numpy(), x_bins) - 1
y_idx = np.digitize(transcripts_df['y'].to_numpy(), y_bins) - 1
n_x = max(len(x_bins) - 1, 1)
prior_cell_ids = (y_idx * n_x + x_idx) + 1 # 1-indexed; 0 is "unassigned"

z_vals = transcripts_df['z'] if has_z else np.zeros(len(transcripts_df))
csv_data = {
'x': transcripts_df['x'],
'y': transcripts_df['y'],
'z': z_vals,
'gene': transcripts_df['feature_name'],
'cell_id': prior_cell_ids,
}
transcript_csv = proseg_dir / 'transcripts.csv'
pd.DataFrame(csv_data).to_csv(transcript_csv, index=False)

n_threads = max((meta.get('cpus') or os.cpu_count() or 1) - 2, 1)
cmd = [
'proseg', 'transcripts.csv',
'--x-column', 'x',
'--y-column', 'y',
'--z-column', 'z',
'--gene-column', 'gene',
'--cell-id-column', 'cell_id',
'--cell-id-unassigned', '0',
'--nthreads', str(n_threads),
'--ncomponents', '10',
'--diffusion-probability', '0.2',
'--diffusion-sigma-far', '4',
'--diffusion-sigma-near', '1',
'--nuclear-reassignment-prob', '0.2',
'--cell-compactness', '0.03',
'--voxel-layers', str(par['voxel_layers']),
'--samples', str(par['samples']),
'--burnin-samples', str(par['burnin_samples']),
'--recorded-samples', str(par['samples']),
'--voxel-size', str(par['voxel_size']),
'--burnin-voxel-size', str(par['burnin_voxel_size']),
]
print(f'Running Proseg: {" ".join(cmd)}', flush=True)
subprocess.run(cmd, cwd=str(proseg_dir), check=True)

print('Reading Proseg Zarr output', flush=True)
proseg_sdata = sd.read_zarr(str(proseg_dir / 'proseg-output.zarr'))
shapes = proseg_sdata.shapes['cell_boundaries']
print(f'Found {len(shapes)} cell boundaries', flush=True)

# Proseg boundaries are in global (micron) coordinates. Convert to pixel space
# using the inverse of the morphology image's pixel-to-global transformation.
print('Converting boundaries to pixel space and rasterizing', flush=True)
img_transform = get_transformation(sdata['morphology_mip'], to_coordinate_system='global')
affine_mat = img_transform.to_affine_matrix(input_axes=('x', 'y'), output_axes=('x', 'y'))
inv_affine = np.linalg.inv(affine_mat)

def global_to_pixel(geom):
a, b, xoff = inv_affine[0, 0], inv_affine[0, 1], inv_affine[0, 2]
d, e, yoff = inv_affine[1, 0], inv_affine[1, 1], inv_affine[1, 2]
return shapely_affine_transform(geom, [a, b, d, e, xoff, yoff])

pixel_geoms = shapes['geometry'].apply(global_to_pixel)
shape_geoms = list(zip(pixel_geoms, range(1, len(shapes) + 1)))
labels = rio_rasterize(shape_geoms, out_shape=(h, w), fill=0, dtype=np.int32)

max_val = labels.max()
if max_val <= np.iinfo(np.uint8).max:
labels = labels.astype(np.uint8)
elif max_val <= np.iinfo(np.uint16).max:
labels = labels.astype(np.uint16)
elif max_val <= np.iinfo(np.uint32).max:
labels = labels.astype(np.uint32)

print('Creating output data structure', flush=True)
sd_output = sd.SpatialData(
labels={
'segmentation': Labels2DModel.parse(
xr.DataArray(labels, name='segmentation', dims=('y', 'x')),
transformations=transformation,
)
},
tables={
'table': ad.AnnData(
uns={
'dataset_id': sdata.tables['table'].uns['dataset_id'],
'method_id': meta['name'],
}
)
},
)

print('Writing output', flush=True)
if os.path.exists(par['output']):
shutil.rmtree(par['output'])
sd_output.write(par['output'])
54 changes: 54 additions & 0 deletions src/methods/stardist/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
__merge__: /src/api/comp_method.yaml

name: stardist
label: "StarDist"

# summary and description are generated by claude sonnet 4.6 based on the paper, github repo and documentation
summary: Detects star-convex shaped cells by predicting radial distances to the object boundary from each pixel.
description: |
StarDist represents each cell as a star-convex polygon defined by a set of radial distances from the cell's
center pixel to its boundary, measured at evenly spaced angles. A U-Net–style convolutional neural network
is trained to predict, for every pixel, both the probability that the pixel lies inside any cell and the
set of radial distances describing the cell polygon centered at that pixel. At inference time, non-maximum
suppression selects the most confident polygon proposals and suppresses overlapping detections, yielding a
final instance-segmentation label image. Because the star-convex parameterization compactly encodes shapes
typical of fluorescence-microscopy nuclei and cells, the method is highly data-efficient and generalizes
well across imaging modalities with pretrained models such as "2D_versatile_fluo".

links:
documentation: "https://stardist.net"
repository: "https://github.com/stardist/stardist"
references:
doi: "10.48550/arXiv.1806.03535"

arguments:
- name: --model
type: string
default: "2D_versatile_fluo"
description: "Pretrained StarDist model name."
info:
test_default: "2D_versatile_fluo"

resources:
- type: python_script
path: script.py

engines:
- type: docker
image: openproblems/base_python:1
__merge__:
- /src/base/setup_spatialdata_partial.yaml
setup:
- type: python
pypi: stardist
- type: python
pypi:
- "numpy<2.0.0"
- tensorflow
- type: native

runners:
- type: executable
- type: nextflow
directives:
label: [ midtime, midcpu, highmem, gpu ]
Loading
Loading