diff --git a/src/methods/proseg/config.vsh.yaml b/src/methods/proseg/config.vsh.yaml new file mode 100644 index 0000000..420a97c --- /dev/null +++ b/src/methods/proseg/config.vsh.yaml @@ -0,0 +1,91 @@ +__merge__: /src/api/comp_method.yaml + +name: proseg +label: "Proseg" + +# summary and description are generated by claude sonnet 4.6 based on the paper, github repo and documentation +summary: Infers cell boundaries from transcript spatial distributions using a Cellular Potts model–inspired probabilistic framework. +description: | + Proseg assigns a probability to every configuration of cell labels on a voxel lattice, driven by a + spatial transcript count model that favors transcripts being contained within a single cell, a cell + compactness prior, and optional diffusion terms to account for transcripts that leak outside their + true cell of origin. The model is fit using a Markov chain Monte Carlo sampler that alternates between + sampling cell assignments for each voxel and sampling cell-specific expression parameters. A burn-in + phase at a coarser voxel resolution speeds up initialization before the final-resolution sampling + begins. Because the method is purely transcript-driven it does not require a morphology image, though + an optional nuclear-segmentation prior can be incorporated. Cell boundaries are reported as polygon + shapes and are rasterized to a label image for downstream analysis. + +links: + documentation: "https://github.com/dcjones/proseg" + repository: "https://github.com/dcjones/proseg" +references: + doi: "10.1038/s41592-025-02697-0" + +arguments: + - name: --voxel_layers + type: integer + required: false + description: "Number of layers on the z-axis to model 3D cells." + default: 4 + info: + test_default: 1 + + - name: --samples + type: integer + required: false + description: "Run the sampler for this many iterations." + default: 200 + info: + test_default: 10 + + - name: --burnin_samples + type: integer + required: false + description: "Run the sampler for a preliminary N samples at a lower resolution." + default: 200 + info: + test_default: 10 + + - name: --voxel_size + type: double + required: false + description: "Voxel size in microns on the x/y axis." + default: 1.0 + info: + test_default: 2.0 + + - name: --burnin_voxel_size + type: double + required: false + description: "Larger voxel size for the burn-in phase. Must be an integer multiple of voxel_size." + default: 2.0 + info: + test_default: 4.0 + +resources: + - type: python_script + path: script.py + +engines: + - type: docker + image: openproblems/base_python:1 + __merge__: + - /src/base/setup_spatialdata_partial.yaml + setup: + - type: docker + env: + - PATH="/root/.cargo/bin:${PATH}" + run: + - curl https://sh.rustup.rs -sSf | sh -s -- -y + - echo 'source $HOME/.cargo/env' >> $HOME/.bashrc + - cargo install proseg + - type: python + pypi: [rasterio] + - type: native + +runners: + - type: executable + - type: nextflow + directives: + label: [ hightime, highcpu, midmem ] diff --git a/src/methods/proseg/script.py b/src/methods/proseg/script.py new file mode 100644 index 0000000..6c1d678 --- /dev/null +++ b/src/methods/proseg/script.py @@ -0,0 +1,164 @@ +import os +import shutil +import subprocess +from pathlib import Path +import numpy as np +import pandas as pd +import xarray as xr +import anndata as ad +import spatialdata as sd +from shapely.affinity import affine_transform as shapely_affine_transform +from spatialdata.models import Labels2DModel +from spatialdata.transformations import get_transformation +from rasterio.features import rasterize as rio_rasterize + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': r'resources_test/task_spatial_segmentation/mouse_brain_combined/spatial_unlabelled.zarr', + 'output': r'resources_test/task_spatial_segmentation/mouse_brain_combined/prediction.zarr', + 'voxel_layers': int(r'4'), + 'samples': int(r'200'), + 'burnin_samples': int(r'200'), + 'voxel_size': float(r'1.0'), + 'burnin_voxel_size': float(r'2.0') +} +meta = { + 'name': r'proseg', + 'functionality_name': r'proseg', + 'resources_dir': r'/private/tmp/viash_inject_proseg11113472565944867086', + 'executable': r'/private/tmp/viash_inject_proseg11113472565944867086/proseg', + 'config': r'/private/tmp/viash_inject_proseg11113472565944867086/.config.vsh.yaml', + 'temp_dir': r'/var/folders/fq/ymt0vml175s4yvqxzbmlmpz80000gn/T/', + 'cpus': int(r'123'), + 'memory_b': int(r'123'), + 'memory_kb': int(r'123'), + 'memory_mb': int(r'123'), + 'memory_gb': int(r'123'), + 'memory_tb': int(r'123'), + 'memory_pb': int(r'123'), + 'memory_kib': int(r'123'), + 'memory_mib': int(r'123'), + 'memory_gib': int(r'123'), + 'memory_tib': int(r'123'), + 'memory_pib': int(r'123') +} +dep = { + +} + +## VIASH END + +proseg_dir = Path(meta['temp_dir'] or '/tmp') / 'proseg' +proseg_dir.mkdir(parents=True, exist_ok=True) + +print('Reading input', flush=True) +sdata = sd.read_zarr(par['input']) +image = sdata['morphology_mip']['scale0'].image.compute().to_numpy() +transformation = sdata['morphology_mip']['scale0'].image.transform.copy() +h, w = image.shape[-2:] + +print('Exporting transcripts to CSV', flush=True) +transcripts_df = sdata['transcripts'].compute() +has_z = 'z' in transcripts_df.columns and transcripts_df['z'].notna().any() + +# Proseg requires a prior cell-ID column. Build a simple grid-based prior so +# proseg can refine the segmentation from a meaningful starting state. +x_min, x_max = float(transcripts_df['x'].min()), float(transcripts_df['x'].max()) +y_min, y_max = float(transcripts_df['y'].min()), float(transcripts_df['y'].max()) +grid_size = 20.0 # microns per grid cell — roughly one cell diameter +x_bins = np.arange(x_min, x_max + grid_size, grid_size) +y_bins = np.arange(y_min, y_max + grid_size, grid_size) +x_idx = np.digitize(transcripts_df['x'].to_numpy(), x_bins) - 1 +y_idx = np.digitize(transcripts_df['y'].to_numpy(), y_bins) - 1 +n_x = max(len(x_bins) - 1, 1) +prior_cell_ids = (y_idx * n_x + x_idx) + 1 # 1-indexed; 0 is "unassigned" + +z_vals = transcripts_df['z'] if has_z else np.zeros(len(transcripts_df)) +csv_data = { + 'x': transcripts_df['x'], + 'y': transcripts_df['y'], + 'z': z_vals, + 'gene': transcripts_df['feature_name'], + 'cell_id': prior_cell_ids, +} +transcript_csv = proseg_dir / 'transcripts.csv' +pd.DataFrame(csv_data).to_csv(transcript_csv, index=False) + +n_threads = max((meta.get('cpus') or os.cpu_count() or 1) - 2, 1) +cmd = [ + 'proseg', 'transcripts.csv', + '--x-column', 'x', + '--y-column', 'y', + '--z-column', 'z', + '--gene-column', 'gene', + '--cell-id-column', 'cell_id', + '--cell-id-unassigned', '0', + '--nthreads', str(n_threads), + '--ncomponents', '10', + '--diffusion-probability', '0.2', + '--diffusion-sigma-far', '4', + '--diffusion-sigma-near', '1', + '--nuclear-reassignment-prob', '0.2', + '--cell-compactness', '0.03', + '--voxel-layers', str(par['voxel_layers']), + '--samples', str(par['samples']), + '--burnin-samples', str(par['burnin_samples']), + '--recorded-samples', str(par['samples']), + '--voxel-size', str(par['voxel_size']), + '--burnin-voxel-size', str(par['burnin_voxel_size']), +] +print(f'Running Proseg: {" ".join(cmd)}', flush=True) +subprocess.run(cmd, cwd=str(proseg_dir), check=True) + +print('Reading Proseg Zarr output', flush=True) +proseg_sdata = sd.read_zarr(str(proseg_dir / 'proseg-output.zarr')) +shapes = proseg_sdata.shapes['cell_boundaries'] +print(f'Found {len(shapes)} cell boundaries', flush=True) + +# Proseg boundaries are in global (micron) coordinates. Convert to pixel space +# using the inverse of the morphology image's pixel-to-global transformation. +print('Converting boundaries to pixel space and rasterizing', flush=True) +img_transform = get_transformation(sdata['morphology_mip'], to_coordinate_system='global') +affine_mat = img_transform.to_affine_matrix(input_axes=('x', 'y'), output_axes=('x', 'y')) +inv_affine = np.linalg.inv(affine_mat) + +def global_to_pixel(geom): + a, b, xoff = inv_affine[0, 0], inv_affine[0, 1], inv_affine[0, 2] + d, e, yoff = inv_affine[1, 0], inv_affine[1, 1], inv_affine[1, 2] + return shapely_affine_transform(geom, [a, b, d, e, xoff, yoff]) + +pixel_geoms = shapes['geometry'].apply(global_to_pixel) +shape_geoms = list(zip(pixel_geoms, range(1, len(shapes) + 1))) +labels = rio_rasterize(shape_geoms, out_shape=(h, w), fill=0, dtype=np.int32) + +max_val = labels.max() +if max_val <= np.iinfo(np.uint8).max: + labels = labels.astype(np.uint8) +elif max_val <= np.iinfo(np.uint16).max: + labels = labels.astype(np.uint16) +elif max_val <= np.iinfo(np.uint32).max: + labels = labels.astype(np.uint32) + +print('Creating output data structure', flush=True) +sd_output = sd.SpatialData( + labels={ + 'segmentation': Labels2DModel.parse( + xr.DataArray(labels, name='segmentation', dims=('y', 'x')), + transformations=transformation, + ) + }, + tables={ + 'table': ad.AnnData( + uns={ + 'dataset_id': sdata.tables['table'].uns['dataset_id'], + 'method_id': meta['name'], + } + ) + }, +) + +print('Writing output', flush=True) +if os.path.exists(par['output']): + shutil.rmtree(par['output']) +sd_output.write(par['output']) diff --git a/src/methods/stardist/config.vsh.yaml b/src/methods/stardist/config.vsh.yaml new file mode 100644 index 0000000..f372f11 --- /dev/null +++ b/src/methods/stardist/config.vsh.yaml @@ -0,0 +1,54 @@ +__merge__: /src/api/comp_method.yaml + +name: stardist +label: "StarDist" + +# summary and description are generated by claude sonnet 4.6 based on the paper, github repo and documentation +summary: Detects star-convex shaped cells by predicting radial distances to the object boundary from each pixel. +description: | + StarDist represents each cell as a star-convex polygon defined by a set of radial distances from the cell's + center pixel to its boundary, measured at evenly spaced angles. A U-Net–style convolutional neural network + is trained to predict, for every pixel, both the probability that the pixel lies inside any cell and the + set of radial distances describing the cell polygon centered at that pixel. At inference time, non-maximum + suppression selects the most confident polygon proposals and suppresses overlapping detections, yielding a + final instance-segmentation label image. Because the star-convex parameterization compactly encodes shapes + typical of fluorescence-microscopy nuclei and cells, the method is highly data-efficient and generalizes + well across imaging modalities with pretrained models such as "2D_versatile_fluo". + +links: + documentation: "https://stardist.net" + repository: "https://github.com/stardist/stardist" +references: + doi: "10.48550/arXiv.1806.03535" + +arguments: + - name: --model + type: string + default: "2D_versatile_fluo" + description: "Pretrained StarDist model name." + info: + test_default: "2D_versatile_fluo" + +resources: + - type: python_script + path: script.py + +engines: + - type: docker + image: openproblems/base_python:1 + __merge__: + - /src/base/setup_spatialdata_partial.yaml + setup: + - type: python + pypi: stardist + - type: python + pypi: + - "numpy<2.0.0" + - tensorflow + - type: native + +runners: + - type: executable + - type: nextflow + directives: + label: [ midtime, midcpu, highmem, gpu ] diff --git a/src/methods/stardist/script.py b/src/methods/stardist/script.py new file mode 100644 index 0000000..36b7e54 --- /dev/null +++ b/src/methods/stardist/script.py @@ -0,0 +1,120 @@ +import os +import shutil +import numpy as np +import xarray as xr +import spatialdata as sd +import anndata as ad +from csbdeep.data import Normalizer, normalize_mi_ma +from stardist.models import StarDist2D + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': r'resources_test/task_spatial_segmentation/mouse_brain_combined/spatial_unlabelled.zarr', + 'output': r'resources_test/task_spatial_segmentation/mouse_brain_combined/prediction.zarr', + 'model': r'2D_versatile_fluo' +} +meta = { + 'name': r'stardist', + 'functionality_name': r'stardist', + 'resources_dir': r'/private/tmp/viash_inject_stardist15754903354685377792', + 'executable': r'/private/tmp/viash_inject_stardist15754903354685377792/stardist', + 'config': r'/private/tmp/viash_inject_stardist15754903354685377792/.config.vsh.yaml', + 'temp_dir': r'/var/folders/fq/ymt0vml175s4yvqxzbmlmpz80000gn/T/', + 'cpus': int(r'123'), + 'memory_b': int(r'123'), + 'memory_kb': int(r'123'), + 'memory_mb': int(r'123'), + 'memory_gb': int(r'123'), + 'memory_tb': int(r'123'), + 'memory_pb': int(r'123'), + 'memory_kib': int(r'123'), + 'memory_mib': int(r'123'), + 'memory_gib': int(r'123'), + 'memory_tib': int(r'123'), + 'memory_pib': int(r'123') +} +dep = { + +} + +## VIASH END + + +def convert_to_lower_dtype(arr): + max_val = arr.max() + if max_val <= np.iinfo(np.uint8).max: + new_dtype = np.uint8 + elif max_val <= np.iinfo(np.uint16).max: + new_dtype = np.uint16 + elif max_val <= np.iinfo(np.uint32).max: + new_dtype = np.uint32 + else: + new_dtype = np.uint64 + return arr.astype(new_dtype) + + +# Normalize per block without clipping using percentile-based min/max +class PercentileNormalizer(Normalizer): + def __init__(self, mi, ma): + self.mi, self.ma = mi, ma + + def before(self, x, axes): + return normalize_mi_ma(x, self.mi, self.ma, dtype=np.float32) + + def after(self, *args, **kwargs): + assert False + + @property + def do_after(self): + return False + + +print("Reading input", flush=True) +sdata = sd.read_zarr(par["input"]) +image = sdata["morphology_mip"]["scale0"].image.compute().to_numpy() +transformation = sdata["morphology_mip"]["scale0"].image.transform.copy() + +print(f"Loading pretrained StarDist model: {par['model']}", flush=True) +model = StarDist2D.from_pretrained(par["model"]) + +print("Running StarDist segmentation", flush=True) +mi, ma = np.percentile(image, [1, 99.8]) +normalizer = PercentileNormalizer(mi, ma) +block_size = min(image.shape[1] // 3, 4096) +offset = int(min(block_size / 5.5, 128)) + +labels, _ = model.predict_instances_big( + image[0, :, :], + axes="YX", + block_size=block_size, + min_overlap=offset, + context=offset, + normalizer=normalizer, +) + +print("Post-processing segmentation results", flush=True) +labels = convert_to_lower_dtype(labels) + +print("Creating output data structure", flush=True) +sd_output = sd.SpatialData( + labels={ + "segmentation": sd.models.Labels2DModel.parse( + xr.DataArray(labels, name="segmentation", dims=("y", "x")), + transformations=transformation, + ) + }, + tables={ + "table": ad.AnnData( + uns={ + "dataset_id": sdata.tables["table"].uns["dataset_id"], + "method_id": meta["name"], + } + ) + }, +) + +print("Writing output", flush=True) +if os.path.exists(par["output"]): + shutil.rmtree(par["output"]) +sd_output.write(par["output"]) diff --git a/src/workflows/run_benchmark/config.vsh.yaml b/src/workflows/run_benchmark/config.vsh.yaml index 10c513a..a999117 100644 --- a/src/workflows/run_benchmark/config.vsh.yaml +++ b/src/workflows/run_benchmark/config.vsh.yaml @@ -65,6 +65,8 @@ dependencies: - name: control_methods/empty_labels - name: control_methods/random_voronoi - name: methods/cellpose + - name: methods/stardist + - name: methods/proseg - name: data_processors/cell_type_annotation_tacco - name: metrics/ari - name: data_processors/process_prediction diff --git a/src/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf index 75043f2..368a954 100644 --- a/src/workflows/run_benchmark/main.nf +++ b/src/workflows/run_benchmark/main.nf @@ -10,7 +10,9 @@ methods = [ true_labels, empty_labels, random_voronoi, - cellpose + cellpose, + stardist, + proseg ] // construct list of metrics