From a210a0a005dedba2ebed47ab8e4b055832518412 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 25 Mar 2026 15:45:23 +0000
Subject: [PATCH 1/2] Merge overlapping tools and update README

Co-authored-by: ypriverol <52113+ypriverol@users.noreply.github.com>
Agent-Logs-Url: https://github.com/OpenMS/agentomics/sessions/a6d63691-a178-4fa1-b118-46b4dbb887ca
---
 README.md                                     |  13 +-
 .../isotope_pattern_analyzer/README.md        |  83 +++
 .../isotope_pattern_analyzer.py               | 415 +++++++++++++++
 .../requirements.txt                          |   1 -
 .../tests/conftest.py                         |   0
 .../tests/test_isotope_pattern_analyzer.py    | 218 ++++++++
 .../isotope_pattern_fit_scorer/README.md      |  16 -
 .../isotope_pattern_fit_scorer.py             | 258 ----------
 .../requirements.txt                          |   2 -
 .../tests/test_isotope_pattern_fit_scorer.py  | 111 -----
 .../isotope_pattern_matcher/README.md         |  11 -
 .../isotope_pattern_matcher.py                | 147 ------
 .../tests/test_isotope_pattern_matcher.py     |  50 --
 .../isotope_pattern_scorer.py                 | 154 ------
 .../isotope_pattern_scorer/tests/conftest.py  |  15 -
 .../tests/test_isotope_pattern_scorer.py      |  53 --
 .../mgf_mzml_converter/README.md              |  50 ++
 .../mgf_mzml_converter/mgf_mzml_converter.py  | 471 ++++++++++++++++++
 .../mgf_mzml_converter}/requirements.txt      |   1 -
 .../mgf_mzml_converter}/tests/conftest.py     |   0
 .../tests/test_mgf_mzml_converter.py          | 300 +++++++++++
 .../mgf_to_mzml_converter/README.md           |  15 -
 .../mgf_to_mzml_converter.py                  | 110 ----
 .../mgf_to_mzml_converter/requirements.txt    |   2 -
 .../mgf_to_mzml_converter/tests/conftest.py   |  15 -
 .../tests/test_mgf_to_mzml_converter.py       | 101 ----
 .../mzml_to_mgf_converter/README.md           |  19 -
 .../mzml_to_mgf_converter.py                  | 214 --------
 .../mzml_to_mgf_converter/requirements.txt    |   2 -
 .../mzml_to_mgf_converter/tests/conftest.py   |  15 -
 .../tests/test_mzml_to_mgf_converter.py       | 186 -------
 31 files changed, 1542 insertions(+), 1506 deletions(-)
 create mode 100644 tools/metabolomics/spectral_analysis/isotope_pattern_analyzer/README.md
 create mode 100644 tools/metabolomics/spectral_analysis/isotope_pattern_analyzer/isotope_pattern_analyzer.py
 rename tools/metabolomics/spectral_analysis/{isotope_pattern_matcher => isotope_pattern_analyzer}/requirements.txt (60%)
 rename tools/metabolomics/spectral_analysis/{isotope_pattern_fit_scorer => isotope_pattern_analyzer}/tests/conftest.py (100%)
 create mode 100644 tools/metabolomics/spectral_analysis/isotope_pattern_analyzer/tests/test_isotope_pattern_analyzer.py
 delete mode 100644 tools/metabolomics/spectral_analysis/isotope_pattern_fit_scorer/README.md
 delete mode 100644 tools/metabolomics/spectral_analysis/isotope_pattern_fit_scorer/isotope_pattern_fit_scorer.py
 delete mode 100644 tools/metabolomics/spectral_analysis/isotope_pattern_fit_scorer/requirements.txt
 delete mode 100644 tools/metabolomics/spectral_analysis/isotope_pattern_fit_scorer/tests/test_isotope_pattern_fit_scorer.py
 delete mode 100644 tools/metabolomics/spectral_analysis/isotope_pattern_matcher/README.md
 delete mode 100644 tools/metabolomics/spectral_analysis/isotope_pattern_matcher/isotope_pattern_matcher.py
 delete mode 100644 tools/metabolomics/spectral_analysis/isotope_pattern_matcher/tests/test_isotope_pattern_matcher.py
 delete mode 100644 tools/metabolomics/spectral_analysis/isotope_pattern_scorer/isotope_pattern_scorer.py
 delete mode 100644 tools/metabolomics/spectral_analysis/isotope_pattern_scorer/tests/conftest.py
 delete mode 100644 tools/metabolomics/spectral_analysis/isotope_pattern_scorer/tests/test_isotope_pattern_scorer.py
 create mode 100644 tools/proteomics/file_conversion/mgf_mzml_converter/README.md
 create mode 100644 tools/proteomics/file_conversion/mgf_mzml_converter/mgf_mzml_converter.py
 rename tools/{metabolomics/spectral_analysis/isotope_pattern_scorer => proteomics/file_conversion/mgf_mzml_converter}/requirements.txt (60%)
 rename tools/{metabolomics/spectral_analysis/isotope_pattern_matcher => proteomics/file_conversion/mgf_mzml_converter}/tests/conftest.py (100%)
 create mode 100644 tools/proteomics/file_conversion/mgf_mzml_converter/tests/test_mgf_mzml_converter.py
 delete mode 100644 tools/proteomics/file_conversion/mgf_to_mzml_converter/README.md
 delete mode 100644 tools/proteomics/file_conversion/mgf_to_mzml_converter/mgf_to_mzml_converter.py
 delete mode 100644 tools/proteomics/file_conversion/mgf_to_mzml_converter/requirements.txt
 delete mode 100644 tools/proteomics/file_conversion/mgf_to_mzml_converter/tests/conftest.py
 delete mode 100644 tools/proteomics/file_conversion/mgf_to_mzml_converter/tests/test_mgf_to_mzml_converter.py
 delete mode 100644 tools/proteomics/file_conversion/mzml_to_mgf_converter/README.md
 delete mode 100644 tools/proteomics/file_conversion/mzml_to_mgf_converter/mzml_to_mgf_converter.py
 delete mode 100644 tools/proteomics/file_conversion/mzml_to_mgf_converter/requirements.txt
 delete mode 100644 tools/proteomics/file_conversion/mzml_to_mgf_converter/tests/conftest.py
 delete mode 100644 tools/proteomics/file_conversion/mzml_to_mgf_converter/tests/test_mzml_to_mgf_converter.py

diff --git a/README.md b/README.md
index 0d0b861..6d4e05d 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # Agentomics
 
-A growing collection of **123 standalone CLI tools** built with [pyopenms](https://pyopenms.readthedocs.io/) for proteomics and metabolomics workflows. Every tool in this repository fills a gap not covered by existing OpenMS TOPP tools — small, focused utilities that researchers need daily but typically write as throwaway scripts.
+A growing collection of **118 standalone CLI tools** built with [pyopenms](https://pyopenms.readthedocs.io/) for proteomics and metabolomics workflows. Every tool in this repository fills a gap not covered by existing OpenMS TOPP tools — small, focused utilities that researchers need daily but typically write as throwaway scripts.
 
 ## Why This Exists
 
@@ -155,12 +155,11 @@ Both `ruff` and `pytest` must pass with zero errors.
 | [`fasta_in_silico_digest_stats`](tools/proteomics/fasta_utils/fasta_in_silico_digest_stats/) | Digest a FASTA and report peptide-level statistics |
 | [`fasta_taxonomy_splitter`](tools/proteomics/fasta_utils/fasta_taxonomy_splitter/) | Split multi-organism FASTA by taxonomy from headers |
 
-#### File Conversion (8 tools)
+#### File Conversion (7 tools)
 
 | Tool | Description |
 |------|-------------|
-| [`mzml_to_mgf_converter`](tools/proteomics/file_conversion/mzml_to_mgf_converter/) | Convert MS2 spectra from mzML to MGF format |
-| [`mgf_to_mzml_converter`](tools/proteomics/file_conversion/mgf_to_mzml_converter/) | Convert MGF files to mzML format |
+| [`mgf_mzml_converter`](tools/proteomics/file_conversion/mgf_mzml_converter/) | Bidirectional MGF ↔ mzML converter with spectrum filtering (merged from `mgf_to_mzml_converter` + `mzml_to_mgf_converter`) |
 | [`consensus_map_to_matrix`](tools/proteomics/file_conversion/consensus_map_to_matrix/) | Convert consensusXML to flat quantification matrix |
 | [`idxml_to_tsv_exporter`](tools/proteomics/file_conversion/idxml_to_tsv_exporter/) | Export idXML identification results to flat TSV |
 | [`ms_data_to_csv_exporter`](tools/proteomics/file_conversion/ms_data_to_csv_exporter/) | Export mzML/featureXML data to CSV with column selection |
@@ -281,15 +280,13 @@ Both `ruff` and `pytest` must pass with zero errors.
 | [`mass_defect_filter`](tools/metabolomics/feature_processing/mass_defect_filter/) | Filter features by mass defect and Kendrick mass defect |
 | [`metabolite_feature_detection`](tools/metabolomics/feature_processing/metabolite_feature_detection/) | Metabolite feature detection from LC-MS data |
 
-#### Spectral Analysis (6 tools)
+#### Spectral Analysis (4 tools)
 
 | Tool | Description |
 |------|-------------|
 | [`spectral_entropy_scorer`](tools/metabolomics/spectral_analysis/spectral_entropy_scorer/) | Compute spectral entropy similarity (Li & Fiehn 2021) |
 | [`neutral_loss_scanner`](tools/metabolomics/spectral_analysis/neutral_loss_scanner/) | Scan MS2 spectra for characteristic neutral losses |
-| [`isotope_pattern_scorer`](tools/metabolomics/spectral_analysis/isotope_pattern_scorer/) | Score observed vs. theoretical isotope patterns |
-| [`isotope_pattern_matcher`](tools/metabolomics/spectral_analysis/isotope_pattern_matcher/) | Generate theoretical isotope distributions and cosine similarity scoring |
-| [`isotope_pattern_fit_scorer`](tools/metabolomics/spectral_analysis/isotope_pattern_fit_scorer/) | Score isotope pattern fit, detect Cl/Br from M+2 enhancement |
+| [`isotope_pattern_analyzer`](tools/metabolomics/spectral_analysis/isotope_pattern_analyzer/) | Generate theoretical isotope distributions, cosine similarity scoring, Da/ppm tolerance, Cl/Br halogen detection (merged from `isotope_pattern_matcher` + `isotope_pattern_scorer` + `isotope_pattern_fit_scorer`) |
 | [`massql_query_tool`](tools/metabolomics/spectral_analysis/massql_query_tool/) | Query mzML data using MassQL-like syntax |
 
 #### Compound Annotation (4 tools)
diff --git a/tools/metabolomics/spectral_analysis/isotope_pattern_analyzer/README.md b/tools/metabolomics/spectral_analysis/isotope_pattern_analyzer/README.md
new file mode 100644
index 0000000..c4b4f44
--- /dev/null
+++ b/tools/metabolomics/spectral_analysis/isotope_pattern_analyzer/README.md
@@ -0,0 +1,83 @@
+# Isotope Pattern Analyzer
+
+Generate theoretical isotope distributions for molecular formulas, score observed
+isotope patterns using cosine similarity, and detect halogenation (Cl/Br).
+
+This tool consolidates `isotope_pattern_matcher`, `isotope_pattern_scorer`, and
+`isotope_pattern_fit_scorer` into a single, improved utility.
+
+## Features
+
+- Theoretical isotope pattern generation via pyopenms `CoarseIsotopePatternGenerator`
+- Cosine similarity scoring between observed and theoretical patterns
+- **Da or ppm m/z tolerance** — choose your preferred unit
+- Halogen (Cl/Br) detection from M+2 peak enhancement
+- JSON output with per-peak detail
+- Terminal bar-chart preview of the theoretical distribution
+- Optional numpy acceleration for cosine computation
+
+## Installation
+
+```bash
+pip install pyopenms
+```
+
+## CLI Usage
+
+```bash
+# Generate and display the isotope pattern for glucose
+python isotope_pattern_analyzer.py --formula C6H12O6
+
+# Score observed peaks against the formula (colon-separated format)
+python isotope_pattern_analyzer.py --formula C6H12O6 \
+    --observed "180.063:100,181.067:6.5,182.070:0.5" \
+    --output result.json
+
+# Use legacy comma-separated format (one --peaks flag per peak)
+python isotope_pattern_analyzer.py --formula C6H12O6 \
+    --peaks 180.063,100.0 --peaks 181.067,6.5 \
+    --output result.json
+
+# Use ppm tolerance
+python isotope_pattern_analyzer.py --formula C6H12O6 \
+    --observed "180.063:100,181.067:6.5" \
+    --tolerance 10 --tolerance-unit ppm
+
+# Detect halogenation (chlorinated compound example)
+python isotope_pattern_analyzer.py --formula C6H5Cl \
+    --observed "112.007:100,113.011:5.5,114.004:33.0" \
+    --output halogen_result.json
+```
+
+## Output JSON Structure
+
+```json
+{
+  "formula": "C6H12O6",
+  "cosine_similarity": 0.9987,
+  "n_peaks_compared": 3,
+  "tolerance": 0.05,
+  "tolerance_unit": "da",
+  "peaks": [
+    {"peak_index": 0, "obs_mz": 180.063, "theo_mz": 180.0634, "obs_intensity": 100.0, "theo_intensity": 100.0},
+    ...
+  ],
+  "theoretical_pattern": [...],
+  "halogen_detection": {
+    "m2_ratio_observed": 0.5,
+    "m2_ratio_theoretical": 0.42,
+    "m2_excess": 0.08,
+    "halogen_flag": false,
+    "possible_halogen": "none"
+  }
+}
+```
+
+## Halogen Detection Thresholds
+
+| M+2 excess above theoretical | Interpretation                  |
+|------------------------------|---------------------------------|
+| < 10 %                       | No halogenation detected        |
+| 10–20 %                      | Cl (weak signal)                |
+| 20–70 %                      | Cl                              |
+| > 70 %                       | Br                              |
diff --git a/tools/metabolomics/spectral_analysis/isotope_pattern_analyzer/isotope_pattern_analyzer.py b/tools/metabolomics/spectral_analysis/isotope_pattern_analyzer/isotope_pattern_analyzer.py
new file mode 100644
index 0000000..a29de8e
--- /dev/null
+++ b/tools/metabolomics/spectral_analysis/isotope_pattern_analyzer/isotope_pattern_analyzer.py
@@ -0,0 +1,415 @@
+"""
+Isotope Pattern Analyzer
+========================
+Generate theoretical isotope distributions for molecular formulas, score observed
+isotope patterns against theoretical ones using cosine similarity, and detect
+halogenation (Cl/Br) from M+2 peak enhancement.
+
+This tool consolidates the functionality of isotope_pattern_matcher,
+isotope_pattern_scorer, and isotope_pattern_fit_scorer into a single,
+more capable utility.
+
+Features
+--------
+- Theoretical isotope pattern generation via pyopenms CoarseIsotopePatternGenerator
+- Cosine similarity scoring between observed and theoretical patterns
+- Da or ppm m/z tolerance modes
+- Halogen (Cl/Br) detection from M+2 peak enhancement
+- JSON output with per-peak detail
+- Bar-chart preview in the terminal
+
+Usage
+-----
+    # Generate isotope pattern for glucose
+    python isotope_pattern_analyzer.py --formula C6H12O6
+
+    # Score observed peaks (mz:intensity format) against formula
+    python isotope_pattern_analyzer.py --formula C6H12O6 \\
+        --observed "180.063:100,181.067:6.5,182.070:0.5" \\
+        --output result.json
+
+    # Score using comma-separated mz,intensity pairs (legacy format)
+    python isotope_pattern_analyzer.py --formula C6H12O6 \\
+        --peaks 180.063,100.0 --peaks 181.067,6.5 \\
+        --output result.json
+
+    # Use ppm tolerance
+    python isotope_pattern_analyzer.py --formula C6H12O6 \\
+        --observed "180.063:100,181.067:6.5" --tolerance 10 --tolerance-unit ppm
+"""
+
+import json
+import math
+import sys
+
+import click
+
+try:
+    import pyopenms as oms
+except ImportError:
+    sys.exit("pyopenms is required. Install it with:  pip install pyopenms")
+
+try:
+    import numpy as np
+
+    _HAS_NUMPY = True
+except ImportError:
+    _HAS_NUMPY = False
+
+
+def get_theoretical_pattern(formula: str, max_isotopes: int = 6) -> list[tuple[float, float]]:
+    """Compute the theoretical isotope distribution for a molecular formula.
+
+    Parameters
+    ----------
+    formula:
+        Empirical formula string, e.g. ``"C6H12O6"``.
+    max_isotopes:
+        Maximum number of isotope peaks to return (default: 6).
+
+    Returns
+    -------
+    list of (mz, relative_abundance)
+        Sorted by m/z; relative abundances normalized so the maximum is 100.
+    """
+    ef = oms.EmpiricalFormula(formula)
+    iso_dist = ef.getIsotopeDistribution(oms.CoarseIsotopePatternGenerator(max_isotopes))
+    container = iso_dist.getContainer()
+    peaks = [(p.getMZ(), p.getIntensity()) for p in container]
+    if not peaks:
+        return []
+    max_int = max(p[1] for p in peaks)
+    if max_int == 0:
+        return peaks
+    return [(mz, intensity / max_int * 100.0) for mz, intensity in peaks]
+
+
+def _mz_tolerance_da(mz: float, tolerance: float, unit: str) -> float:
+    """Convert tolerance to Da, supporting 'da' and 'ppm' units.
+
+    Parameters
+    ----------
+    mz:
+        Reference m/z value (only used when unit is 'ppm').
+    tolerance:
+        Tolerance value.
+    unit:
+        Either ``'da'`` or ``'ppm'``.
+
+    Returns
+    -------
+    float
+        Tolerance in Daltons.
+    """
+    if unit == "ppm":
+        return mz * tolerance * 1e-6
+    return tolerance
+
+
+def cosine_similarity(
+    observed: list[tuple[float, float]],
+    theoretical: list[tuple[float, float]],
+    tolerance: float = 0.05,
+    tolerance_unit: str = "da",
+) -> float:
+    """Compute cosine similarity between observed and theoretical isotope patterns.
+
+    Each theoretical peak is matched to the closest observed peak within the
+    specified tolerance.  Unmatched theoretical peaks contribute zero to the
+    dot product.
+
+    Parameters
+    ----------
+    observed:
+        List of (mz, intensity) for observed peaks.
+    theoretical:
+        List of (mz, intensity) for theoretical peaks.
+    tolerance:
+        m/z tolerance value.
+    tolerance_unit:
+        ``'da'`` for Daltons (default) or ``'ppm'`` for parts per million.
+
+    Returns
+    -------
+    float
+        Cosine similarity score in [0, 1].
+    """
+    if not observed or not theoretical:
+        return 0.0
+
+    if _HAS_NUMPY:
+        obs_mz = np.array([p[0] for p in observed])
+        obs_int = np.array([p[1] for p in observed], dtype=float)
+        theo_int = np.array([p[1] for p in theoretical], dtype=float)
+
+        matched_obs = np.zeros(len(theoretical))
+        for i, (tmz, _) in enumerate(theoretical):
+            tol_da = _mz_tolerance_da(tmz, tolerance, tolerance_unit)
+            diffs = np.abs(obs_mz - tmz)
+            mask = diffs <= tol_da
+            if mask.any():
+                matched_obs[i] = obs_int[np.argmin(diffs)]
+
+        dot = float(np.dot(theo_int, matched_obs))
+        norm_t = float(np.linalg.norm(theo_int))
+        norm_o = float(np.linalg.norm(matched_obs))
+    else:
+        matched_obs = []
+        for tmz, _ in theoretical:
+            tol_da = _mz_tolerance_da(tmz, tolerance, tolerance_unit)
+            best = 0.0
+            for omz, oint in observed:
+                if abs(omz - tmz) <= tol_da:
+                    best = oint
+                    break
+            matched_obs.append(best)
+
+        dot = sum(t * o for (_, t), o in zip(theoretical, matched_obs))
+        norm_t = math.sqrt(sum(t ** 2 for _, t in theoretical))
+        norm_o = math.sqrt(sum(o ** 2 for o in matched_obs))
+
+    if norm_t == 0 or norm_o == 0:
+        return 0.0
+    return dot / (norm_t * norm_o)
+
+
+def detect_halogenation(
+    observed: list[tuple[float, float]],
+    theoretical: list[tuple[float, float]],
+) -> dict:
+    """Detect Cl/Br halogenation from an enhanced M+2 isotope peak.
+
+    Chlorine has a natural Cl-37 / Cl-35 ratio of ~32.5%, giving an M+2
+    peak ~32.5% of M.  Bromine has an almost 1:1 Br-79/Br-81 ratio, giving
+    an M+2 peak ~97% of M.  If the observed M+2 significantly exceeds the
+    theoretical value, a halogen is flagged.
+
+    Parameters
+    ----------
+    observed:
+        Observed isotope pattern as (mz, intensity) list.
+    theoretical:
+        Theoretical isotope pattern (without explicit halogens) as (mz,
+        intensity) list.
+
+    Returns
+    -------
+    dict
+        Keys: m2_ratio_observed, m2_ratio_theoretical, m2_excess,
+        halogen_flag, possible_halogen.
+    """
+    result: dict = {
+        "m2_ratio_observed": None,
+        "m2_ratio_theoretical": None,
+        "m2_excess": None,
+        "halogen_flag": False,
+        "possible_halogen": "none",
+    }
+    if len(observed) < 3 or len(theoretical) < 3:
+        return result
+
+    obs_m0 = observed[0][1]
+    obs_m2 = observed[2][1]
+    theo_m0 = max(theoretical[0][1], 1e-10)
+    theo_m2 = theoretical[2][1]
+
+    if obs_m0 == 0:
+        return result
+
+    obs_ratio = obs_m2 / obs_m0 * 100.0
+    theo_ratio = theo_m2 / theo_m0 * 100.0
+    excess = obs_ratio - theo_ratio
+
+    result["m2_ratio_observed"] = round(obs_ratio, 2)
+    result["m2_ratio_theoretical"] = round(theo_ratio, 2)
+    result["m2_excess"] = round(excess, 2)
+
+    if excess > 10.0:
+        result["halogen_flag"] = True
+        if excess > 70.0:
+            result["possible_halogen"] = "Br"
+        elif excess > 20.0:
+            result["possible_halogen"] = "Cl"
+        else:
+            result["possible_halogen"] = "Cl (weak signal)"
+
+    return result
+
+
+def parse_observed(observed_str: str) -> list[tuple[float, float]]:
+    """Parse an observed peak string into (mz, intensity) pairs.
+
+    Accepts both ``"mz:intensity"`` (colon-separated) and
+    ``"mz,intensity"`` (comma-separated, one pair per call) formats.
+    Multiple peaks are separated by commas in colon format.
+
+    Parameters
+    ----------
+    observed_str:
+        String of the form ``"180.063:100,181.067:6.5"`` (colon-separated
+        pairs) or a single ``"180.063,100.0"`` (comma-separated pair).
+
+    Returns
+    -------
+    list of (mz, intensity)
+
+    Raises
+    ------
+    ValueError
+        If a token cannot be parsed.
+    """
+    peaks = []
+    # If colons are present, assume "mz:intensity,mz:intensity,..." format
+    if ":" in observed_str:
+        for token in observed_str.split(","):
+            token = token.strip()
+            if not token:
+                continue
+            parts = token.split(":")
+            if len(parts) != 2:
+                raise ValueError(f"Expected 'mz:intensity', got: {token!r}")
+            peaks.append((float(parts[0].strip()), float(parts[1].strip())))
+    else:
+        # Single "mz,intensity" pair (legacy --peaks flag style)
+        parts = observed_str.split(",")
+        if len(parts) != 2:
+            raise ValueError(f"Expected 'mz,intensity', got: {observed_str!r}")
+        peaks.append((float(parts[0].strip()), float(parts[1].strip())))
+    return peaks
+
+
+def score_pattern(
+    observed: list[tuple[float, float]],
+    formula: str,
+    max_isotopes: int = 6,
+    tolerance: float = 0.05,
+    tolerance_unit: str = "da",
+) -> dict:
+    """Score an observed isotope pattern against a theoretical one.
+
+    Parameters
+    ----------
+    observed:
+        List of (mz, intensity) observed peaks.
+    formula:
+        Molecular formula string.
+    max_isotopes:
+        Maximum number of theoretical isotope peaks to generate.
+    tolerance:
+        m/z tolerance value.
+    tolerance_unit:
+        ``'da'`` or ``'ppm'``.
+
+    Returns
+    -------
+    dict
+        Contains formula, cosine_similarity, per-peak detail, and
+        halogen detection results.
+    """
+    theoretical = get_theoretical_pattern(formula, max_isotopes)
+    cos_sim = cosine_similarity(observed, theoretical, tolerance, tolerance_unit)
+    halogen = detect_halogenation(observed, theoretical)
+
+    n = min(len(observed), len(theoretical))
+    peak_detail = []
+    for i in range(n):
+        peak_detail.append({
+            "peak_index": i,
+            "obs_mz": round(observed[i][0], 6),
+            "theo_mz": round(theoretical[i][0], 6),
+            "obs_intensity": round(observed[i][1], 4),
+            "theo_intensity": round(theoretical[i][1], 4),
+        })
+
+    return {
+        "formula": formula,
+        "cosine_similarity": round(cos_sim, 6),
+        "n_peaks_compared": n,
+        "tolerance": tolerance,
+        "tolerance_unit": tolerance_unit,
+        "peaks": peak_detail,
+        "theoretical_pattern": [{"mz": round(mz, 6), "intensity": round(ab, 4)} for mz, ab in theoretical],
+        "halogen_detection": halogen,
+    }
+
+
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+
+
+@click.command(
+    help="Generate theoretical isotope patterns and optionally score them against observed peaks."
+)
+@click.option("--formula", required=True, help="Molecular formula (e.g. C6H12O6)")
+@click.option("--max-isotopes", type=int, default=6, help="Max isotope peaks to generate (default: 6)")
+@click.option(
+    "--observed", default=None,
+    help="Observed peaks as 'mz:int,mz:int,...' (colon-separated pairs).",
+)
+@click.option(
+    "--peaks", multiple=True,
+    help="Observed peaks as 'mz,intensity' pairs (repeatable, legacy format).",
+)
+@click.option("--tolerance", type=float, default=0.05, help="m/z tolerance (default: 0.05)")
+@click.option(
+    "--tolerance-unit", type=click.Choice(["da", "ppm"]), default="da",
+    help="Tolerance unit: 'da' (Daltons, default) or 'ppm'.",
+)
+@click.option("--output", default=None, help="Output JSON file (optional)")
+def main(formula, max_isotopes, observed, peaks, tolerance, tolerance_unit, output):
+    """CLI entry point."""
+    theoretical = get_theoretical_pattern(formula, max_isotopes)
+    if not theoretical:
+        print("Could not compute isotope distribution for the given formula.")
+        return
+
+    # Print theoretical pattern
+    print(f"Theoretical isotope pattern for {formula}:")
+    print(f"\n{'Peak':>5}  {'m/z':>12}  {'Rel. Abundance (%)':>18}")
+    print("-" * 42)
+    for i, (mz, rel_ab) in enumerate(theoretical):
+        bar = "#" * int(rel_ab / 5)
+        print(f"  M+{i}  {mz:>12.4f}  {rel_ab:>6.2f} %  {bar}")
+
+    # Collect observed peaks
+    obs_peaks: list[tuple[float, float]] = []
+    if observed:
+        obs_peaks = parse_observed(observed)
+    elif peaks:
+        for p in peaks:
+            obs_peaks.extend(parse_observed(p))
+
+    if obs_peaks:
+        result = score_pattern(obs_peaks, formula, max_isotopes, tolerance, tolerance_unit)
+        cos_sim = result["cosine_similarity"]
+        print(f"\nCosine similarity ({tolerance} {tolerance_unit}): {cos_sim:.4f}")
+        if cos_sim >= 0.9:
+            print("  ✓ Excellent match (≥ 0.90)")
+        elif cos_sim >= 0.7:
+            print("  ~ Good match (≥ 0.70)")
+        else:
+            print("  ✗ Poor match (< 0.70)")
+
+        hal = result["halogen_detection"]
+        if hal["halogen_flag"]:
+            print(f"\nHalogen detected: {hal['possible_halogen']}")
+            print(f"  M+2 observed: {hal['m2_ratio_observed']}% | theoretical: {hal['m2_ratio_theoretical']}%")
+
+        if output:
+            with open(output, "w") as fh:
+                json.dump(result, fh, indent=2)
+            print(f"\nDetailed results written to {output}")
+    elif output:
+        # No observed peaks but output requested — write theoretical pattern only
+        data = {
+            "formula": formula,
+            "theoretical_pattern": [{"mz": round(mz, 6), "intensity": round(ab, 4)} for mz, ab in theoretical],
+        }
+        with open(output, "w") as fh:
+            json.dump(data, fh, indent=2)
+        print(f"\nTheoretical pattern written to {output}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/metabolomics/spectral_analysis/isotope_pattern_matcher/requirements.txt b/tools/metabolomics/spectral_analysis/isotope_pattern_analyzer/requirements.txt
similarity index 60%
rename from tools/metabolomics/spectral_analysis/isotope_pattern_matcher/requirements.txt
rename to tools/metabolomics/spectral_analysis/isotope_pattern_analyzer/requirements.txt
index 18d6bbb..7ce28ec 100644
--- a/tools/metabolomics/spectral_analysis/isotope_pattern_matcher/requirements.txt
+++ b/tools/metabolomics/spectral_analysis/isotope_pattern_analyzer/requirements.txt
@@ -1,2 +1 @@
 pyopenms
-click
diff --git a/tools/metabolomics/spectral_analysis/isotope_pattern_fit_scorer/tests/conftest.py b/tools/metabolomics/spectral_analysis/isotope_pattern_analyzer/tests/conftest.py
similarity index 100%
rename from tools/metabolomics/spectral_analysis/isotope_pattern_fit_scorer/tests/conftest.py
rename to tools/metabolomics/spectral_analysis/isotope_pattern_analyzer/tests/conftest.py
diff --git a/tools/metabolomics/spectral_analysis/isotope_pattern_analyzer/tests/test_isotope_pattern_analyzer.py b/tools/metabolomics/spectral_analysis/isotope_pattern_analyzer/tests/test_isotope_pattern_analyzer.py
new file mode 100644
index 0000000..2b046d0
--- /dev/null
+++ b/tools/metabolomics/spectral_analysis/isotope_pattern_analyzer/tests/test_isotope_pattern_analyzer.py
@@ -0,0 +1,218 @@
+"""Tests for isotope_pattern_analyzer."""
+
+import json
+import os
+import tempfile
+
+import pytest
+from conftest import requires_pyopenms
+
+
+@requires_pyopenms
+class TestGetTheoreticalPattern:
+    def test_glucose_pattern(self):
+        from isotope_pattern_analyzer import get_theoretical_pattern
+
+        pattern = get_theoretical_pattern("C6H12O6", max_isotopes=3)
+        assert len(pattern) == 3
+        # First peak should be the most abundant (normalized to 100)
+        assert pattern[0][1] == pytest.approx(100.0)
+        # Subsequent peaks should be smaller
+        assert pattern[1][1] < 100.0
+
+    def test_sucrose_max_isotopes(self):
+        from isotope_pattern_analyzer import get_theoretical_pattern
+
+        pattern = get_theoretical_pattern("C12H22O11", max_isotopes=4)
+        assert len(pattern) == 4
+
+    def test_mz_ordering(self):
+        from isotope_pattern_analyzer import get_theoretical_pattern
+
+        pattern = get_theoretical_pattern("C12H22O11", max_isotopes=4)
+        mzs = [mz for mz, _ in pattern]
+        assert mzs == sorted(mzs)
+
+    def test_empty_formula_raises(self):
+        from isotope_pattern_analyzer import get_theoretical_pattern
+
+        # Empty formula should return empty list (pyopenms may raise or return empty)
+        try:
+            result = get_theoretical_pattern("", max_isotopes=3)
+            assert result == []
+        except Exception:
+            pass  # acceptable
+
+
+@requires_pyopenms
+class TestCosineSimilarity:
+    def test_perfect_match(self):
+        from isotope_pattern_analyzer import cosine_similarity
+
+        peaks = [(180.0, 100.0), (181.0, 6.5), (182.0, 0.5)]
+        sim = cosine_similarity(peaks, peaks, tolerance=0.05)
+        assert abs(sim - 1.0) < 1e-6
+
+    def test_no_overlap(self):
+        from isotope_pattern_analyzer import cosine_similarity
+
+        obs = [(180.0, 100.0), (181.0, 6.5)]
+        theo = [(300.0, 100.0), (301.0, 6.5)]
+        sim = cosine_similarity(obs, theo, tolerance=0.05)
+        assert sim == 0.0
+
+    def test_partial_overlap(self):
+        from isotope_pattern_analyzer import cosine_similarity
+
+        obs = [(180.0, 100.0), (181.0, 6.5), (182.0, 50.0)]  # Enhanced M+2
+        theo = [(180.0, 100.0), (181.0, 6.5), (182.0, 0.5)]
+        sim = cosine_similarity(obs, theo, tolerance=0.05)
+        assert 0.0 < sim < 1.0
+
+    def test_empty_returns_zero(self):
+        from isotope_pattern_analyzer import cosine_similarity
+
+        assert cosine_similarity([], [(180.0, 100.0)]) == 0.0
+        assert cosine_similarity([(180.0, 100.0)], []) == 0.0
+
+    def test_ppm_tolerance(self):
+        from isotope_pattern_analyzer import cosine_similarity
+
+        # At 180 Da, 10 ppm = 0.0018 Da
+        peaks = [(180.001, 100.0), (181.002, 6.5)]
+        theo = [(180.0, 100.0), (181.0, 6.5)]
+        sim_ppm = cosine_similarity(peaks, theo, tolerance=10.0, tolerance_unit="ppm")
+        sim_da = cosine_similarity(peaks, theo, tolerance=0.01, tolerance_unit="da")
+        # Both should give reasonably similar results since 10 ppm ≈ 0.0018 Da at 180 Da
+        assert sim_ppm > 0.9
+        assert sim_da > 0.9
+
+
+@requires_pyopenms
+class TestDetectHalogenation:
+    def test_no_halogen(self):
+        from isotope_pattern_analyzer import detect_halogenation
+
+        obs = [(180.0, 100.0), (181.0, 6.5), (182.0, 0.5)]
+        theo = [(180.0, 100.0), (181.0, 6.5), (182.0, 0.5)]
+        result = detect_halogenation(obs, theo)
+        assert result["halogen_flag"] is False
+        assert result["possible_halogen"] == "none"
+
+    def test_chlorine_detection(self):
+        from isotope_pattern_analyzer import detect_halogenation
+
+        # Chlorine: M+2 ~32.5% of M+0
+        obs = [(180.0, 100.0), (181.0, 6.5), (182.0, 35.0)]
+        theo = [(180.0, 100.0), (181.0, 6.5), (182.0, 0.5)]
+        result = detect_halogenation(obs, theo)
+        assert result["halogen_flag"] is True
+        assert "Cl" in result["possible_halogen"]
+
+    def test_bromine_detection(self):
+        from isotope_pattern_analyzer import detect_halogenation
+
+        # Bromine: M+2 ~97% of M+0
+        obs = [(180.0, 100.0), (181.0, 6.5), (182.0, 98.0)]
+        theo = [(180.0, 100.0), (181.0, 6.5), (182.0, 0.5)]
+        result = detect_halogenation(obs, theo)
+        assert result["halogen_flag"] is True
+        assert result["possible_halogen"] == "Br"
+
+    def test_insufficient_peaks(self):
+        from isotope_pattern_analyzer import detect_halogenation
+
+        obs = [(180.0, 100.0), (181.0, 6.5)]  # only 2 peaks
+        theo = [(180.0, 100.0), (181.0, 6.5)]
+        result = detect_halogenation(obs, theo)
+        assert result["halogen_flag"] is False
+        assert result["m2_ratio_observed"] is None
+
+
+@requires_pyopenms
+class TestParseObserved:
+    def test_colon_format(self):
+        from isotope_pattern_analyzer import parse_observed
+
+        peaks = parse_observed("180.063:100,181.067:6.5,182.070:0.5")
+        assert len(peaks) == 3
+        assert peaks[0] == (180.063, 100.0)
+        assert peaks[2] == (182.070, 0.5)
+
+    def test_comma_format_single(self):
+        from isotope_pattern_analyzer import parse_observed
+
+        peaks = parse_observed("181.0709,100.0")
+        assert len(peaks) == 1
+        assert peaks[0] == (181.0709, 100.0)
+
+    def test_whitespace_stripped(self):
+        from isotope_pattern_analyzer import parse_observed
+
+        peaks = parse_observed("180.063 : 100 , 181.067 : 6.5")
+        assert len(peaks) == 2
+
+    def test_invalid_format_raises(self):
+        from isotope_pattern_analyzer import parse_observed
+
+        with pytest.raises(ValueError):
+            parse_observed("notavalue")
+
+
+@requires_pyopenms
+class TestScorePattern:
+    def test_basic_scoring(self):
+        from isotope_pattern_analyzer import score_pattern
+
+        obs = [(180.063, 100.0), (181.067, 6.5), (182.070, 0.5)]
+        result = score_pattern(obs, "C6H12O6")
+        assert "cosine_similarity" in result
+        assert result["cosine_similarity"] > 0.0
+        assert "halogen_detection" in result
+        assert "theoretical_pattern" in result
+
+    def test_score_range(self):
+        from isotope_pattern_analyzer import score_pattern
+
+        obs = [(180.063, 100.0), (181.067, 6.5), (182.070, 0.5)]
+        result = score_pattern(obs, "C6H12O6")
+        assert 0.0 <= result["cosine_similarity"] <= 1.0
+
+    def test_ppm_tolerance_mode(self):
+        from isotope_pattern_analyzer import score_pattern
+
+        obs = [(180.063, 100.0), (181.067, 6.5)]
+        result = score_pattern(obs, "C6H12O6", tolerance=10.0, tolerance_unit="ppm")
+        assert result["tolerance_unit"] == "ppm"
+        assert result["cosine_similarity"] > 0.0
+
+    def test_output_json(self):
+        from isotope_pattern_analyzer import score_pattern
+
+        obs = [(180.063, 100.0), (181.067, 6.5)]
+        result = score_pattern(obs, "C6H12O6")
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            out_path = os.path.join(tmpdir, "result.json")
+            with open(out_path, "w") as fh:
+                json.dump(result, fh, indent=2)
+            assert os.path.exists(out_path)
+            with open(out_path) as fh:
+                loaded = json.load(fh)
+            assert loaded["formula"] == "C6H12O6"
+
+    def test_n_peaks_compared(self):
+        from isotope_pattern_analyzer import score_pattern
+
+        # 2 observed peaks, 6 theoretical → n_peaks_compared = 2
+        obs = [(180.063, 100.0), (181.067, 6.5)]
+        result = score_pattern(obs, "C6H12O6", max_isotopes=6)
+        assert result["n_peaks_compared"] == 2
+
+    def test_halogen_integration(self):
+        from isotope_pattern_analyzer import score_pattern
+
+        # Chlorinated compound with enhanced M+2
+        obs = [(180.0, 100.0), (181.0, 6.5), (182.0, 35.0)]
+        result = score_pattern(obs, "C6H12O6")
+        assert result["halogen_detection"]["halogen_flag"] is True
diff --git a/tools/metabolomics/spectral_analysis/isotope_pattern_fit_scorer/README.md b/tools/metabolomics/spectral_analysis/isotope_pattern_fit_scorer/README.md
deleted file mode 100644
index 716d595..0000000
--- a/tools/metabolomics/spectral_analysis/isotope_pattern_fit_scorer/README.md
+++ /dev/null
@@ -1,16 +0,0 @@
-# Isotope Pattern Fit Scorer
-
-Score observed vs theoretical isotope patterns using cosine similarity. Detect Cl/Br halogenation from enhanced M+2 peaks.
-
-## Usage
-
-```bash
-python isotope_pattern_fit_scorer.py --observed "180.063:100,181.067:6.5,182.070:0.5" --formula C6H12O6 --output fit.json
-```
-
-### Output
-
-JSON file with:
-- `cosine_similarity`: Score between 0 and 1
-- `observed_peaks` and `theoretical_peaks`: Peak lists
-- `halogen_detection`: M+2 excess analysis with Cl/Br flagging
diff --git a/tools/metabolomics/spectral_analysis/isotope_pattern_fit_scorer/isotope_pattern_fit_scorer.py b/tools/metabolomics/spectral_analysis/isotope_pattern_fit_scorer/isotope_pattern_fit_scorer.py
deleted file mode 100644
index d53d9ce..0000000
--- a/tools/metabolomics/spectral_analysis/isotope_pattern_fit_scorer/isotope_pattern_fit_scorer.py
+++ /dev/null
@@ -1,258 +0,0 @@
-"""
-Isotope Pattern Fit Scorer
-==========================
-Score observed vs theoretical isotope patterns using cosine similarity.
-Detect halogenation (Cl/Br) from enhanced M+2 peaks.
-
-Usage
------
-    python isotope_pattern_fit_scorer.py \
-        --observed "180.063:100,181.067:6.5,182.070:0.5" \
-        --formula C6H12O6 --output fit.json
-"""
-
-import json
-import math
-import sys
-
-import click
-
-try:
-    import pyopenms as oms
-except ImportError:
-    sys.exit("pyopenms is required. Install it with:  pip install pyopenms")
-
-
-def get_theoretical_pattern(
-    formula: str,
-    max_isotopes: int = 6,
-) -> list[tuple[float, float]]:
-    """Compute the theoretical isotope distribution for a molecular formula.
-
-    Parameters
-    ----------
-    formula:
-        Empirical formula string, e.g. ``"C6H12O6"``.
-    max_isotopes:
-        Maximum number of isotope peaks.
-
-    Returns
-    -------
-    list of (mz, relative_abundance)
-        Relative abundances normalized so the max is 100.
-    """
-    ef = oms.EmpiricalFormula(formula)
-    iso_dist = ef.getIsotopeDistribution(
-        oms.CoarseIsotopePatternGenerator(max_isotopes)
-    )
-    container = iso_dist.getContainer()
-    peaks = [(p.getMZ(), p.getIntensity()) for p in container]
-    if not peaks:
-        return []
-    max_int = max(p[1] for p in peaks)
-    if max_int == 0:
-        return peaks
-    return [(mz, intensity / max_int * 100.0) for mz, intensity in peaks]
-
-
-def parse_observed(observed_str: str) -> list[tuple[float, float]]:
-    """Parse observed peaks from a string.
-
-    Format: ``"mz1:int1,mz2:int2,..."``
-
-    Parameters
-    ----------
-    observed_str:
-        Comma-separated mz:intensity pairs.
-
-    Returns
-    -------
-    list of (mz, intensity)
-    """
-    peaks = []
-    for pair in observed_str.split(","):
-        pair = pair.strip()
-        if not pair:
-            continue
-        parts = pair.split(":")
-        if len(parts) != 2:
-            raise ValueError(f"Invalid peak format: {pair!r}. Expected mz:intensity")
-        mz = float(parts[0].strip())
-        intensity = float(parts[1].strip())
-        peaks.append((mz, intensity))
-    return peaks
-
-
-def cosine_similarity(
-    observed: list[tuple[float, float]],
-    theoretical: list[tuple[float, float]],
-    mz_tolerance: float = 0.05,
-) -> float:
-    """Compute cosine similarity between observed and theoretical patterns.
-
-    Parameters
-    ----------
-    observed:
-        List of (mz, intensity) for observed peaks.
-    theoretical:
-        List of (mz, intensity) for theoretical peaks.
-    mz_tolerance:
-        Maximum m/z difference to consider a match.
-
-    Returns
-    -------
-    float
-        Cosine similarity score between 0 and 1.
-    """
-    if not observed or not theoretical:
-        return 0.0
-
-    dot_product = 0.0
-    norm_obs = 0.0
-    norm_theo = 0.0
-
-    for obs_mz, obs_int in observed:
-        norm_obs += obs_int ** 2
-
-    for theo_mz, theo_int in theoretical:
-        norm_theo += theo_int ** 2
-
-    for obs_mz, obs_int in observed:
-        for theo_mz, theo_int in theoretical:
-            if abs(obs_mz - theo_mz) <= mz_tolerance:
-                dot_product += obs_int * theo_int
-
-    if norm_obs == 0 or norm_theo == 0:
-        return 0.0
-    return dot_product / (math.sqrt(norm_obs) * math.sqrt(norm_theo))
-
-
-def detect_halogenation(
-    observed: list[tuple[float, float]],
-    theoretical: list[tuple[float, float]],
-    mz_tolerance: float = 0.05,
-) -> dict:
-    """Detect Cl/Br halogenation from enhanced M+2 peak.
-
-    Cl has a natural isotope ratio of ~32.5% for M+2 relative to M+0.
-    Br has a natural isotope ratio of ~97% for M+2 relative to M+0.
-    If the observed M+2 is significantly higher than theoretical, flag
-    potential halogenation.
-
-    Parameters
-    ----------
-    observed:
-        Observed isotope pattern.
-    theoretical:
-        Theoretical isotope pattern (without halogens).
-    mz_tolerance:
-        m/z tolerance for peak matching.
-
-    Returns
-    -------
-    dict
-        Keys: m2_observed, m2_theoretical, m2_excess, halogen_flag, possible_halogen.
-    """
-    result = {
-        "m2_observed": None,
-        "m2_theoretical": None,
-        "m2_excess": None,
-        "halogen_flag": False,
-        "possible_halogen": "none",
-    }
-
-    if len(observed) < 3 or len(theoretical) < 3:
-        return result
-
-    # M+0 is the first peak; M+2 is the third peak
-    obs_m0_int = observed[0][1]
-    obs_m2_int = observed[2][1] if len(observed) > 2 else 0.0
-    theo_m2_int = theoretical[2][1] if len(theoretical) > 2 else 0.0
-
-    if obs_m0_int == 0:
-        return result
-
-    obs_m2_ratio = obs_m2_int / obs_m0_int * 100.0
-    theo_m2_ratio = theo_m2_int / max(theoretical[0][1], 1e-10) * 100.0
-
-    result["m2_observed"] = round(obs_m2_ratio, 2)
-    result["m2_theoretical"] = round(theo_m2_ratio, 2)
-    excess = obs_m2_ratio - theo_m2_ratio
-    result["m2_excess"] = round(excess, 2)
-
-    if excess > 10.0:
-        result["halogen_flag"] = True
-        if excess > 70.0:
-            result["possible_halogen"] = "Br"
-        elif excess > 20.0:
-            result["possible_halogen"] = "Cl"
-        else:
-            result["possible_halogen"] = "Cl (weak signal)"
-
-    return result
-
-
-def score_pattern(
-    observed_str: str,
-    formula: str,
-    max_isotopes: int = 6,
-    mz_tolerance: float = 0.05,
-) -> dict:
-    """Score an observed isotope pattern against a theoretical one.
-
-    Parameters
-    ----------
-    observed_str:
-        Comma-separated mz:intensity pairs.
-    formula:
-        Molecular formula.
-    max_isotopes:
-        Max isotope peaks.
-    mz_tolerance:
-        m/z tolerance.
-
-    Returns
-    -------
-    dict
-        Score results including cosine_similarity, halogen detection, and peak data.
-    """
-    observed = parse_observed(observed_str)
-    theoretical = get_theoretical_pattern(formula, max_isotopes=max_isotopes)
-    cos_sim = cosine_similarity(observed, theoretical, mz_tolerance=mz_tolerance)
-    halogen = detect_halogenation(observed, theoretical, mz_tolerance=mz_tolerance)
-
-    return {
-        "formula": formula,
-        "cosine_similarity": round(cos_sim, 6),
-        "observed_peaks": [{"mz": mz, "intensity": i} for mz, i in observed],
-        "theoretical_peaks": [{"mz": round(mz, 6), "intensity": round(i, 4)} for mz, i in theoretical],
-        "halogen_detection": halogen,
-    }
-
-
-@click.command()
-@click.option("--observed", required=True,
-              help="Observed peaks as 'mz1:int1,mz2:int2,...'")
-@click.option("--formula", required=True, help="Molecular formula (e.g. C6H12O6)")
-@click.option("--max-isotopes", type=int, default=6, help="Max isotope peaks (default: 6)")
-@click.option("--mz-tolerance", type=float, default=0.05, help="m/z tolerance (default: 0.05)")
-@click.option("--output", required=True, help="Output JSON file")
-def main(observed, formula, max_isotopes, mz_tolerance, output) -> None:
-    """CLI entry point."""
-    result = score_pattern(
-        observed, formula,
-        max_isotopes=max_isotopes, mz_tolerance=mz_tolerance,
-    )
-
-    with open(output, "w") as fh:
-        json.dump(result, fh, indent=2)
-
-    print(f"Cosine similarity: {result['cosine_similarity']:.4f}")
-    halogen = result["halogen_detection"]
-    if halogen["halogen_flag"]:
-        print(f"Halogen detected: {halogen['possible_halogen']} (M+2 excess: {halogen['m2_excess']}%)")
-    print(f"Results written to {output}")
-
-
-if __name__ == "__main__":
-    main()
diff --git a/tools/metabolomics/spectral_analysis/isotope_pattern_fit_scorer/requirements.txt b/tools/metabolomics/spectral_analysis/isotope_pattern_fit_scorer/requirements.txt
deleted file mode 100644
index 18d6bbb..0000000
--- a/tools/metabolomics/spectral_analysis/isotope_pattern_fit_scorer/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-pyopenms
-click
diff --git a/tools/metabolomics/spectral_analysis/isotope_pattern_fit_scorer/tests/test_isotope_pattern_fit_scorer.py b/tools/metabolomics/spectral_analysis/isotope_pattern_fit_scorer/tests/test_isotope_pattern_fit_scorer.py
deleted file mode 100644
index deb337a..0000000
--- a/tools/metabolomics/spectral_analysis/isotope_pattern_fit_scorer/tests/test_isotope_pattern_fit_scorer.py
+++ /dev/null
@@ -1,111 +0,0 @@
-"""Tests for isotope_pattern_fit_scorer."""
-
-import json
-import os
-import tempfile
-
-import pytest
-from conftest import requires_pyopenms
-
-
-@requires_pyopenms
-class TestIsotopePatternFitScorer:
-    def test_get_theoretical_pattern(self):
-        from isotope_pattern_fit_scorer import get_theoretical_pattern
-
-        pattern = get_theoretical_pattern("C6H12O6", max_isotopes=4)
-        assert len(pattern) == 4
-        # First peak should be the most abundant (normalized to 100)
-        assert pattern[0][1] == pytest.approx(100.0)
-        # Subsequent peaks should be smaller
-        assert pattern[1][1] < 100.0
-
-    def test_parse_observed(self):
-        from isotope_pattern_fit_scorer import parse_observed
-
-        peaks = parse_observed("180.063:100,181.067:6.5,182.070:0.5")
-        assert len(peaks) == 3
-        assert peaks[0] == (180.063, 100.0)
-        assert peaks[1] == (181.067, 6.5)
-
-    def test_parse_observed_with_spaces(self):
-        from isotope_pattern_fit_scorer import parse_observed
-
-        peaks = parse_observed("180.063:100 , 181.067:6.5")
-        assert len(peaks) == 2
-
-    def test_cosine_similarity_perfect(self):
-        from isotope_pattern_fit_scorer import cosine_similarity
-
-        peaks = [(180.0, 100.0), (181.0, 6.5), (182.0, 0.5)]
-        sim = cosine_similarity(peaks, peaks, mz_tolerance=0.1)
-        assert abs(sim - 1.0) < 1e-6
-
-    def test_cosine_similarity_no_overlap(self):
-        from isotope_pattern_fit_scorer import cosine_similarity
-
-        obs = [(180.0, 100.0), (181.0, 6.5)]
-        theo = [(300.0, 100.0), (301.0, 6.5)]
-        sim = cosine_similarity(obs, theo, mz_tolerance=0.1)
-        assert sim == 0.0
-
-    def test_cosine_similarity_partial(self):
-        from isotope_pattern_fit_scorer import cosine_similarity
-
-        obs = [(180.0, 100.0), (181.0, 6.5), (182.0, 50.0)]  # Enhanced M+2
-        theo = [(180.0, 100.0), (181.0, 6.5), (182.0, 0.5)]
-        sim = cosine_similarity(obs, theo, mz_tolerance=0.1)
-        assert 0.0 < sim < 1.0
-
-    def test_detect_halogenation_no_halogen(self):
-        from isotope_pattern_fit_scorer import detect_halogenation
-
-        obs = [(180.0, 100.0), (181.0, 6.5), (182.0, 0.5)]
-        theo = [(180.0, 100.0), (181.0, 6.5), (182.0, 0.5)]
-        result = detect_halogenation(obs, theo)
-        assert result["halogen_flag"] is False
-
-    def test_detect_halogenation_chlorine(self):
-        from isotope_pattern_fit_scorer import detect_halogenation
-
-        obs = [(180.0, 100.0), (181.0, 6.5), (182.0, 35.0)]  # Enhanced M+2 (~35%)
-        theo = [(180.0, 100.0), (181.0, 6.5), (182.0, 0.5)]   # Expected ~0.5%
-        result = detect_halogenation(obs, theo)
-        assert result["halogen_flag"] is True
-        assert "Cl" in result["possible_halogen"]
-
-    def test_detect_halogenation_bromine(self):
-        from isotope_pattern_fit_scorer import detect_halogenation
-
-        obs = [(180.0, 100.0), (181.0, 6.5), (182.0, 98.0)]  # Enhanced M+2 (~98%)
-        theo = [(180.0, 100.0), (181.0, 6.5), (182.0, 0.5)]
-        result = detect_halogenation(obs, theo)
-        assert result["halogen_flag"] is True
-        assert result["possible_halogen"] == "Br"
-
-    def test_score_pattern(self):
-        from isotope_pattern_fit_scorer import score_pattern
-
-        result = score_pattern(
-            "180.063:100,181.067:6.5,182.070:0.5",
-            "C6H12O6",
-        )
-        assert "cosine_similarity" in result
-        assert result["cosine_similarity"] > 0.0
-        assert "halogen_detection" in result
-        assert "observed_peaks" in result
-        assert "theoretical_peaks" in result
-
-    def test_score_pattern_output_json(self):
-        from isotope_pattern_fit_scorer import score_pattern
-
-        result = score_pattern("180.063:100,181.067:6.5", "C6H12O6")
-
-        with tempfile.TemporaryDirectory() as tmpdir:
-            out_path = os.path.join(tmpdir, "fit.json")
-            with open(out_path, "w") as fh:
-                json.dump(result, fh, indent=2)
-            assert os.path.exists(out_path)
-            with open(out_path) as fh:
-                loaded = json.load(fh)
-            assert loaded["formula"] == "C6H12O6"
diff --git a/tools/metabolomics/spectral_analysis/isotope_pattern_matcher/README.md b/tools/metabolomics/spectral_analysis/isotope_pattern_matcher/README.md
deleted file mode 100644
index e56d2a7..0000000
--- a/tools/metabolomics/spectral_analysis/isotope_pattern_matcher/README.md
+++ /dev/null
@@ -1,11 +0,0 @@
-# Isotope Pattern Generator & Matcher
-
-Generate theoretical isotope distributions for any molecular formula and
-optionally compute a cosine similarity score against observed peaks.
-
-## Usage
-
-```bash
-python isotope_pattern_matcher.py --formula C6H12O6
-python isotope_pattern_matcher.py --formula C6H12O6 --peaks 181.0709,100.0 182.0742,6.7 183.0775,0.4
-```
diff --git a/tools/metabolomics/spectral_analysis/isotope_pattern_matcher/isotope_pattern_matcher.py b/tools/metabolomics/spectral_analysis/isotope_pattern_matcher/isotope_pattern_matcher.py
deleted file mode 100644
index e974619..0000000
--- a/tools/metabolomics/spectral_analysis/isotope_pattern_matcher/isotope_pattern_matcher.py
+++ /dev/null
@@ -1,147 +0,0 @@
-"""
-Isotope Pattern Generator & Matcher
-=====================================
-Generate the theoretical isotope distribution for a molecular formula
-and optionally compare it against an observed spectrum to compute a
-cosine similarity score using pyopenms.
-
-Usage
------
-    # Generate isotope pattern for glucose
-    python isotope_pattern_matcher.py --formula C6H12O6
-
-    # Compare against observed peaks (m/z intensity pairs on stdin or --peaks)
-    python isotope_pattern_matcher.py --formula C6H12O6 \\
-        --peaks 181.0709,100.0 --peaks 182.0742,6.7 --peaks 183.0775,0.4
-"""
-
-import math
-import sys
-
-import click
-
-try:
-    import pyopenms as oms
-except ImportError:
-    sys.exit(
-        "pyopenms is required. Install it with:  pip install pyopenms"
-    )
-
-
-def get_isotope_distribution(
-    formula: str,
-    max_isotopes: int = 5,
-) -> list:
-    """Compute the theoretical isotope distribution for a molecular formula.
-
-    Parameters
-    ----------
-    formula:
-        Empirical formula string, e.g. ``"C6H12O6"``.
-    max_isotopes:
-        Maximum number of isotope peaks to return (default: 5).
-
-    Returns
-    -------
-    list of (mz, relative_abundance)
-        Sorted by m/z; relative abundances sum to 100.
-    """
-    ef = oms.EmpiricalFormula(formula)
-    isotope_dist = ef.getIsotopeDistribution(
-        oms.CoarseIsotopePatternGenerator(max_isotopes)
-    )
-    peaks = [(p.getMZ(), p.getIntensity()) for p in isotope_dist.getContainer()]
-    if not peaks:
-        return []
-    max_ab = max(ab for _, ab in peaks)
-    return [(mz, ab / max_ab * 100) for mz, ab in peaks]
-
-
-def cosine_similarity(
-    theoretical: list,
-    observed: list,
-    mz_tolerance: float = 0.02,
-) -> float:
-    """Compute cosine similarity between theoretical and observed peak lists.
-
-    Parameters
-    ----------
-    theoretical:
-        List of ``(mz, intensity)`` tuples for the theoretical pattern.
-    observed:
-        List of ``(mz, intensity)`` tuples for the observed spectrum.
-    mz_tolerance:
-        Maximum m/z difference to match a pair of peaks (default: 0.02 Da).
-
-    Returns
-    -------
-    float
-        Cosine similarity in [0, 1].
-    """
-    theo_vec = []
-    obs_vec = []
-    for tmz, tint in theoretical:
-        matched = 0.0
-        for omz, oint in observed:
-            if abs(omz - tmz) <= mz_tolerance:
-                matched = oint
-                break
-        theo_vec.append(tint)
-        obs_vec.append(matched)
-
-    dot = sum(t * o for t, o in zip(theo_vec, obs_vec))
-    norm_t = math.sqrt(sum(t ** 2 for t in theo_vec))
-    norm_o = math.sqrt(sum(o ** 2 for o in obs_vec))
-    if norm_t == 0 or norm_o == 0:
-        return 0.0
-    return dot / (norm_t * norm_o)
-
-
-def parse_peaks(peak_strings: list) -> list:
-    """Parse ``"mz,intensity"`` strings into ``(float, float)`` tuples."""
-    peaks = []
-    for s in peak_strings:
-        parts = s.split(",")
-        if len(parts) != 2:
-            raise ValueError(
-                f"Expected 'mz,intensity' format, got: {s!r}"
-            )
-        peaks.append((float(parts[0]), float(parts[1])))
-    return peaks
-
-
-@click.command()
-@click.option("--formula", required=True, help="Molecular formula (e.g. C6H12O6)")
-@click.option("--max-isotopes", type=int, default=5,
-              help="Maximum isotope peaks to compute (default: 5)")
-@click.option("--peaks", multiple=True,
-              help="Observed peaks as 'mz,intensity' pairs for similarity scoring")
-@click.option("--tolerance", type=float, default=0.02,
-              help="m/z tolerance in Da for peak matching (default: 0.02)")
-def main(formula, max_isotopes, peaks, tolerance):
-    distribution = get_isotope_distribution(formula, max_isotopes)
-    if not distribution:
-        print("Could not compute isotope distribution for the given formula.")
-        return
-
-    print(f"Isotope distribution for {formula}:")
-    print(f"\n{'Peak':>5}  {'m/z':>12}  {'Relative Abundance (%)':>22}")
-    print("-" * 44)
-    for i, (mz, rel_ab) in enumerate(distribution):
-        bar = "#" * int(rel_ab / 5)
-        print(f"  M+{i}  {mz:>12.4f}  {rel_ab:>6.2f} %  {bar}")
-
-    if peaks:
-        observed = parse_peaks(list(peaks))
-        sim = cosine_similarity(distribution, observed, tolerance)
-        print(f"\nCosine similarity vs. observed peaks: {sim:.4f}")
-        if sim >= 0.9:
-            print("  ✓ Excellent match (≥ 0.90)")
-        elif sim >= 0.7:
-            print("  ~ Good match (≥ 0.70)")
-        else:
-            print("  ✗ Poor match (< 0.70)")
-
-
-if __name__ == "__main__":
-    main()
diff --git a/tools/metabolomics/spectral_analysis/isotope_pattern_matcher/tests/test_isotope_pattern_matcher.py b/tools/metabolomics/spectral_analysis/isotope_pattern_matcher/tests/test_isotope_pattern_matcher.py
deleted file mode 100644
index a0999cb..0000000
--- a/tools/metabolomics/spectral_analysis/isotope_pattern_matcher/tests/test_isotope_pattern_matcher.py
+++ /dev/null
@@ -1,50 +0,0 @@
-"""Tests for isotope_pattern_matcher."""
-
-import pytest
-from conftest import requires_pyopenms
-
-
-@requires_pyopenms
-class TestIsotopePatternMatcher:
-    def test_glucose_pattern(self):
-        from isotope_pattern_matcher import get_isotope_distribution
-
-        dist = get_isotope_distribution("C6H12O6", max_isotopes=3)
-        assert len(dist) == 3
-        assert dist[0][1] == pytest.approx(100.0)
-        assert dist[1][1] < dist[0][1]
-
-    def test_pattern_mz_ordering(self):
-        from isotope_pattern_matcher import get_isotope_distribution
-
-        dist = get_isotope_distribution("C12H22O11", max_isotopes=4)
-        mzs = [mz for mz, _ in dist]
-        assert mzs == sorted(mzs)
-
-    def test_cosine_similarity_perfect(self):
-        from isotope_pattern_matcher import cosine_similarity
-
-        peaks = [(100.0, 50.0), (101.0, 20.0), (102.0, 5.0)]
-        sim = cosine_similarity(peaks, peaks, mz_tolerance=0.01)
-        assert abs(sim - 1.0) < 1e-9
-
-    def test_cosine_similarity_no_overlap(self):
-        from isotope_pattern_matcher import cosine_similarity
-
-        theoretical = [(100.0, 50.0), (101.0, 20.0)]
-        observed = [(200.0, 50.0), (201.0, 20.0)]
-        sim = cosine_similarity(theoretical, observed, mz_tolerance=0.01)
-        assert sim == 0.0
-
-    def test_parse_peaks(self):
-        from isotope_pattern_matcher import parse_peaks
-
-        result = parse_peaks(["181.0709,100.0", "182.0742,6.7"])
-        assert len(result) == 2
-        assert result[0] == (181.0709, 100.0)
-
-    def test_parse_peaks_invalid(self):
-        from isotope_pattern_matcher import parse_peaks
-
-        with pytest.raises(ValueError):
-            parse_peaks(["181.0709"])
diff --git a/tools/metabolomics/spectral_analysis/isotope_pattern_scorer/isotope_pattern_scorer.py b/tools/metabolomics/spectral_analysis/isotope_pattern_scorer/isotope_pattern_scorer.py
deleted file mode 100644
index 10e1a75..0000000
--- a/tools/metabolomics/spectral_analysis/isotope_pattern_scorer/isotope_pattern_scorer.py
+++ /dev/null
@@ -1,154 +0,0 @@
-"""
-Isotope Pattern Scorer
-=======================
-Score observed isotope patterns against theoretical patterns computed
-from a molecular formula using pyopenms.
-
-The score is a cosine similarity between observed and theoretical
-isotope intensity ratios.
-
-Usage
------
-    python isotope_pattern_scorer.py --observed "180.063:100,181.067:6.5" --formula C6H12O6 --output fit.json
-"""
-
-import json
-import sys
-
-import click
-
-try:
-    import pyopenms as oms
-except ImportError:
-    sys.exit("pyopenms is required. Install it with:  pip install pyopenms")
-
-
-def parse_observed(observed_str: str) -> list[tuple[float, float]]:
-    """Parse observed isotope pattern from a string.
-
-    Parameters
-    ----------
-    observed_str:
-        Comma-separated ``mz:intensity`` pairs, e.g.
-        ``"180.063:100,181.067:6.5"``.
-
-    Returns
-    -------
-    list[tuple[float, float]]
-        List of (mz, intensity) tuples.
-    """
-    peaks = []
-    for pair in observed_str.split(","):
-        pair = pair.strip()
-        if ":" in pair:
-            mz_str, int_str = pair.split(":", 1)
-            peaks.append((float(mz_str), float(int_str)))
-    return peaks
-
-
-def get_theoretical_pattern(formula: str, n_peaks: int = 5) -> list[tuple[float, float]]:
-    """Compute theoretical isotope pattern for a formula.
-
-    Parameters
-    ----------
-    formula:
-        Molecular formula string.
-    n_peaks:
-        Number of isotope peaks to generate.
-
-    Returns
-    -------
-    list[tuple[float, float]]
-        List of (mz, relative_intensity) tuples, normalized to max=100.
-    """
-    ef = oms.EmpiricalFormula(formula)
-    gen = oms.CoarseIsotopePatternGenerator(n_peaks)
-    iso = ef.getIsotopeDistribution(gen)
-    container = iso.getContainer()
-
-    peaks = [(peak.getMZ(), peak.getIntensity()) for peak in container]
-    if not peaks:
-        return []
-
-    max_int = max(p[1] for p in peaks)
-    if max_int > 0:
-        peaks = [(mz, intensity / max_int * 100.0) for mz, intensity in peaks]
-
-    return peaks
-
-
-def score_pattern(
-    observed: list[tuple[float, float]],
-    theoretical: list[tuple[float, float]],
-) -> dict:
-    """Score observed vs theoretical isotope patterns.
-
-    Parameters
-    ----------
-    observed:
-        Observed (mz, intensity) pairs.
-    theoretical:
-        Theoretical (mz, intensity) pairs.
-
-    Returns
-    -------
-    dict
-        Contains cosine_score, per-peak comparisons.
-    """
-    n = min(len(observed), len(theoretical))
-    if n == 0:
-        return {"cosine_score": 0.0, "n_peaks_compared": 0, "peaks": []}
-
-    obs_int = [observed[i][1] for i in range(n)]
-    theo_int = [theoretical[i][1] for i in range(n)]
-
-    # Normalize observed to max=100
-    obs_max = max(obs_int) if obs_int else 1.0
-    if obs_max > 0:
-        obs_int = [v / obs_max * 100.0 for v in obs_int]
-
-    # Cosine similarity
-    dot = sum(a * b for a, b in zip(obs_int, theo_int))
-    mag_a = sum(a ** 2 for a in obs_int) ** 0.5
-    mag_b = sum(b ** 2 for b in theo_int) ** 0.5
-
-    cosine = dot / (mag_a * mag_b) if (mag_a > 0 and mag_b > 0) else 0.0
-
-    peaks = []
-    for i in range(n):
-        peaks.append({
-            "peak_index": i,
-            "obs_mz": round(observed[i][0], 6),
-            "theo_mz": round(theoretical[i][0], 6),
-            "obs_intensity": round(obs_int[i], 4),
-            "theo_intensity": round(theo_int[i], 4),
-        })
-
-    return {
-        "cosine_score": round(cosine, 6),
-        "n_peaks_compared": n,
-        "peaks": peaks,
-    }
-
-
-@click.command()
-@click.option("--observed", required=True,
-              help='Observed pattern as "mz:int,mz:int,..." (e.g. "180.063:100,181.067:6.5")')
-@click.option("--formula", required=True, help="Molecular formula (e.g. C6H12O6)")
-@click.option("--output", required=True, help="Output JSON file")
-def main(observed, formula, output):
-    observed_peaks = parse_observed(observed)
-    theoretical = get_theoretical_pattern(formula, n_peaks=len(observed_peaks))
-    result = score_pattern(observed_peaks, theoretical)
-    result["formula"] = formula
-
-    with open(output, "w") as fh:
-        json.dump(result, fh, indent=2)
-
-    print(f"Isotope fit written to {output}")
-    print(f"  Cosine score: {result['cosine_score']:.6f}")
-    print(f"  Peaks compared: {result['n_peaks_compared']}")
-
-
-if __name__ == "__main__":
-    main()
diff --git a/tools/metabolomics/spectral_analysis/isotope_pattern_scorer/tests/conftest.py b/tools/metabolomics/spectral_analysis/isotope_pattern_scorer/tests/conftest.py
deleted file mode 100644
index 1a21ede..0000000
--- a/tools/metabolomics/spectral_analysis/isotope_pattern_scorer/tests/conftest.py
+++ /dev/null
@@ -1,15 +0,0 @@
-import os
-import sys
-
-import pytest
-
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
-
-try:
-    import pyopenms  # noqa: F401
-
-    HAS_PYOPENMS = True
-except ImportError:
-    HAS_PYOPENMS = False
-
-requires_pyopenms = pytest.mark.skipif(not HAS_PYOPENMS, reason="pyopenms not installed")
diff --git a/tools/metabolomics/spectral_analysis/isotope_pattern_scorer/tests/test_isotope_pattern_scorer.py b/tools/metabolomics/spectral_analysis/isotope_pattern_scorer/tests/test_isotope_pattern_scorer.py
deleted file mode 100644
index 07a8d00..0000000
--- a/tools/metabolomics/spectral_analysis/isotope_pattern_scorer/tests/test_isotope_pattern_scorer.py
+++ /dev/null
@@ -1,53 +0,0 @@
-"""Tests for isotope_pattern_scorer."""
-
-from conftest import requires_pyopenms
-
-
-@requires_pyopenms
-class TestIsotopePatternScorer:
-    def test_parse_observed(self):
-        from isotope_pattern_scorer import parse_observed
-
-        peaks = parse_observed("180.063:100,181.067:6.5,182.070:0.5")
-        assert len(peaks) == 3
-        assert peaks[0] == (180.063, 100.0)
-
-    def test_get_theoretical_pattern(self):
-        from isotope_pattern_scorer import get_theoretical_pattern
-
-        pattern = get_theoretical_pattern("C6H12O6", n_peaks=3)
-        assert len(pattern) == 3
-        # First peak should be the most intense (normalized to 100)
-        assert pattern[0][1] == 100.0
-
-    def test_perfect_match_score(self):
-        from isotope_pattern_scorer import get_theoretical_pattern, score_pattern
-
-        theo = get_theoretical_pattern("C6H12O6", n_peaks=3)
-        # Use theoretical as observed
-        result = score_pattern(theo, theo)
-        assert result["cosine_score"] > 0.999
-
-    def test_score_range(self):
-        from isotope_pattern_scorer import get_theoretical_pattern, score_pattern
-
-        theo = get_theoretical_pattern("C6H12O6", n_peaks=3)
-        observed = [(180.063, 100.0), (181.067, 50.0), (182.070, 30.0)]
-        result = score_pattern(observed, theo)
-        assert 0.0 <= result["cosine_score"] <= 1.0
-
-    def test_empty_observed(self):
-        from isotope_pattern_scorer import score_pattern
-
-        result = score_pattern([], [])
-        assert result["cosine_score"] == 0.0
-        assert result["n_peaks_compared"] == 0
-
-    def test_peaks_detail(self):
-        from isotope_pattern_scorer import get_theoretical_pattern, score_pattern
-
-        theo = get_theoretical_pattern("C6H12O6", n_peaks=2)
-        observed = [(180.063, 100.0), (181.067, 7.0)]
-        result = score_pattern(observed, theo)
-        assert result["n_peaks_compared"] == 2
-        assert len(result["peaks"]) == 2
diff --git a/tools/proteomics/file_conversion/mgf_mzml_converter/README.md b/tools/proteomics/file_conversion/mgf_mzml_converter/README.md
new file mode 100644
index 0000000..9e6d700
--- /dev/null
+++ b/tools/proteomics/file_conversion/mgf_mzml_converter/README.md
@@ -0,0 +1,50 @@
+# MGF ↔ mzML Converter
+
+Bidirectional conversion between MGF (Mascot Generic Format) and mzML.
+Consolidates `mgf_to_mzml_converter` and `mzml_to_mgf_converter` into a single tool.
+
+## Features
+
+- **MGF → mzML**: preserves title, precursor m/z, charge, retention time, fragment peaks
+- **mzML → MGF**: full filter suite (charge, RT range, m/z range, min peaks,
+  min base-peak intensity, MS level)
+- **Auto-detection** of conversion direction from file extensions
+- Conversion statistics printed to stdout
+
+## Installation
+
+```bash
+pip install pyopenms
+```
+
+## CLI Usage
+
+```bash
+# Auto-detect direction from file extensions
+python mgf_mzml_converter.py --input spectra.mgf --output spectra.mzML
+python mgf_mzml_converter.py --input run.mzML --output spectra.mgf
+
+# Explicit direction
+python mgf_mzml_converter.py --direction mgf2mzml --input spectra.mgf --output out.mzML
+python mgf_mzml_converter.py --direction mzml2mgf --input run.mzML --output out.mgf
+
+# mzML → MGF with filters
+python mgf_mzml_converter.py --input run.mzML --output out.mgf \
+    --charge 2 3 \
+    --rt-min 600 --rt-max 1800 \
+    --mz-min 400 --mz-max 1200 \
+    --min-peaks 5 \
+    --min-intensity 1000
+```
+
+## Options
+
+| Option | Direction | Description |
+|---|---|---|
+| `--direction` | both | `mgf2mzml` or `mzml2mgf` (auto if omitted) |
+| `--ms-level` | mzml2mgf | MS level to export (default: 2) |
+| `--min-peaks` | mzml2mgf | Minimum fragment peaks (default: 1) |
+| `--charge` | mzml2mgf | Keep only these precursor charges (repeatable) |
+| `--rt-min` / `--rt-max` | mzml2mgf | Retention time window in seconds |
+| `--mz-min` / `--mz-max` | mzml2mgf | Precursor m/z window |
+| `--min-intensity` | mzml2mgf | Minimum base-peak intensity |
diff --git a/tools/proteomics/file_conversion/mgf_mzml_converter/mgf_mzml_converter.py b/tools/proteomics/file_conversion/mgf_mzml_converter/mgf_mzml_converter.py
new file mode 100644
index 0000000..4ece445
--- /dev/null
+++ b/tools/proteomics/file_conversion/mgf_mzml_converter/mgf_mzml_converter.py
@@ -0,0 +1,471 @@
+"""
+MGF ↔ mzML Converter
+=====================
+Bidirectional conversion between MGF (Mascot Generic Format) and mzML.
+Combines and improves upon the separate mgf_to_mzml_converter and
+mzml_to_mgf_converter tools.
+
+Conversion direction is inferred automatically from the input file extension
+when ``--direction`` is not supplied:
+    - ``.mgf``  → mzML
+    - ``.mzML`` / ``.mzml`` → MGF
+
+Features
+--------
+- MGF → mzML: preserves title, precursor m/z, charge, retention time
+- mzML → MGF: full filter suite (charge, RT range, m/z range, min peaks,
+  min base-peak intensity, MS level)
+- Auto-detection of conversion direction from file extension
+- Conversion statistics printed to stdout
+
+Usage
+-----
+    # Auto-detect direction from extensions
+    python mgf_mzml_converter.py --input spectra.mgf --output spectra.mzML
+    python mgf_mzml_converter.py --input run.mzML --output spectra.mgf
+
+    # Explicit direction
+    python mgf_mzml_converter.py --direction mgf2mzml --input spectra.mgf --output out.mzML
+    python mgf_mzml_converter.py --direction mzml2mgf --input run.mzML --output out.mgf
+
+    # mzML → MGF with filters
+    python mgf_mzml_converter.py --input run.mzML --output out.mgf \\
+        --charge 2 3 --rt-min 600 --rt-max 1800 --mz-min 400 --mz-max 1200 \\
+        --min-peaks 5 --min-intensity 1000
+"""
+
+import os
+import sys
+from typing import List, Optional
+
+import click
+
+try:
+    import pyopenms as oms
+except ImportError:
+    sys.exit("pyopenms is required. Install it with:  pip install pyopenms")
+
+PROTON = 1.007276
+
+
+# ---------------------------------------------------------------------------
+# MGF → mzML
+# ---------------------------------------------------------------------------
+
+
+def parse_mgf(input_path: str) -> List[dict]:
+    """Parse an MGF file and return a list of spectrum dicts.
+
+    Each dict has keys: title, pepmass, charge, rt, peaks (list of
+    (mz, intensity) tuples).
+
+    Parameters
+    ----------
+    input_path:
+        Path to the MGF file.
+
+    Returns
+    -------
+    list of dict
+    """
+    spectra = []
+    current: Optional[dict] = None
+
+    with open(input_path) as fh:
+        for line in fh:
+            line = line.strip()
+            if line == "BEGIN IONS":
+                current = {"title": "", "pepmass": 0.0, "charge": 0, "rt": 0.0, "peaks": []}
+            elif line == "END IONS":
+                if current is not None:
+                    spectra.append(current)
+                current = None
+            elif current is not None:
+                if line.startswith("TITLE="):
+                    current["title"] = line[6:]
+                elif line.startswith("PEPMASS="):
+                    parts = line[8:].split()
+                    current["pepmass"] = float(parts[0])
+                elif line.startswith("CHARGE="):
+                    charge_str = line[7:].replace("+", "").replace("-", "").strip()
+                    try:
+                        current["charge"] = int(charge_str)
+                    except ValueError:
+                        current["charge"] = 0
+                elif line.startswith("RTINSECONDS="):
+                    current["rt"] = float(line[12:])
+                elif line and line[0].isdigit():
+                    parts = line.split()
+                    if len(parts) >= 2:
+                        try:
+                            current["peaks"].append((float(parts[0]), float(parts[1])))
+                        except ValueError:
+                            pass
+
+    return spectra
+
+
+def convert_mgf_to_mzml(input_path: str, output_path: str) -> dict:
+    """Convert an MGF file to mzML format.
+
+    Parameters
+    ----------
+    input_path:
+        Path to the input MGF file.
+    output_path:
+        Path to the output mzML file.
+
+    Returns
+    -------
+    dict
+        ``{"spectra_converted": int}``
+    """
+    mgf_spectra = parse_mgf(input_path)
+    exp = oms.MSExperiment()
+
+    for i, spec_data in enumerate(mgf_spectra):
+        spectrum = oms.MSSpectrum()
+        spectrum.setMSLevel(2)
+        spectrum.setRT(spec_data["rt"])
+        spectrum.setNativeID(spec_data["title"] if spec_data["title"] else f"index={i}")
+
+        if spec_data["pepmass"] > 0:
+            prec = oms.Precursor()
+            prec.setMZ(spec_data["pepmass"])
+            if spec_data["charge"] > 0:
+                prec.setCharge(spec_data["charge"])
+            spectrum.setPrecursors([prec])
+
+        if spec_data["peaks"]:
+            mzs = [p[0] for p in spec_data["peaks"]]
+            intensities = [p[1] for p in spec_data["peaks"]]
+            spectrum.set_peaks((mzs, intensities))
+
+        exp.addSpectrum(spectrum)
+
+    oms.MzMLFile().store(output_path, exp)
+    return {"spectra_converted": len(mgf_spectra)}
+
+
+# ---------------------------------------------------------------------------
+# mzML → MGF
+# ---------------------------------------------------------------------------
+
+
+def load_mzml(input_path: str) -> oms.MSExperiment:
+    """Load an mzML file into an MSExperiment.
+
+    Parameters
+    ----------
+    input_path:
+        Path to the mzML file.
+
+    Returns
+    -------
+    oms.MSExperiment
+    """
+    exp = oms.MSExperiment()
+    oms.MzMLFile().load(input_path, exp)
+    return exp
+
+
+def passes_filters(
+    spectrum: oms.MSSpectrum,
+    min_peaks: int = 1,
+    charges: Optional[tuple] = None,
+    rt_min: Optional[float] = None,
+    rt_max: Optional[float] = None,
+    mz_min: Optional[float] = None,
+    mz_max: Optional[float] = None,
+    min_intensity: Optional[float] = None,
+) -> bool:
+    """Check whether a spectrum passes all active filters.
+
+    Parameters
+    ----------
+    spectrum:
+        The spectrum to check.
+    min_peaks:
+        Minimum number of fragment peaks required.
+    charges:
+        Allowed charge states (precursor).  ``None`` means any charge.
+    rt_min:
+        Minimum retention time in seconds.
+    rt_max:
+        Maximum retention time in seconds.
+    mz_min:
+        Minimum precursor m/z.
+    mz_max:
+        Maximum precursor m/z.
+    min_intensity:
+        Minimum base-peak intensity.
+
+    Returns
+    -------
+    bool
+    """
+    mz_array, intensity_array = spectrum.get_peaks()
+    if len(mz_array) < min_peaks:
+        return False
+
+    if rt_min is not None and spectrum.getRT() < rt_min:
+        return False
+    if rt_max is not None and spectrum.getRT() > rt_max:
+        return False
+
+    precursors = spectrum.getPrecursors()
+    if precursors:
+        prec = precursors[0]
+        if charges and prec.getCharge() not in charges:
+            return False
+        if mz_min is not None and prec.getMZ() < mz_min:
+            return False
+        if mz_max is not None and prec.getMZ() > mz_max:
+            return False
+    elif charges or mz_min is not None or mz_max is not None:
+        return False
+
+    if min_intensity is not None:
+        if len(intensity_array) == 0 or max(intensity_array) < min_intensity:
+            return False
+
+    return True
+
+
+def spectrum_to_mgf_block(spectrum: oms.MSSpectrum, index: int) -> str:
+    """Convert a single spectrum to an MGF block string.
+
+    Parameters
+    ----------
+    spectrum:
+        The spectrum to convert.
+    index:
+        Fallback index used in the title when the native ID is empty.
+
+    Returns
+    -------
+    str
+        MGF block ending with a blank line.
+    """
+    lines = ["BEGIN IONS"]
+
+    native_id = spectrum.getNativeID() if spectrum.getNativeID() else f"index={index}"
+    lines.append(f"TITLE={native_id}")
+    lines.append(f"RTINSECONDS={spectrum.getRT():.4f}")
+
+    precursors = spectrum.getPrecursors()
+    if precursors:
+        prec = precursors[0]
+        lines.append(f"PEPMASS={prec.getMZ():.6f}")
+        if prec.getCharge() > 0:
+            lines.append(f"CHARGE={prec.getCharge()}+")
+
+    mz_array, intensity_array = spectrum.get_peaks()
+    for mz_val, intensity_val in zip(mz_array, intensity_array):
+        lines.append(f"{mz_val:.6f} {intensity_val:.4f}")
+
+    lines.append("END IONS")
+    lines.append("")
+    return "\n".join(lines)
+
+
+def convert_mzml_to_mgf(
+    input_path: str,
+    output_path: str,
+    ms_level: int = 2,
+    min_peaks: int = 1,
+    charges: Optional[tuple] = None,
+    rt_min: Optional[float] = None,
+    rt_max: Optional[float] = None,
+    mz_min: Optional[float] = None,
+    mz_max: Optional[float] = None,
+    min_intensity: Optional[float] = None,
+) -> dict:
+    """Convert an mzML file to MGF format with optional filtering.
+
+    Parameters
+    ----------
+    input_path:
+        Path to the input mzML file.
+    output_path:
+        Path to the output MGF file.
+    ms_level:
+        MS level of spectra to export (default: 2).
+    min_peaks:
+        Minimum fragment peaks required to include a spectrum.
+    charges:
+        Allowed precursor charge states.
+    rt_min:
+        Minimum retention time in seconds.
+    rt_max:
+        Maximum retention time in seconds.
+    mz_min:
+        Minimum precursor m/z.
+    mz_max:
+        Maximum precursor m/z.
+    min_intensity:
+        Minimum base-peak intensity.
+
+    Returns
+    -------
+    dict
+        ``{"total_spectra", "ms_level_spectra", "converted", "filtered_out"}``
+    """
+    exp = load_mzml(input_path)
+    target_spectra = [s for s in exp if s.getMSLevel() == ms_level]
+
+    converted = 0
+    filtered_out = 0
+    with open(output_path, "w") as fh:
+        for i, spectrum in enumerate(target_spectra):
+            if not passes_filters(spectrum, min_peaks, charges, rt_min, rt_max, mz_min, mz_max, min_intensity):
+                filtered_out += 1
+                continue
+            fh.write(spectrum_to_mgf_block(spectrum, i) + "\n")
+            converted += 1
+
+    return {
+        "total_spectra": exp.size(),
+        "ms_level_spectra": len(target_spectra),
+        "converted": converted,
+        "filtered_out": filtered_out,
+    }
+
+
+# ---------------------------------------------------------------------------
+# Synthetic test data helper
+# ---------------------------------------------------------------------------
+
+
+def create_synthetic_mzml(output_path: str, n_spectra: int = 5) -> None:
+    """Create a synthetic mzML file with MS2 spectra for testing.
+
+    Parameters
+    ----------
+    output_path:
+        Path for the output mzML file.
+    n_spectra:
+        Number of MS2 spectra to generate (default: 5).
+    """
+    exp = oms.MSExperiment()
+
+    ms1 = oms.MSSpectrum()
+    ms1.setMSLevel(1)
+    ms1.setRT(10.0)
+    ms1.set_peaks(([100.0, 200.0, 300.0], [1000.0, 2000.0, 1500.0]))
+    exp.addSpectrum(ms1)
+
+    for i in range(n_spectra):
+        ms2 = oms.MSSpectrum()
+        ms2.setMSLevel(2)
+        ms2.setRT(10.0 + i * 0.5)
+        ms2.setNativeID(f"scan={i + 1}")
+
+        prec = oms.Precursor()
+        prec.setMZ(500.0 + i * 50.0)
+        prec.setCharge(2 if i % 2 == 0 else 3)
+        ms2.setPrecursors([prec])
+
+        mzs = [100.0 + j * 50.0 for j in range(10)]
+        intensities = [1000.0 * (10 - j) for j in range(10)]
+        ms2.set_peaks((mzs, intensities))
+        exp.addSpectrum(ms2)
+
+    oms.MzMLFile().store(output_path, exp)
+
+
+# ---------------------------------------------------------------------------
+# Direction auto-detection
+# ---------------------------------------------------------------------------
+
+
+def _detect_direction(input_path: str, output_path: str) -> str:
+    """Infer conversion direction from file extensions.
+
+    Parameters
+    ----------
+    input_path:
+        Path to the input file.
+    output_path:
+        Path to the output file.
+
+    Returns
+    -------
+    str
+        ``"mgf2mzml"`` or ``"mzml2mgf"``.
+
+    Raises
+    ------
+    click.UsageError
+        If the direction cannot be determined.
+    """
+    in_ext = os.path.splitext(input_path)[1].lower()
+    if in_ext == ".mgf":
+        return "mgf2mzml"
+    if in_ext in (".mzml", ".mzxml"):
+        return "mzml2mgf"
+    out_ext = os.path.splitext(output_path)[1].lower()
+    if out_ext in (".mzml", ".mzxml"):
+        return "mgf2mzml"
+    if out_ext == ".mgf":
+        return "mzml2mgf"
+    raise click.UsageError(
+        "Cannot determine conversion direction from file extensions. "
+        "Use --direction mgf2mzml or --direction mzml2mgf explicitly."
+    )
+
+
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+
+
+@click.command(help="Bidirectional MGF ↔ mzML converter with optional spectrum filtering.")
+@click.option("--input", "input_path", required=True, help="Input file (MGF or mzML)")
+@click.option("--output", "output_path", required=True, help="Output file (mzML or MGF)")
+@click.option(
+    "--direction",
+    type=click.Choice(["mgf2mzml", "mzml2mgf"]),
+    default=None,
+    help="Conversion direction (auto-detected from extension if omitted).",
+)
+@click.option("--ms-level", type=int, default=2, help="MS level to export (mzml2mgf, default: 2)")
+@click.option("--min-peaks", type=int, default=1, help="Min fragment peaks to keep (mzml2mgf, default: 1)")
+@click.option(
+    "--charge", multiple=True, type=int,
+    help="Keep only these precursor charge states (mzml2mgf, repeatable).",
+)
+@click.option("--rt-min", type=float, default=None, help="Min retention time in seconds (mzml2mgf)")
+@click.option("--rt-max", type=float, default=None, help="Max retention time in seconds (mzml2mgf)")
+@click.option("--mz-min", type=float, default=None, help="Min precursor m/z (mzml2mgf)")
+@click.option("--mz-max", type=float, default=None, help="Max precursor m/z (mzml2mgf)")
+@click.option(
+    "--min-intensity", type=float, default=None,
+    help="Min base-peak intensity to keep a spectrum (mzml2mgf).",
+)
+def main(
+    input_path, output_path, direction, ms_level, min_peaks, charge,
+    rt_min, rt_max, mz_min, mz_max, min_intensity,
+) -> None:
+    """CLI entry point."""
+    if direction is None:
+        direction = _detect_direction(input_path, output_path)
+
+    if direction == "mgf2mzml":
+        stats = convert_mgf_to_mzml(input_path, output_path)
+        print(f"Converted {stats['spectra_converted']} spectra  {input_path} → {output_path}")
+    else:
+        charges = tuple(charge) if charge else None
+        stats = convert_mzml_to_mgf(
+            input_path, output_path, ms_level, min_peaks, charges,
+            rt_min, rt_max, mz_min, mz_max, min_intensity,
+        )
+        print(
+            f"Converted {stats['converted']} / {stats['ms_level_spectra']} "
+            f"MS{ms_level} spectra  {input_path} → {output_path} "
+            f"({stats['filtered_out']} filtered out)"
+        )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/metabolomics/spectral_analysis/isotope_pattern_scorer/requirements.txt b/tools/proteomics/file_conversion/mgf_mzml_converter/requirements.txt
similarity index 60%
rename from tools/metabolomics/spectral_analysis/isotope_pattern_scorer/requirements.txt
rename to tools/proteomics/file_conversion/mgf_mzml_converter/requirements.txt
index 18d6bbb..7ce28ec 100644
--- a/tools/metabolomics/spectral_analysis/isotope_pattern_scorer/requirements.txt
+++ b/tools/proteomics/file_conversion/mgf_mzml_converter/requirements.txt
@@ -1,2 +1 @@
 pyopenms
-click
diff --git a/tools/metabolomics/spectral_analysis/isotope_pattern_matcher/tests/conftest.py b/tools/proteomics/file_conversion/mgf_mzml_converter/tests/conftest.py
similarity index 100%
rename from tools/metabolomics/spectral_analysis/isotope_pattern_matcher/tests/conftest.py
rename to tools/proteomics/file_conversion/mgf_mzml_converter/tests/conftest.py
diff --git a/tools/proteomics/file_conversion/mgf_mzml_converter/tests/test_mgf_mzml_converter.py b/tools/proteomics/file_conversion/mgf_mzml_converter/tests/test_mgf_mzml_converter.py
new file mode 100644
index 0000000..11cc862
--- /dev/null
+++ b/tools/proteomics/file_conversion/mgf_mzml_converter/tests/test_mgf_mzml_converter.py
@@ -0,0 +1,300 @@
+"""Tests for mgf_mzml_converter."""
+
+import os
+import tempfile
+
+from conftest import requires_pyopenms
+
+SAMPLE_MGF = """\
+BEGIN IONS
+TITLE=scan=1
+RTINSECONDS=120.5
+PEPMASS=500.250000
+CHARGE=2+
+100.100000 1000.0000
+200.200000 2000.0000
+300.300000 1500.0000
+END IONS
+
+BEGIN IONS
+TITLE=scan=2
+RTINSECONDS=130.0
+PEPMASS=600.350000
+CHARGE=3+
+150.150000 800.0000
+250.250000 1200.0000
+END IONS
+"""
+
+
+# ---------------------------------------------------------------------------
+# MGF → mzML tests
+# ---------------------------------------------------------------------------
+
+
+@requires_pyopenms
+def test_parse_mgf():
+    from mgf_mzml_converter import parse_mgf
+
+    with tempfile.TemporaryDirectory() as tmp:
+        mgf_path = os.path.join(tmp, "test.mgf")
+        with open(mgf_path, "w") as fh:
+            fh.write(SAMPLE_MGF)
+
+        spectra = parse_mgf(mgf_path)
+        assert len(spectra) == 2
+        assert spectra[0]["title"] == "scan=1"
+        assert abs(spectra[0]["pepmass"] - 500.25) < 0.01
+        assert spectra[0]["charge"] == 2
+        assert len(spectra[0]["peaks"]) == 3
+        assert len(spectra[1]["peaks"]) == 2
+
+
+@requires_pyopenms
+def test_convert_mgf_to_mzml():
+    import pyopenms as oms
+    from mgf_mzml_converter import convert_mgf_to_mzml
+
+    with tempfile.TemporaryDirectory() as tmp:
+        mgf_path = os.path.join(tmp, "test.mgf")
+        mzml_path = os.path.join(tmp, "test.mzML")
+
+        with open(mgf_path, "w") as fh:
+            fh.write(SAMPLE_MGF)
+
+        stats = convert_mgf_to_mzml(mgf_path, mzml_path)
+        assert stats["spectra_converted"] == 2
+
+        exp = oms.MSExperiment()
+        oms.MzMLFile().load(mzml_path, exp)
+        assert exp.size() == 2
+
+        s = exp[0]
+        assert s.getMSLevel() == 2
+        precursors = s.getPrecursors()
+        assert len(precursors) == 1
+        assert abs(precursors[0].getMZ() - 500.25) < 0.01
+        assert precursors[0].getCharge() == 2
+
+        mz_arr, _ = s.get_peaks()
+        assert len(mz_arr) == 3
+
+
+# ---------------------------------------------------------------------------
+# mzML → MGF tests
+# ---------------------------------------------------------------------------
+
+
+@requires_pyopenms
+def test_create_synthetic_mzml():
+    import pyopenms as oms
+    from mgf_mzml_converter import create_synthetic_mzml
+
+    with tempfile.TemporaryDirectory() as tmp:
+        mzml_path = os.path.join(tmp, "test.mzML")
+        create_synthetic_mzml(mzml_path, n_spectra=3)
+
+        exp = oms.MSExperiment()
+        oms.MzMLFile().load(mzml_path, exp)
+        assert exp.size() == 4  # 1 MS1 + 3 MS2
+
+
+@requires_pyopenms
+def test_convert_mzml_to_mgf():
+    from mgf_mzml_converter import convert_mzml_to_mgf, create_synthetic_mzml
+
+    with tempfile.TemporaryDirectory() as tmp:
+        mzml_path = os.path.join(tmp, "test.mzML")
+        mgf_path = os.path.join(tmp, "test.mgf")
+
+        create_synthetic_mzml(mzml_path, n_spectra=3)
+        stats = convert_mzml_to_mgf(mzml_path, mgf_path)
+
+        assert stats["converted"] == 3
+        assert stats["ms_level_spectra"] == 3
+        assert stats["filtered_out"] == 0
+
+        with open(mgf_path) as fh:
+            content = fh.read()
+        assert content.count("BEGIN IONS") == 3
+        assert "PEPMASS=" in content
+        assert "CHARGE=" in content
+
+
+@requires_pyopenms
+def test_mgf_content_format():
+    from mgf_mzml_converter import convert_mzml_to_mgf, create_synthetic_mzml
+
+    with tempfile.TemporaryDirectory() as tmp:
+        mzml_path = os.path.join(tmp, "test.mzML")
+        mgf_path = os.path.join(tmp, "test.mgf")
+
+        create_synthetic_mzml(mzml_path, n_spectra=1)
+        convert_mzml_to_mgf(mzml_path, mgf_path)
+
+        with open(mgf_path) as fh:
+            lines = fh.readlines()
+
+        assert lines[0].strip() == "BEGIN IONS"
+        assert any(line.startswith("TITLE=") for line in lines)
+        assert any(line.startswith("PEPMASS=") for line in lines)
+
+
+@requires_pyopenms
+def test_min_peaks_filter():
+    from mgf_mzml_converter import convert_mzml_to_mgf, create_synthetic_mzml
+
+    with tempfile.TemporaryDirectory() as tmp:
+        mzml_path = os.path.join(tmp, "test.mzML")
+        mgf_path = os.path.join(tmp, "test.mgf")
+
+        create_synthetic_mzml(mzml_path, n_spectra=3)
+        stats = convert_mzml_to_mgf(mzml_path, mgf_path, min_peaks=100)
+        assert stats["converted"] == 0
+        assert stats["filtered_out"] == 3
+
+
+@requires_pyopenms
+def test_charge_filter():
+    from mgf_mzml_converter import convert_mzml_to_mgf, create_synthetic_mzml
+
+    with tempfile.TemporaryDirectory() as tmp:
+        mzml_path = os.path.join(tmp, "test.mzML")
+        mgf_path = os.path.join(tmp, "test.mgf")
+
+        create_synthetic_mzml(mzml_path, n_spectra=6)
+        stats = convert_mzml_to_mgf(mzml_path, mgf_path, charges=(2,))
+        assert stats["converted"] == 3
+        assert stats["filtered_out"] == 3
+
+        with open(mgf_path) as fh:
+            content = fh.read()
+        assert "CHARGE=2+" in content
+        assert "CHARGE=3+" not in content
+
+
+@requires_pyopenms
+def test_rt_range_filter():
+    from mgf_mzml_converter import convert_mzml_to_mgf, create_synthetic_mzml
+
+    with tempfile.TemporaryDirectory() as tmp:
+        mzml_path = os.path.join(tmp, "test.mzML")
+        mgf_path = os.path.join(tmp, "test.mgf")
+
+        create_synthetic_mzml(mzml_path, n_spectra=5)
+        stats = convert_mzml_to_mgf(mzml_path, mgf_path, rt_min=10.5, rt_max=11.5)
+        assert stats["converted"] == 3
+        assert stats["filtered_out"] == 2
+
+
+@requires_pyopenms
+def test_mz_range_filter():
+    from mgf_mzml_converter import convert_mzml_to_mgf, create_synthetic_mzml
+
+    with tempfile.TemporaryDirectory() as tmp:
+        mzml_path = os.path.join(tmp, "test.mzML")
+        mgf_path = os.path.join(tmp, "test.mgf")
+
+        create_synthetic_mzml(mzml_path, n_spectra=5)
+        stats = convert_mzml_to_mgf(mzml_path, mgf_path, mz_min=550.0, mz_max=650.0)
+        assert stats["converted"] == 3
+        assert stats["filtered_out"] == 2
+
+
+@requires_pyopenms
+def test_min_intensity_filter():
+    from mgf_mzml_converter import convert_mzml_to_mgf, create_synthetic_mzml
+
+    with tempfile.TemporaryDirectory() as tmp:
+        mzml_path = os.path.join(tmp, "test.mzML")
+        mgf_path = os.path.join(tmp, "test.mgf")
+
+        create_synthetic_mzml(mzml_path, n_spectra=3)
+        stats = convert_mzml_to_mgf(mzml_path, mgf_path, min_intensity=10001.0)
+        assert stats["converted"] == 0
+
+        stats = convert_mzml_to_mgf(mzml_path, mgf_path, min_intensity=5000.0)
+        assert stats["converted"] == 3
+
+
+@requires_pyopenms
+def test_combined_filters():
+    from mgf_mzml_converter import convert_mzml_to_mgf, create_synthetic_mzml
+
+    with tempfile.TemporaryDirectory() as tmp:
+        mzml_path = os.path.join(tmp, "test.mzML")
+        mgf_path = os.path.join(tmp, "test.mgf")
+
+        create_synthetic_mzml(mzml_path, n_spectra=6)
+        stats = convert_mzml_to_mgf(
+            mzml_path, mgf_path,
+            charges=(2,), rt_min=10.0, rt_max=11.5, mz_min=500.0, mz_max=700.0,
+        )
+        assert stats["converted"] == 2
+
+
+# ---------------------------------------------------------------------------
+# Direction auto-detection tests
+# ---------------------------------------------------------------------------
+
+
+@requires_pyopenms
+def test_auto_detect_mgf_to_mzml():
+    from mgf_mzml_converter import _detect_direction
+
+    direction = _detect_direction("spectra.mgf", "spectra.mzML")
+    assert direction == "mgf2mzml"
+
+
+@requires_pyopenms
+def test_auto_detect_mzml_to_mgf():
+    from mgf_mzml_converter import _detect_direction
+
+    direction = _detect_direction("run.mzML", "out.mgf")
+    assert direction == "mzml2mgf"
+
+
+@requires_pyopenms
+def test_auto_detect_mzml_lowercase():
+    from mgf_mzml_converter import _detect_direction
+
+    direction = _detect_direction("run.mzml", "out.mgf")
+    assert direction == "mzml2mgf"
+
+
+@requires_pyopenms
+def test_auto_detect_from_output_extension():
+    """When input has unknown extension, infer from output extension."""
+    from mgf_mzml_converter import _detect_direction
+
+    direction = _detect_direction("data.unknown", "out.mzML")
+    assert direction == "mgf2mzml"
+
+
+@requires_pyopenms
+def test_roundtrip_mgf_mzml_mgf():
+    """MGF → mzML → MGF round-trip preserves spectrum count."""
+    import pyopenms as oms
+    from mgf_mzml_converter import convert_mgf_to_mzml, convert_mzml_to_mgf
+
+    with tempfile.TemporaryDirectory() as tmp:
+        mgf_in = os.path.join(tmp, "in.mgf")
+        mzml_path = os.path.join(tmp, "intermediate.mzML")
+        mgf_out = os.path.join(tmp, "out.mgf")
+
+        with open(mgf_in, "w") as fh:
+            fh.write(SAMPLE_MGF)
+
+        stats1 = convert_mgf_to_mzml(mgf_in, mzml_path)
+        assert stats1["spectra_converted"] == 2
+
+        exp = oms.MSExperiment()
+        oms.MzMLFile().load(mzml_path, exp)
+        assert exp.size() == 2
+
+        stats2 = convert_mzml_to_mgf(mzml_path, mgf_out)
+        assert stats2["converted"] == 2
+
+        with open(mgf_out) as fh:
+            content = fh.read()
+        assert content.count("BEGIN IONS") == 2
diff --git a/tools/proteomics/file_conversion/mgf_to_mzml_converter/README.md b/tools/proteomics/file_conversion/mgf_to_mzml_converter/README.md
deleted file mode 100644
index 1950599..0000000
--- a/tools/proteomics/file_conversion/mgf_to_mzml_converter/README.md
+++ /dev/null
@@ -1,15 +0,0 @@
-# MGF to mzML Converter
-
-Convert MGF (Mascot Generic Format) spectra to mzML format.
-
-## Installation
-
-```bash
-pip install -r requirements.txt
-```
-
-## Usage
-
-```bash
-python mgf_to_mzml_converter.py --input spectra.mgf --output spectra.mzML
-```
diff --git a/tools/proteomics/file_conversion/mgf_to_mzml_converter/mgf_to_mzml_converter.py b/tools/proteomics/file_conversion/mgf_to_mzml_converter/mgf_to_mzml_converter.py
deleted file mode 100644
index 51aa79e..0000000
--- a/tools/proteomics/file_conversion/mgf_to_mzml_converter/mgf_to_mzml_converter.py
+++ /dev/null
@@ -1,110 +0,0 @@
-"""
-MGF to mzML Converter
-=====================
-Convert MGF (Mascot Generic Format) spectra to mzML format.
-
-Usage
------
-    python mgf_to_mzml_converter.py --input spectra.mgf --output spectra.mzML
-"""
-
-import sys
-from typing import List
-
-import click
-
-try:
-    import pyopenms as oms
-except ImportError:
-    sys.exit("pyopenms is required. Install it with:  pip install pyopenms")
-
-
-def parse_mgf(input_path: str) -> List[dict]:
-    """Parse an MGF file and return a list of spectrum dicts.
-
-    Each dict has keys: title, pepmass, charge, rt, peaks (list of (mz, intensity) tuples).
-    """
-    spectra = []
-    current = None
-
-    with open(input_path) as fh:
-        for line in fh:
-            line = line.strip()
-            if line == "BEGIN IONS":
-                current = {"title": "", "pepmass": 0.0, "charge": 0, "rt": 0.0, "peaks": []}
-            elif line == "END IONS":
-                if current is not None:
-                    spectra.append(current)
-                current = None
-            elif current is not None:
-                if line.startswith("TITLE="):
-                    current["title"] = line[6:]
-                elif line.startswith("PEPMASS="):
-                    parts = line[8:].split()
-                    current["pepmass"] = float(parts[0])
-                elif line.startswith("CHARGE="):
-                    charge_str = line[7:].replace("+", "").replace("-", "")
-                    try:
-                        current["charge"] = int(charge_str)
-                    except ValueError:
-                        current["charge"] = 0
-                elif line.startswith("RTINSECONDS="):
-                    current["rt"] = float(line[12:])
-                elif line and line[0].isdigit():
-                    parts = line.split()
-                    if len(parts) >= 2:
-                        try:
-                            mz = float(parts[0])
-                            intensity = float(parts[1])
-                            current["peaks"].append((mz, intensity))
-                        except ValueError:
-                            pass
-
-    return spectra
-
-
-def convert_mgf_to_mzml(input_path: str, output_path: str) -> dict:
-    """Convert MGF to mzML format.
-
-    Returns statistics about the conversion.
-    """
-    mgf_spectra = parse_mgf(input_path)
-    exp = oms.MSExperiment()
-
-    for i, spec_data in enumerate(mgf_spectra):
-        spectrum = oms.MSSpectrum()
-        spectrum.setMSLevel(2)
-        spectrum.setRT(spec_data["rt"])
-        spectrum.setNativeID(spec_data["title"] if spec_data["title"] else f"index={i}")
-
-        # Set precursor
-        if spec_data["pepmass"] > 0:
-            prec = oms.Precursor()
-            prec.setMZ(spec_data["pepmass"])
-            if spec_data["charge"] > 0:
-                prec.setCharge(spec_data["charge"])
-            spectrum.setPrecursors([prec])
-
-        # Set peaks
-        if spec_data["peaks"]:
-            mzs = [p[0] for p in spec_data["peaks"]]
-            intensities = [p[1] for p in spec_data["peaks"]]
-            spectrum.set_peaks((mzs, intensities))
-
-        exp.addSpectrum(spectrum)
-
-    oms.MzMLFile().store(output_path, exp)
-
-    return {"spectra_converted": len(mgf_spectra)}
-
-
-@click.command(help="Convert MGF to mzML format.")
-@click.option("--input", "input", required=True, help="Input MGF file")
-@click.option("--output", required=True, help="Output mzML file")
-def main(input, output) -> None:
-    stats = convert_mgf_to_mzml(input, output)
-    print(f"Converted {stats['spectra_converted']} spectra to {output}")
-
-
-if __name__ == "__main__":
-    main()
diff --git a/tools/proteomics/file_conversion/mgf_to_mzml_converter/requirements.txt b/tools/proteomics/file_conversion/mgf_to_mzml_converter/requirements.txt
deleted file mode 100644
index 18d6bbb..0000000
--- a/tools/proteomics/file_conversion/mgf_to_mzml_converter/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-pyopenms
-click
diff --git a/tools/proteomics/file_conversion/mgf_to_mzml_converter/tests/conftest.py b/tools/proteomics/file_conversion/mgf_to_mzml_converter/tests/conftest.py
deleted file mode 100644
index 1a21ede..0000000
--- a/tools/proteomics/file_conversion/mgf_to_mzml_converter/tests/conftest.py
+++ /dev/null
@@ -1,15 +0,0 @@
-import os
-import sys
-
-import pytest
-
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
-
-try:
-    import pyopenms  # noqa: F401
-
-    HAS_PYOPENMS = True
-except ImportError:
-    HAS_PYOPENMS = False
-
-requires_pyopenms = pytest.mark.skipif(not HAS_PYOPENMS, reason="pyopenms not installed")
diff --git a/tools/proteomics/file_conversion/mgf_to_mzml_converter/tests/test_mgf_to_mzml_converter.py b/tools/proteomics/file_conversion/mgf_to_mzml_converter/tests/test_mgf_to_mzml_converter.py
deleted file mode 100644
index f51f243..0000000
--- a/tools/proteomics/file_conversion/mgf_to_mzml_converter/tests/test_mgf_to_mzml_converter.py
+++ /dev/null
@@ -1,101 +0,0 @@
-"""Tests for mgf_to_mzml_converter."""
-
-import os
-import tempfile
-
-from conftest import requires_pyopenms
-
-SAMPLE_MGF = """\
-BEGIN IONS
-TITLE=scan=1
-RTINSECONDS=120.5
-PEPMASS=500.250000
-CHARGE=2+
-100.100000 1000.0000
-200.200000 2000.0000
-300.300000 1500.0000
-END IONS
-
-BEGIN IONS
-TITLE=scan=2
-RTINSECONDS=130.0
-PEPMASS=600.350000
-CHARGE=3+
-150.150000 800.0000
-250.250000 1200.0000
-END IONS
-"""
-
-
-@requires_pyopenms
-def test_parse_mgf():
-    from mgf_to_mzml_converter import parse_mgf
-
-    with tempfile.TemporaryDirectory() as tmp:
-        mgf_path = os.path.join(tmp, "test.mgf")
-        with open(mgf_path, "w") as fh:
-            fh.write(SAMPLE_MGF)
-
-        spectra = parse_mgf(mgf_path)
-        assert len(spectra) == 2
-        assert spectra[0]["title"] == "scan=1"
-        assert abs(spectra[0]["pepmass"] - 500.25) < 0.01
-        assert spectra[0]["charge"] == 2
-        assert len(spectra[0]["peaks"]) == 3
-        assert len(spectra[1]["peaks"]) == 2
-
-
-@requires_pyopenms
-def test_convert_mgf_to_mzml():
-    import pyopenms as oms
-    from mgf_to_mzml_converter import convert_mgf_to_mzml
-
-    with tempfile.TemporaryDirectory() as tmp:
-        mgf_path = os.path.join(tmp, "test.mgf")
-        mzml_path = os.path.join(tmp, "test.mzML")
-
-        with open(mgf_path, "w") as fh:
-            fh.write(SAMPLE_MGF)
-
-        stats = convert_mgf_to_mzml(mgf_path, mzml_path)
-        assert stats["spectra_converted"] == 2
-
-        exp = oms.MSExperiment()
-        oms.MzMLFile().load(mzml_path, exp)
-        assert exp.size() == 2
-
-        # Check first spectrum
-        s = exp[0]
-        assert s.getMSLevel() == 2
-        precursors = s.getPrecursors()
-        assert len(precursors) == 1
-        assert abs(precursors[0].getMZ() - 500.25) < 0.01
-        assert precursors[0].getCharge() == 2
-
-        mz_arr, int_arr = s.get_peaks()
-        assert len(mz_arr) == 3
-
-
-@requires_pyopenms
-def test_roundtrip_mgf_mzml_mgf():
-    """Test MGF -> mzML -> MGF roundtrip."""
-    import sys
-
-    from mgf_to_mzml_converter import convert_mgf_to_mzml
-    sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "mzml_to_mgf_converter"))
-
-    with tempfile.TemporaryDirectory() as tmp:
-        mgf_path = os.path.join(tmp, "test.mgf")
-        mzml_path = os.path.join(tmp, "test.mzML")
-
-        with open(mgf_path, "w") as fh:
-            fh.write(SAMPLE_MGF)
-
-        stats = convert_mgf_to_mzml(mgf_path, mzml_path)
-        assert stats["spectra_converted"] == 2
-
-        # Verify mzML is valid
-        import pyopenms as oms
-        exp = oms.MSExperiment()
-        oms.MzMLFile().load(mzml_path, exp)
-        assert exp.size() == 2
diff --git a/tools/proteomics/file_conversion/mzml_to_mgf_converter/README.md b/tools/proteomics/file_conversion/mzml_to_mgf_converter/README.md
deleted file mode 100644
index 9dd8b98..0000000
--- a/tools/proteomics/file_conversion/mzml_to_mgf_converter/README.md
+++ /dev/null
@@ -1,19 +0,0 @@
-# mzML to MGF Converter
-
-Convert MS2 spectra from mzML format to MGF (Mascot Generic Format).
-
-## Installation
-
-```bash
-pip install -r requirements.txt
-```
-
-## Usage
-
-```bash
-# Convert MS2 spectra
-python mzml_to_mgf_converter.py --input run.mzML --ms-level 2 --output spectra.mgf
-
-# Filter by minimum number of peaks
-python mzml_to_mgf_converter.py --input run.mzML --min-peaks 10 --output spectra.mgf
-```
diff --git a/tools/proteomics/file_conversion/mzml_to_mgf_converter/mzml_to_mgf_converter.py b/tools/proteomics/file_conversion/mzml_to_mgf_converter/mzml_to_mgf_converter.py
deleted file mode 100644
index 658d900..0000000
--- a/tools/proteomics/file_conversion/mzml_to_mgf_converter/mzml_to_mgf_converter.py
+++ /dev/null
@@ -1,214 +0,0 @@
-"""
-mzML to MGF Converter
-=====================
-Convert MS2 spectra from mzML format to MGF (Mascot Generic Format)
-with optional filtering by charge state, retention time, precursor m/z,
-and minimum peak count.
-
-Usage
------
-    python mzml_to_mgf_converter.py --input run.mzML --output spectra.mgf
-    python mzml_to_mgf_converter.py --input run.mzML --output spectra.mgf --charge 2 3
-    python mzml_to_mgf_converter.py --input run.mzML --output spectra.mgf --rt-min 600 --rt-max 1800
-    python mzml_to_mgf_converter.py --input run.mzML --output spectra.mgf --mz-min 400 --mz-max 1200
-"""
-
-import sys
-from typing import List, Optional
-
-import click
-
-try:
-    import pyopenms as oms
-except ImportError:
-    sys.exit("pyopenms is required. Install it with:  pip install pyopenms")
-
-PROTON = 1.007276
-
-
-def load_mzml(input_path: str) -> oms.MSExperiment:
-    """Load an mzML file into an MSExperiment."""
-    exp = oms.MSExperiment()
-    oms.MzMLFile().load(input_path, exp)
-    return exp
-
-
-def get_spectra_by_level(exp: oms.MSExperiment, ms_level: int = 2) -> List[oms.MSSpectrum]:
-    """Extract spectra of a given MS level from an experiment."""
-    return [s for s in exp if s.getMSLevel() == ms_level]
-
-
-def passes_filters(
-    spectrum: oms.MSSpectrum,
-    min_peaks: int = 1,
-    charges: Optional[tuple] = None,
-    rt_min: Optional[float] = None,
-    rt_max: Optional[float] = None,
-    mz_min: Optional[float] = None,
-    mz_max: Optional[float] = None,
-    min_intensity: Optional[float] = None,
-) -> bool:
-    """Check whether a spectrum passes all active filters."""
-    mz_array, intensity_array = spectrum.get_peaks()
-    if len(mz_array) < min_peaks:
-        return False
-
-    if rt_min is not None and spectrum.getRT() < rt_min:
-        return False
-    if rt_max is not None and spectrum.getRT() > rt_max:
-        return False
-
-    precursors = spectrum.getPrecursors()
-    if precursors:
-        prec = precursors[0]
-        if charges and prec.getCharge() not in charges:
-            return False
-        if mz_min is not None and prec.getMZ() < mz_min:
-            return False
-        if mz_max is not None and prec.getMZ() > mz_max:
-            return False
-    elif charges or mz_min is not None or mz_max is not None:
-        return False
-
-    if min_intensity is not None:
-        if len(intensity_array) == 0 or max(intensity_array) < min_intensity:
-            return False
-
-    return True
-
-
-def spectrum_to_mgf_block(spectrum: oms.MSSpectrum, index: int) -> str:
-    """Convert a single spectrum to an MGF block string."""
-    lines = ["BEGIN IONS"]
-
-    native_id = spectrum.getNativeID() if spectrum.getNativeID() else f"index={index}"
-    lines.append(f"TITLE={native_id}")
-
-    rt = spectrum.getRT()
-    lines.append(f"RTINSECONDS={rt:.4f}")
-
-    precursors = spectrum.getPrecursors()
-    if precursors:
-        prec = precursors[0]
-        mz = prec.getMZ()
-        charge = prec.getCharge()
-        lines.append(f"PEPMASS={mz:.6f}")
-        if charge > 0:
-            lines.append(f"CHARGE={charge}+")
-
-    mz_array, intensity_array = spectrum.get_peaks()
-    for mz_val, intensity_val in zip(mz_array, intensity_array):
-        lines.append(f"{mz_val:.6f} {intensity_val:.4f}")
-
-    lines.append("END IONS")
-    lines.append("")
-    return "\n".join(lines)
-
-
-def convert_mzml_to_mgf(
-    input_path: str,
-    output_path: str,
-    ms_level: int = 2,
-    min_peaks: int = 1,
-    charges: Optional[tuple] = None,
-    rt_min: Optional[float] = None,
-    rt_max: Optional[float] = None,
-    mz_min: Optional[float] = None,
-    mz_max: Optional[float] = None,
-    min_intensity: Optional[float] = None,
-) -> dict:
-    """Convert mzML to MGF format with optional filtering.
-
-    Returns statistics about the conversion.
-    """
-    exp = load_mzml(input_path)
-    spectra = get_spectra_by_level(exp, ms_level)
-
-    converted = 0
-    filtered_out = 0
-    with open(output_path, "w") as fh:
-        for i, spectrum in enumerate(spectra):
-            if not passes_filters(
-                spectrum, min_peaks, charges, rt_min, rt_max, mz_min, mz_max,
-                min_intensity,
-            ):
-                filtered_out += 1
-                continue
-            block = spectrum_to_mgf_block(spectrum, i)
-            fh.write(block + "\n")
-            converted += 1
-
-    return {
-        "total_spectra": exp.size(),
-        "ms_level_spectra": len(spectra),
-        "converted": converted,
-        "filtered_out": filtered_out,
-    }
-
-
-def create_synthetic_mzml(output_path: str, n_spectra: int = 5) -> None:
-    """Create a synthetic mzML file with MS2 spectra for testing.
-
-    Generates spectra with:
-    - RT: 10.0, 10.5, 11.0, ... (0.5s apart)
-    - precursor m/z: 500, 550, 600, ...
-    - charge: alternating 2, 3
-    - base peak intensity: 10000, 9000, 8000, ...
-    """
-    exp = oms.MSExperiment()
-
-    ms1 = oms.MSSpectrum()
-    ms1.setMSLevel(1)
-    ms1.setRT(10.0)
-    ms1.set_peaks(([100.0, 200.0, 300.0], [1000.0, 2000.0, 1500.0]))
-    exp.addSpectrum(ms1)
-
-    for i in range(n_spectra):
-        ms2 = oms.MSSpectrum()
-        ms2.setMSLevel(2)
-        ms2.setRT(10.0 + i * 0.5)
-        ms2.setNativeID(f"scan={i + 1}")
-
-        prec = oms.Precursor()
-        prec.setMZ(500.0 + i * 50.0)
-        prec.setCharge(2 if i % 2 == 0 else 3)
-        ms2.setPrecursors([prec])
-
-        mzs = [100.0 + j * 50.0 for j in range(10)]
-        intensities = [1000.0 * (10 - j) for j in range(10)]
-        ms2.set_peaks((mzs, intensities))
-        exp.addSpectrum(ms2)
-
-    oms.MzMLFile().store(output_path, exp)
-
-
-@click.command(help="Convert MS2 spectra from mzML to MGF format with optional filtering.")
-@click.option("--input", "input", required=True, help="Input mzML file")
-@click.option("--ms-level", type=int, default=2, help="MS level to extract (default: 2)")
-@click.option("--min-peaks", type=int, default=1, help="Minimum peaks per spectrum (default: 1)")
-@click.option("--charge", multiple=True, type=int, help="Keep only these charge states (repeatable)")
-@click.option("--rt-min", type=float, default=None, help="Minimum retention time in seconds")
-@click.option("--rt-max", type=float, default=None, help="Maximum retention time in seconds")
-@click.option("--mz-min", type=float, default=None, help="Minimum precursor m/z")
-@click.option("--mz-max", type=float, default=None, help="Maximum precursor m/z")
-@click.option(
-    "--min-intensity", type=float, default=None,
-    help="Minimum base peak intensity to keep a spectrum",
-)
-@click.option("--output", required=True, help="Output MGF file")
-def main(input, ms_level, min_peaks, charge, rt_min, rt_max, mz_min, mz_max,
-         min_intensity, output) -> None:
-    charges = tuple(charge) if charge else None
-    stats = convert_mzml_to_mgf(
-        input, output, ms_level, min_peaks, charges, rt_min, rt_max,
-        mz_min, mz_max, min_intensity,
-    )
-    print(
-        f"Converted {stats['converted']} / {stats['ms_level_spectra']} "
-        f"MS{ms_level} spectra to {output} "
-        f"({stats['filtered_out']} filtered out)"
-    )
-
-
-if __name__ == "__main__":
-    main()
diff --git a/tools/proteomics/file_conversion/mzml_to_mgf_converter/requirements.txt b/tools/proteomics/file_conversion/mzml_to_mgf_converter/requirements.txt
deleted file mode 100644
index 18d6bbb..0000000
--- a/tools/proteomics/file_conversion/mzml_to_mgf_converter/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-pyopenms
-click
diff --git a/tools/proteomics/file_conversion/mzml_to_mgf_converter/tests/conftest.py b/tools/proteomics/file_conversion/mzml_to_mgf_converter/tests/conftest.py
deleted file mode 100644
index 1a21ede..0000000
--- a/tools/proteomics/file_conversion/mzml_to_mgf_converter/tests/conftest.py
+++ /dev/null
@@ -1,15 +0,0 @@
-import os
-import sys
-
-import pytest
-
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
-
-try:
-    import pyopenms  # noqa: F401
-
-    HAS_PYOPENMS = True
-except ImportError:
-    HAS_PYOPENMS = False
-
-requires_pyopenms = pytest.mark.skipif(not HAS_PYOPENMS, reason="pyopenms not installed")
diff --git a/tools/proteomics/file_conversion/mzml_to_mgf_converter/tests/test_mzml_to_mgf_converter.py b/tools/proteomics/file_conversion/mzml_to_mgf_converter/tests/test_mzml_to_mgf_converter.py
deleted file mode 100644
index 8902e5e..0000000
--- a/tools/proteomics/file_conversion/mzml_to_mgf_converter/tests/test_mzml_to_mgf_converter.py
+++ /dev/null
@@ -1,186 +0,0 @@
-"""Tests for mzml_to_mgf_converter."""
-
-import os
-import tempfile
-
-from conftest import requires_pyopenms
-
-
-@requires_pyopenms
-def test_create_synthetic_mzml():
-    import pyopenms as oms
-    from mzml_to_mgf_converter import create_synthetic_mzml
-
-    with tempfile.TemporaryDirectory() as tmp:
-        mzml_path = os.path.join(tmp, "test.mzML")
-        create_synthetic_mzml(mzml_path, n_spectra=3)
-
-        exp = oms.MSExperiment()
-        oms.MzMLFile().load(mzml_path, exp)
-        assert exp.size() == 4  # 1 MS1 + 3 MS2
-
-
-@requires_pyopenms
-def test_convert_mzml_to_mgf():
-    from mzml_to_mgf_converter import convert_mzml_to_mgf, create_synthetic_mzml
-
-    with tempfile.TemporaryDirectory() as tmp:
-        mzml_path = os.path.join(tmp, "test.mzML")
-        mgf_path = os.path.join(tmp, "test.mgf")
-
-        create_synthetic_mzml(mzml_path, n_spectra=3)
-        stats = convert_mzml_to_mgf(mzml_path, mgf_path)
-
-        assert stats["converted"] == 3
-        assert stats["ms_level_spectra"] == 3
-        assert stats["filtered_out"] == 0
-
-        with open(mgf_path) as fh:
-            content = fh.read()
-        assert content.count("BEGIN IONS") == 3
-        assert "PEPMASS=" in content
-        assert "CHARGE=" in content
-
-
-@requires_pyopenms
-def test_mgf_content_format():
-    from mzml_to_mgf_converter import convert_mzml_to_mgf, create_synthetic_mzml
-
-    with tempfile.TemporaryDirectory() as tmp:
-        mzml_path = os.path.join(tmp, "test.mzML")
-        mgf_path = os.path.join(tmp, "test.mgf")
-
-        create_synthetic_mzml(mzml_path, n_spectra=1)
-        convert_mzml_to_mgf(mzml_path, mgf_path)
-
-        with open(mgf_path) as fh:
-            lines = fh.readlines()
-
-        assert lines[0].strip() == "BEGIN IONS"
-        assert any(line.startswith("TITLE=") for line in lines)
-        assert any(line.startswith("PEPMASS=") for line in lines)
-
-
-@requires_pyopenms
-def test_min_peaks_filter():
-    from mzml_to_mgf_converter import convert_mzml_to_mgf, create_synthetic_mzml
-
-    with tempfile.TemporaryDirectory() as tmp:
-        mzml_path = os.path.join(tmp, "test.mzML")
-        mgf_path = os.path.join(tmp, "test.mgf")
-
-        create_synthetic_mzml(mzml_path, n_spectra=3)
-        stats = convert_mzml_to_mgf(mzml_path, mgf_path, min_peaks=100)
-        assert stats["converted"] == 0
-        assert stats["filtered_out"] == 3
-
-
-@requires_pyopenms
-def test_charge_filter():
-    """Synthetic spectra alternate charge 2, 3. Filter for charge 2 only."""
-    from mzml_to_mgf_converter import convert_mzml_to_mgf, create_synthetic_mzml
-
-    with tempfile.TemporaryDirectory() as tmp:
-        mzml_path = os.path.join(tmp, "test.mzML")
-        mgf_path = os.path.join(tmp, "test.mgf")
-
-        create_synthetic_mzml(mzml_path, n_spectra=6)
-        stats = convert_mzml_to_mgf(mzml_path, mgf_path, charges=(2,))
-        # Spectra 0, 2, 4 have charge 2; spectra 1, 3, 5 have charge 3
-        assert stats["converted"] == 3
-        assert stats["filtered_out"] == 3
-
-        with open(mgf_path) as fh:
-            content = fh.read()
-        assert "CHARGE=2+" in content
-        assert "CHARGE=3+" not in content
-
-
-@requires_pyopenms
-def test_charge_filter_multiple():
-    """Filter for both charge 2 and 3 — should keep all."""
-    from mzml_to_mgf_converter import convert_mzml_to_mgf, create_synthetic_mzml
-
-    with tempfile.TemporaryDirectory() as tmp:
-        mzml_path = os.path.join(tmp, "test.mzML")
-        mgf_path = os.path.join(tmp, "test.mgf")
-
-        create_synthetic_mzml(mzml_path, n_spectra=4)
-        stats = convert_mzml_to_mgf(mzml_path, mgf_path, charges=(2, 3))
-        assert stats["converted"] == 4
-        assert stats["filtered_out"] == 0
-
-
-@requires_pyopenms
-def test_rt_range_filter():
-    """Synthetic spectra at RT 10.0, 10.5, 11.0, 11.5, 12.0. Filter RT 10.5-11.5."""
-    from mzml_to_mgf_converter import convert_mzml_to_mgf, create_synthetic_mzml
-
-    with tempfile.TemporaryDirectory() as tmp:
-        mzml_path = os.path.join(tmp, "test.mzML")
-        mgf_path = os.path.join(tmp, "test.mgf")
-
-        create_synthetic_mzml(mzml_path, n_spectra=5)
-        stats = convert_mzml_to_mgf(
-            mzml_path, mgf_path, rt_min=10.5, rt_max=11.5,
-        )
-        # RT 10.5, 11.0, 11.5 pass; RT 10.0, 12.0 filtered out
-        assert stats["converted"] == 3
-        assert stats["filtered_out"] == 2
-
-
-@requires_pyopenms
-def test_mz_range_filter():
-    """Synthetic precursor m/z: 500, 550, 600, 650, 700. Filter 550-650."""
-    from mzml_to_mgf_converter import convert_mzml_to_mgf, create_synthetic_mzml
-
-    with tempfile.TemporaryDirectory() as tmp:
-        mzml_path = os.path.join(tmp, "test.mzML")
-        mgf_path = os.path.join(tmp, "test.mgf")
-
-        create_synthetic_mzml(mzml_path, n_spectra=5)
-        stats = convert_mzml_to_mgf(
-            mzml_path, mgf_path, mz_min=550.0, mz_max=650.0,
-        )
-        # m/z 550, 600, 650 pass; 500, 700 filtered out
-        assert stats["converted"] == 3
-        assert stats["filtered_out"] == 2
-
-
-@requires_pyopenms
-def test_min_intensity_filter():
-    """All synthetic spectra have base peak intensity 10000. Filter > 10000 removes all."""
-    from mzml_to_mgf_converter import convert_mzml_to_mgf, create_synthetic_mzml
-
-    with tempfile.TemporaryDirectory() as tmp:
-        mzml_path = os.path.join(tmp, "test.mzML")
-        mgf_path = os.path.join(tmp, "test.mgf")
-
-        create_synthetic_mzml(mzml_path, n_spectra=3)
-        stats = convert_mzml_to_mgf(mzml_path, mgf_path, min_intensity=10001.0)
-        assert stats["converted"] == 0
-
-        stats = convert_mzml_to_mgf(mzml_path, mgf_path, min_intensity=5000.0)
-        assert stats["converted"] == 3
-
-
-@requires_pyopenms
-def test_combined_filters():
-    """Apply charge + RT + m/z filters together."""
-    from mzml_to_mgf_converter import convert_mzml_to_mgf, create_synthetic_mzml
-
-    with tempfile.TemporaryDirectory() as tmp:
-        mzml_path = os.path.join(tmp, "test.mzML")
-        mgf_path = os.path.join(tmp, "test.mgf")
-
-        # 6 spectra: RT 10.0-12.5, m/z 500-750, charge alternating 2/3
-        create_synthetic_mzml(mzml_path, n_spectra=6)
-        stats = convert_mzml_to_mgf(
-            mzml_path, mgf_path,
-            charges=(2,), rt_min=10.0, rt_max=11.5, mz_min=500.0, mz_max=700.0,
-        )
-        # charge 2: indices 0, 2, 4 (RT 10.0/11.0/12.0, m/z 500/600/700)
-        # RT filter: removes index 4 (RT 12.0)
-        # m/z filter: all 500, 600, 700 pass (700 is at boundary, passes <=)
-        # Result: indices 0, 2 pass
-        assert stats["converted"] == 2

From 6233d206fe734af4f8ef775ed093e76701311f14 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 25 Mar 2026 15:56:23 +0000
Subject: [PATCH 2/2] Fix CI: skip linting/testing deleted tool directories

Co-authored-by: ypriverol <52113+ypriverol@users.noreply.github.com>
Agent-Logs-Url: https://github.com/OpenMS/agentomics/sessions/ea65b391-3afc-47cb-9dee-24639c3de03a
---
 .github/workflows/validate.yml | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml
index a209594..01bae5b 100644
--- a/.github/workflows/validate.yml
+++ b/.github/workflows/validate.yml
@@ -20,10 +20,12 @@ jobs:
         name: Detect changed tool directories
         run: |
           # Note: github.base_ref is only available on pull_request events
-          # Find all tool directories that changed in this PR
+          # Find all tool directories that changed in this PR, keeping only
+          # those that still exist (deleted tool dirs must not be linted/tested).
           CHANGED=$(git diff --name-only origin/${{ github.base_ref }}...HEAD -- 'tools/' \
             | grep -oP 'tools/[^/]+/[^/]+/[^/]+/' \
             | sort -u \
+            | while read -r dir; do [ -d "$dir" ] && echo "$dir"; done \
             | jq -R -s -c 'split("\n") | map(select(length > 0))')
 
           if [ "$CHANGED" = "[]" ] || [ -z "$CHANGED" ]; then
@@ -60,9 +62,11 @@ jobs:
         run: |
           DIRS='${{ needs.detect-changes.outputs.matrix }}'
           echo "$DIRS" | jq -r '.[]' | while read -r dir; do
-            echo "::group::ruff $dir"
-            /tmp/validate_venv/bin/python -m ruff check "$dir"
-            echo "::endgroup::"
+            if [ -d "$dir" ]; then
+              echo "::group::ruff $dir"
+              /tmp/validate_venv/bin/python -m ruff check "$dir"
+              echo "::endgroup::"
+            fi
           done
 
       - name: Test changed tools