From f101864320783ba425e038ab2c5679fdc07dfcd7 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 24 Mar 2026 17:43:55 +0000
Subject: [PATCH 1/2] Initial plan


From 9e73afc87b25850d7a101ebe711cc6c0eb06dd5a Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 24 Mar 2026 17:50:46 +0000
Subject: [PATCH 2/2] Add pyopenms scripts for proteomics and metabolomics with
 tests and docs

Co-authored-by: ypriverol <52113+ypriverol@users.noreply.github.com>
Agent-Logs-Url: https://github.com/OpenMS/agentomics/sessions/871c3e78-4575-48b8-beca-2f71e43e6ed0
---
 README.md                                     |  63 ++++++-
 requirements.txt                              |   1 +
 scripts/metabolomics/README.md                |  46 +++++
 .../metabolomics/isotope_pattern_matcher.py   | 169 ++++++++++++++++++
 .../metabolomics/mass_accuracy_calculator.py  | 140 +++++++++++++++
 .../metabolite_feature_detection.py           | 145 +++++++++++++++
 scripts/proteomics/README.md                  |  54 ++++++
 .../feature_detection_proteomics.py           | 101 +++++++++++
 scripts/proteomics/peptide_mass_calculator.py | 133 ++++++++++++++
 scripts/proteomics/protein_digest.py          | 162 +++++++++++++++++
 scripts/proteomics/spectrum_file_info.py      | 134 ++++++++++++++
 tests/test_metabolomics.py                    | 122 +++++++++++++
 tests/test_proteomics.py                      | 111 ++++++++++++
 13 files changed, 1380 insertions(+), 1 deletion(-)
 create mode 100644 requirements.txt
 create mode 100644 scripts/metabolomics/README.md
 create mode 100644 scripts/metabolomics/isotope_pattern_matcher.py
 create mode 100644 scripts/metabolomics/mass_accuracy_calculator.py
 create mode 100644 scripts/metabolomics/metabolite_feature_detection.py
 create mode 100644 scripts/proteomics/README.md
 create mode 100644 scripts/proteomics/feature_detection_proteomics.py
 create mode 100644 scripts/proteomics/peptide_mass_calculator.py
 create mode 100644 scripts/proteomics/protein_digest.py
 create mode 100644 scripts/proteomics/spectrum_file_info.py
 create mode 100644 tests/test_metabolomics.py
 create mode 100644 tests/test_proteomics.py

diff --git a/README.md b/README.md
index fd929c9..3325b78 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,63 @@
 # agentomics
-A repository of agentic created tools in proteomics using pyopenms
+
+A repository of agentic-created scripts using [pyopenms](https://pyopenms.readthedocs.io/)
+for proteomics and metabolomics users.
+
+## Requirements
+
+```bash
+pip install pyopenms
+```
+
+Or install all dependencies at once:
+
+```bash
+pip install -r requirements.txt
+```
+
+## Repository Structure
+
+```
+agentomics/
+├── scripts/
+│   ├── proteomics/          # Scripts for proteomics workflows
+│   │   ├── peptide_mass_calculator.py
+│   │   ├── protein_digest.py
+│   │   ├── spectrum_file_info.py
+│   │   └── feature_detection_proteomics.py
+│   └── metabolomics/        # Scripts for metabolomics workflows
+│       ├── metabolite_feature_detection.py
+│       ├── mass_accuracy_calculator.py
+│       └── isotope_pattern_matcher.py
+└── tests/
+    ├── test_proteomics.py
+    └── test_metabolomics.py
+```
+
+## Proteomics Scripts
+
+| Script | Description |
+|--------|-------------|
+| [`peptide_mass_calculator.py`](scripts/proteomics/peptide_mass_calculator.py) | Monoisotopic/average masses and b/y fragment ion series for peptide sequences |
+| [`protein_digest.py`](scripts/proteomics/protein_digest.py) | In-silico enzymatic protein digestion (Trypsin, Lys-C, …) |
+| [`spectrum_file_info.py`](scripts/proteomics/spectrum_file_info.py) | Summary statistics for mzML files (spectra, RT range, TIC) |
+| [`feature_detection_proteomics.py`](scripts/proteomics/feature_detection_proteomics.py) | FeatureFinderCentroided-based peptide feature detection from LC-MS/MS data |
+
+See [`scripts/proteomics/README.md`](scripts/proteomics/README.md) for usage examples.
+
+## Metabolomics Scripts
+
+| Script | Description |
+|--------|-------------|
+| [`metabolite_feature_detection.py`](scripts/metabolomics/metabolite_feature_detection.py) | FeatureFinderMetabo-based metabolite feature detection from LC-MS data |
+| [`mass_accuracy_calculator.py`](scripts/metabolomics/mass_accuracy_calculator.py) | Compute m/z mass accuracy (ppm error) for sequences or molecular formulas |
+| [`isotope_pattern_matcher.py`](scripts/metabolomics/isotope_pattern_matcher.py) | Generate theoretical isotope distributions and score against observed peaks |
+
+See [`scripts/metabolomics/README.md`](scripts/metabolomics/README.md) for usage examples.
+
+## Running Tests
+
+```bash
+pip install pytest pyopenms
+python -m pytest tests/ -v
+```
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..7ce28ec
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+pyopenms
diff --git a/scripts/metabolomics/README.md b/scripts/metabolomics/README.md
new file mode 100644
index 0000000..c57cff4
--- /dev/null
+++ b/scripts/metabolomics/README.md
@@ -0,0 +1,46 @@
+# Metabolomics Scripts
+
+A collection of agentic-created Python scripts for common metabolomics tasks
+using [pyopenms](https://pyopenms.readthedocs.io/).
+
+## Scripts
+
+### `metabolite_feature_detection.py`
+Detect small-molecule features (isotope envelopes) in centroided LC-MS data
+using the `FeatureFinderMetabo` pipeline (mass tracing → elution peak
+detection → feature finding).  Output is written as a featureXML file.
+
+```bash
+python metabolite_feature_detection.py --input sample.mzML
+python metabolite_feature_detection.py --input sample.mzML --output features.featureXML --noise 1e5
+```
+
+### `mass_accuracy_calculator.py`
+Calculate the mass accuracy (ppm error) between a theoretical value derived
+from a peptide sequence or molecular formula and one or more observed m/z
+values.
+
+```bash
+# Peptide sequence
+python mass_accuracy_calculator.py --sequence PEPTIDEK --observed 803.4560
+
+# Molecular formula
+python mass_accuracy_calculator.py --formula C6H12O6 --observed 181.0709
+
+# Multiple observed values at charge 2
+python mass_accuracy_calculator.py --sequence ACDEFGHIK --charge 2 \
+    --observed 554.2478 554.2480 554.2482
+```
+
+### `isotope_pattern_matcher.py`
+Generate a theoretical isotope distribution for any molecular formula and
+optionally compute a cosine similarity score against observed peaks.
+
+```bash
+# Generate pattern for glucose
+python isotope_pattern_matcher.py --formula C6H12O6
+
+# Compare against observed peaks
+python isotope_pattern_matcher.py --formula C6H12O6 \
+    --peaks 181.0709,100.0 182.0742,6.7 183.0775,0.4
+```
diff --git a/scripts/metabolomics/isotope_pattern_matcher.py b/scripts/metabolomics/isotope_pattern_matcher.py
new file mode 100644
index 0000000..f0510d4
--- /dev/null
+++ b/scripts/metabolomics/isotope_pattern_matcher.py
@@ -0,0 +1,169 @@
+"""
+Isotope Pattern Generator & Matcher
+=====================================
+Generate the theoretical isotope distribution for a molecular formula
+and optionally compare it against an observed spectrum to compute a
+cosine similarity score using pyopenms.
+
+Usage
+-----
+    # Generate isotope pattern for glucose
+    python isotope_pattern_matcher.py --formula C6H12O6
+
+    # Compare against observed peaks (m/z intensity pairs on stdin or --peaks)
+    python isotope_pattern_matcher.py --formula C6H12O6 \\
+        --peaks 181.0709,100.0 182.0742,6.7 183.0775,0.4
+"""
+
+import argparse
+import math
+import sys
+
+try:
+    import pyopenms as oms
+except ImportError:
+    sys.exit(
+        "pyopenms is required. Install it with:  pip install pyopenms"
+    )
+
+
+def get_isotope_distribution(
+    formula: str,
+    max_isotopes: int = 5,
+) -> list:
+    """Compute the theoretical isotope distribution for a molecular formula.
+
+    Parameters
+    ----------
+    formula:
+        Empirical formula string, e.g. ``"C6H12O6"``.
+    max_isotopes:
+        Maximum number of isotope peaks to return (default: 5).
+
+    Returns
+    -------
+    list of (mz, relative_abundance)
+        Sorted by m/z; relative abundances sum to 100.
+    """
+    ef = oms.EmpiricalFormula(formula)
+    isotope_dist = ef.getIsotopeDistribution(
+        oms.CoarseIsotopePatternGenerator(max_isotopes)
+    )
+    peaks = [(p.getMZ(), p.getIntensity()) for p in isotope_dist.getContainer()]
+    if not peaks:
+        return []
+    max_ab = max(ab for _, ab in peaks)
+    return [(mz, ab / max_ab * 100) for mz, ab in peaks]
+
+
+def cosine_similarity(
+    theoretical: list,
+    observed: list,
+    mz_tolerance: float = 0.02,
+) -> float:
+    """Compute cosine similarity between theoretical and observed peak lists.
+
+    Parameters
+    ----------
+    theoretical:
+        List of ``(mz, intensity)`` tuples for the theoretical pattern.
+    observed:
+        List of ``(mz, intensity)`` tuples for the observed spectrum.
+    mz_tolerance:
+        Maximum m/z difference to match a pair of peaks (default: 0.02 Da).
+
+    Returns
+    -------
+    float
+        Cosine similarity in [0, 1].
+    """
+    theo_vec = []
+    obs_vec = []
+    for tmz, tint in theoretical:
+        matched = 0.0
+        for omz, oint in observed:
+            if abs(omz - tmz) <= mz_tolerance:
+                matched = oint
+                break
+        theo_vec.append(tint)
+        obs_vec.append(matched)
+
+    dot = sum(t * o for t, o in zip(theo_vec, obs_vec))
+    norm_t = math.sqrt(sum(t ** 2 for t in theo_vec))
+    norm_o = math.sqrt(sum(o ** 2 for o in obs_vec))
+    if norm_t == 0 or norm_o == 0:
+        return 0.0
+    return dot / (norm_t * norm_o)
+
+
+def parse_peaks(peak_strings: list) -> list:
+    """Parse ``"mz,intensity"`` strings into ``(float, float)`` tuples."""
+    peaks = []
+    for s in peak_strings:
+        parts = s.split(",")
+        if len(parts) != 2:
+            raise ValueError(
+                f"Expected 'mz,intensity' format, got: {s!r}"
+            )
+        peaks.append((float(parts[0]), float(parts[1])))
+    return peaks
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Generate theoretical isotope patterns and optionally "
+                    "compare them against observed peaks using pyopenms."
+    )
+    parser.add_argument(
+        "--formula",
+        required=True,
+        help="Molecular formula (e.g. C6H12O6)",
+    )
+    parser.add_argument(
+        "--max-isotopes",
+        type=int,
+        default=5,
+        dest="max_isotopes",
+        help="Maximum isotope peaks to compute (default: 5)",
+    )
+    parser.add_argument(
+        "--peaks",
+        nargs="+",
+        metavar="MZ,INTENSITY",
+        help="Observed peaks as 'mz,intensity' pairs for similarity scoring",
+    )
+    parser.add_argument(
+        "--tolerance",
+        type=float,
+        default=0.02,
+        metavar="DA",
+        help="m/z tolerance in Da for peak matching (default: 0.02)",
+    )
+    args = parser.parse_args()
+
+    distribution = get_isotope_distribution(args.formula, args.max_isotopes)
+    if not distribution:
+        print("Could not compute isotope distribution for the given formula.")
+        return
+
+    print(f"Isotope distribution for {args.formula}:")
+    print(f"\n{'Peak':>5}  {'m/z':>12}  {'Relative Abundance (%)':>22}")
+    print("-" * 44)
+    for i, (mz, rel_ab) in enumerate(distribution):
+        bar = "#" * int(rel_ab / 5)
+        print(f"  M+{i}  {mz:>12.4f}  {rel_ab:>6.2f} %  {bar}")
+
+    if args.peaks:
+        observed = parse_peaks(args.peaks)
+        sim = cosine_similarity(distribution, observed, args.tolerance)
+        print(f"\nCosine similarity vs. observed peaks: {sim:.4f}")
+        if sim >= 0.9:
+            print("  ✓ Excellent match (≥ 0.90)")
+        elif sim >= 0.7:
+            print("  ~ Good match (≥ 0.70)")
+        else:
+            print("  ✗ Poor match (< 0.70)")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/metabolomics/mass_accuracy_calculator.py b/scripts/metabolomics/mass_accuracy_calculator.py
new file mode 100644
index 0000000..b8c1b7a
--- /dev/null
+++ b/scripts/metabolomics/mass_accuracy_calculator.py
@@ -0,0 +1,140 @@
+"""
+Mass Accuracy Calculator
+========================
+Calculate the mass accuracy (ppm error) between a theoretical mass
+(or peptide/formula string) and one or more observed m/z values.
+
+Supports both:
+- Amino acid sequence inputs  (e.g. ``PEPTIDEK``)
+- Empirical formula inputs    (e.g. ``C6H12O6``)
+
+Usage
+-----
+    # Peptide sequence
+    python mass_accuracy_calculator.py --sequence PEPTIDEK --observed 803.4560
+
+    # Molecular formula (charge 1 default)
+    python mass_accuracy_calculator.py --formula C6H12O6 --observed 181.0709
+
+    # Multiple observed values at charge 2
+    python mass_accuracy_calculator.py --sequence ACDEFGHIK --charge 2 \\
+        --observed 554.2478 554.2480 554.2482
+"""
+
+import argparse
+import sys
+
+try:
+    import pyopenms as oms
+except ImportError:
+    sys.exit(
+        "pyopenms is required. Install it with:  pip install pyopenms"
+    )
+
+PROTON = 1.007276
+
+
+def theoretical_mz_from_sequence(sequence: str, charge: int) -> float:
+    """Compute the theoretical m/z for a peptide sequence.
+
+    Parameters
+    ----------
+    sequence:
+        Amino acid sequence, optionally with bracket-enclosed modifications.
+    charge:
+        Charge state.
+
+    Returns
+    -------
+    float
+        Theoretical m/z value.
+    """
+    aa_seq = oms.AASequence.fromString(sequence)
+    mass = aa_seq.getMonoWeight()
+    return (mass + charge * PROTON) / charge
+
+
+def theoretical_mz_from_formula(formula: str, charge: int) -> float:
+    """Compute the theoretical m/z for a molecular formula.
+
+    Parameters
+    ----------
+    formula:
+        Empirical formula string, e.g. ``"C6H12O6"``.
+    charge:
+        Charge state (used for proton addition).
+
+    Returns
+    -------
+    float
+        Theoretical m/z value (monoisotopic).
+    """
+    ef = oms.EmpiricalFormula(formula)
+    mass = ef.getMonoWeight()
+    return (mass + charge * PROTON) / charge
+
+
+def ppm_error(theoretical: float, observed: float) -> float:
+    """Calculate the mass accuracy in parts-per-million (ppm).
+
+    Parameters
+    ----------
+    theoretical:
+        Theoretical m/z value.
+    observed:
+        Observed m/z value.
+
+    Returns
+    -------
+    float
+        PPM error (positive = observed > theoretical).
+    """
+    return (observed - theoretical) / theoretical * 1e6
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Compute m/z mass accuracy (ppm error) using pyopenms."
+    )
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument(
+        "--sequence",
+        help="Peptide sequence (e.g. PEPTIDEK)",
+    )
+    group.add_argument(
+        "--formula",
+        help="Molecular formula (e.g. C6H12O6)",
+    )
+    parser.add_argument(
+        "--charge",
+        type=int,
+        default=1,
+        help="Charge state (default: 1)",
+    )
+    parser.add_argument(
+        "--observed",
+        nargs="+",
+        type=float,
+        required=True,
+        metavar="MZ",
+        help="Observed m/z value(s)",
+    )
+    args = parser.parse_args()
+
+    if args.sequence:
+        theoretical = theoretical_mz_from_sequence(args.sequence, args.charge)
+        label = f"sequence={args.sequence}"
+    else:
+        theoretical = theoretical_mz_from_formula(args.formula, args.charge)
+        label = f"formula={args.formula}"
+
+    print(f"Theoretical m/z ({label}, charge {args.charge}+): {theoretical:.6f}")
+    print(f"\n{'Observed m/z':>14}  {'PPM error':>10}")
+    print("-" * 28)
+    for obs in args.observed:
+        ppm = ppm_error(theoretical, obs)
+        print(f"{obs:>14.6f}  {ppm:>+10.4f}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/metabolomics/metabolite_feature_detection.py b/scripts/metabolomics/metabolite_feature_detection.py
new file mode 100644
index 0000000..4a5a0f8
--- /dev/null
+++ b/scripts/metabolomics/metabolite_feature_detection.py
@@ -0,0 +1,145 @@
+"""
+Metabolite Feature Detection
+=============================
+Detect small-molecule features (isotope envelopes) in an LC-MS mzML file
+using the pyopenms FeatureFinderMetabo algorithm.  Results are written to a
+featureXML file which can be opened in TOPPView.
+
+Usage
+-----
+    python metabolite_feature_detection.py --input sample.mzML
+    python metabolite_feature_detection.py --input sample.mzML --output features.featureXML --noise 1e5
+"""
+
+import argparse
+import sys
+
+try:
+    import pyopenms as oms
+except ImportError:
+    sys.exit(
+        "pyopenms is required. Install it with:  pip install pyopenms"
+    )
+
+
+def detect_metabolite_features(
+    input_path: str,
+    output_path: str,
+    noise_threshold: float = 1e4,
+) -> oms.FeatureMap:
+    """Run FeatureFinderMetabo on an mzML file.
+
+    Parameters
+    ----------
+    input_path:
+        Path to the centroided mzML file.
+    output_path:
+        Path for the output featureXML file.
+    noise_threshold:
+        Minimum peak intensity to consider during mass tracing (default 1e4).
+
+    Returns
+    -------
+    pyopenms.FeatureMap
+        Map of detected metabolite features.
+    """
+    exp = oms.MSExperiment()
+    print(f"Loading {input_path} …")
+    oms.MzMLFile().load(input_path, exp)
+    exp.updateRanges()
+
+    # --- Mass tracing ---
+    mass_traces = []
+    mt_params = oms.MassTraceDetection().getDefaults()
+    mt_params.setValue("noise_threshold_int", noise_threshold)
+    mt_det = oms.MassTraceDetection()
+    mt_det.setParameters(mt_params)
+    mt_det.run(exp, mass_traces, 0)
+    print(f"Mass traces found: {len(mass_traces)}")
+
+    # --- Elution peak detection ---
+    mass_traces_split = []
+    mass_traces_final = []
+    epd_params = oms.ElutionPeakDetection().getDefaults()
+    epd = oms.ElutionPeakDetection()
+    epd.setParameters(epd_params)
+    epd.detectPeaks(mass_traces, mass_traces_split)
+    if epd.getParameters().getValue("width_filtering") == "auto":
+        epd.filterByPeakWidth(mass_traces_split, mass_traces_final)
+    else:
+        mass_traces_final = mass_traces_split
+
+    # --- Feature detection ---
+    feature_map = oms.FeatureMap()
+    chrom_fwhm = 0.0
+    ffm_params = oms.FeatureFindingMetabo().getDefaults()
+    ffm = oms.FeatureFindingMetabo()
+    ffm.setParameters(ffm_params)
+    ffm.run(mass_traces_final, feature_map, chrom_fwhm)
+
+    feature_map.setUniqueIds()
+    oms.FeatureXMLFile().store(output_path, feature_map)
+    print(f"Detected {feature_map.size()} metabolite features → {output_path}")
+    return feature_map
+
+
+def print_feature_summary(feature_map: oms.FeatureMap, top_n: int = 20) -> None:
+    """Print a tabular summary of the top-N most intense features."""
+    if feature_map.size() == 0:
+        print("No features detected.")
+        return
+
+    features = list(feature_map)
+    features.sort(key=lambda f: f.getIntensity(), reverse=True)
+
+    display = features[:top_n]
+    print(
+        f"\nTop {len(display)} features (by intensity):\n"
+        f"{'#':>5}  {'RT (s)':>10}  {'m/z':>12}  {'Charge':>6}  {'Intensity':>14}"
+    )
+    print("-" * 56)
+    for i, feature in enumerate(display, 1):
+        print(
+            f"{i:>5}  {feature.getRT():>10.2f}  {feature.getMZ():>12.4f}  "
+            f"{feature.getCharge():>6}  {feature.getIntensity():>14.3e}"
+        )
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Detect metabolite features in an mzML file using pyopenms."
+    )
+    parser.add_argument(
+        "--input",
+        required=True,
+        metavar="FILE",
+        help="Centroided mzML input file",
+    )
+    parser.add_argument(
+        "--output",
+        metavar="FILE",
+        help="Output featureXML file (default: <input>.featureXML)",
+    )
+    parser.add_argument(
+        "--noise",
+        type=float,
+        default=1e4,
+        metavar="THRESHOLD",
+        help="Noise intensity threshold for mass tracing (default: 1e4)",
+    )
+    parser.add_argument(
+        "--top",
+        type=int,
+        default=20,
+        metavar="N",
+        help="Number of top features to print (default: 20)",
+    )
+    args = parser.parse_args()
+
+    output_path = args.output or args.input.replace(".mzML", "_metabolites.featureXML")
+    feature_map = detect_metabolite_features(args.input, output_path, args.noise)
+    print_feature_summary(feature_map, args.top)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/proteomics/README.md b/scripts/proteomics/README.md
new file mode 100644
index 0000000..e4d4046
--- /dev/null
+++ b/scripts/proteomics/README.md
@@ -0,0 +1,54 @@
+# Proteomics Scripts
+
+A collection of agentic-created Python scripts for common proteomics tasks
+using [pyopenms](https://pyopenms.readthedocs.io/).
+
+## Scripts
+
+### `peptide_mass_calculator.py`
+Calculate monoisotopic and average masses for peptide sequences, and compute
+b-ion / y-ion fragment series.
+
+```bash
+# Plain sequence
+python peptide_mass_calculator.py --sequence PEPTIDEK
+
+# Modified sequence (oxidised Met) at charge 2
+python peptide_mass_calculator.py --sequence PEPTM[147]IDEK --charge 2
+
+# Also print fragment ions
+python peptide_mass_calculator.py --sequence ACDEFGHIK --fragments
+```
+
+### `protein_digest.py`
+Perform in-silico enzymatic digestion of a protein sequence and report
+the resulting peptides with their masses.
+
+```bash
+# Tryptic digest
+python protein_digest.py --sequence MKVLWAALLVTFLAGCQAK... --enzyme Trypsin
+
+# Lys-C with up to 2 missed cleavages
+python protein_digest.py --sequence MKVLWAALLVTFLAGCQAK... --enzyme Lys-C --missed-cleavages 2
+
+# List all available enzymes
+python protein_digest.py --list-enzymes
+```
+
+### `spectrum_file_info.py`
+Summarise the contents of an mzML file: number of spectra by MS level,
+retention time range, m/z range, and TIC statistics.
+
+```bash
+python spectrum_file_info.py --input sample.mzML
+python spectrum_file_info.py --input sample.mzML --tic   # also print per-spectrum TIC
+```
+
+### `feature_detection_proteomics.py`
+Detect peptide isotope features in centroided LC-MS/MS data using the
+`FeatureFinderCentroided` algorithm.  Output is written as a featureXML file.
+
+```bash
+python feature_detection_proteomics.py --input sample.mzML
+python feature_detection_proteomics.py --input sample.mzML --output features.featureXML
+```
diff --git a/scripts/proteomics/feature_detection_proteomics.py b/scripts/proteomics/feature_detection_proteomics.py
new file mode 100644
index 0000000..ab25e32
--- /dev/null
+++ b/scripts/proteomics/feature_detection_proteomics.py
@@ -0,0 +1,101 @@
+"""
+Feature Detection for Proteomics LC-MS Data
+============================================
+Detect peptide features (isotope envelopes) in an mzML file using the
+pyopenms FeatureFinderCentroided algorithm.  Results are written to a
+featureXML file which can be opened in TOPPView.
+
+Usage
+-----
+    python feature_detection_proteomics.py --input sample.mzML
+    python feature_detection_proteomics.py --input sample.mzML --output features.featureXML
+"""
+
+import argparse
+import sys
+
+try:
+    import pyopenms as oms
+except ImportError:
+    sys.exit(
+        "pyopenms is required. Install it with:  pip install pyopenms"
+    )
+
+
+def detect_features(
+    input_path: str,
+    output_path: str,
+) -> oms.FeatureMap:
+    """Run FeatureFinderCentroided on an mzML file.
+
+    Parameters
+    ----------
+    input_path:
+        Path to the centroided mzML file.
+    output_path:
+        Path for the output featureXML file.
+
+    Returns
+    -------
+    pyopenms.FeatureMap
+        Map of detected features.
+    """
+    exp = oms.MSExperiment()
+    print(f"Loading {input_path} …")
+    oms.MzMLFile().load(input_path, exp)
+    exp.updateRanges()
+
+    feature_map = oms.FeatureMap()
+    seeds = oms.FeatureMap()
+
+    ff = oms.FeatureFinder()
+    ff.setLogType(oms.LogType.CMD)
+
+    params = oms.FeatureFinder().getParameters("centroided")
+    ff.run("centroided", exp, feature_map, params, seeds)
+
+    feature_map.setUniqueIds()
+    oms.FeatureXMLFile().store(output_path, feature_map)
+    print(f"Detected {feature_map.size()} features → {output_path}")
+    return feature_map
+
+
+def print_feature_summary(feature_map: oms.FeatureMap) -> None:
+    """Print a tabular summary of detected features."""
+    if feature_map.size() == 0:
+        print("No features detected.")
+        return
+
+    print(f"\n{'#':>5}  {'RT (s)':>10}  {'m/z':>12}  {'Charge':>6}  {'Intensity':>14}")
+    print("-" * 56)
+    for i, feature in enumerate(feature_map, 1):
+        print(
+            f"{i:>5}  {feature.getRT():>10.2f}  {feature.getMZ():>12.4f}  "
+            f"{feature.getCharge():>6}  {feature.getIntensity():>14.3e}"
+        )
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Detect peptide features in an mzML file using pyopenms."
+    )
+    parser.add_argument(
+        "--input",
+        required=True,
+        metavar="FILE",
+        help="Centroided mzML input file",
+    )
+    parser.add_argument(
+        "--output",
+        metavar="FILE",
+        help="Output featureXML file (default: <input>.featureXML)",
+    )
+    args = parser.parse_args()
+
+    output_path = args.output or args.input.replace(".mzML", ".featureXML")
+    feature_map = detect_features(args.input, output_path)
+    print_feature_summary(feature_map)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/proteomics/peptide_mass_calculator.py b/scripts/proteomics/peptide_mass_calculator.py
new file mode 100644
index 0000000..8bc4893
--- /dev/null
+++ b/scripts/proteomics/peptide_mass_calculator.py
@@ -0,0 +1,133 @@
+"""
+Peptide Mass Calculator
+=======================
+Calculate monoisotopic and average masses for peptide sequences using pyopenms.
+
+Supports:
+- Plain amino acid sequences (e.g. "PEPTIDEK")
+- Modified sequences in bracket notation (e.g. "PEPTM[147]IDEK")
+- Fragment ion mass series (b-ions and y-ions)
+- Multiple charge states
+
+Usage
+-----
+    python peptide_mass_calculator.py --sequence PEPTIDEK
+    python peptide_mass_calculator.py --sequence PEPTM[147]IDEK --charge 2
+    python peptide_mass_calculator.py --sequence ACDEFGHIK --fragments
+"""
+
+import argparse
+import sys
+
+try:
+    import pyopenms as oms
+except ImportError:
+    sys.exit(
+        "pyopenms is required. Install it with:  pip install pyopenms"
+    )
+
+PROTON = 1.007276
+
+
+def peptide_masses(sequence: str, charge: int = 1) -> dict:
+    """Return monoisotopic and average masses for the given peptide sequence.
+
+    Parameters
+    ----------
+    sequence:
+        Amino acid sequence, optionally with bracket-enclosed modifications,
+        e.g. ``"PEPTM[147]IDEK"``.
+    charge:
+        Desired charge state for m/z calculation (default 1).
+
+    Returns
+    -------
+    dict
+        Dictionary with keys ``monoisotopic_mass``, ``average_mass``,
+        ``mz_monoisotopic``, and ``mz_average``.
+    """
+    aa_seq = oms.AASequence.fromString(sequence)
+    mono = aa_seq.getMonoWeight()
+    avg = aa_seq.getAverageWeight()
+    return {
+        "sequence": sequence,
+        "charge": charge,
+        "monoisotopic_mass": mono,
+        "average_mass": avg,
+        "mz_monoisotopic": (mono + charge * PROTON) / charge,
+        "mz_average": (avg + charge * PROTON) / charge,
+    }
+
+
+def fragment_ions(sequence: str) -> dict:
+    """Compute singly charged b-ion and y-ion series for a peptide.
+
+    Parameters
+    ----------
+    sequence:
+        Plain or modified amino acid sequence.
+
+    Returns
+    -------
+    dict
+        Dictionary with keys ``b_ions`` and ``y_ions``, each a list of
+        ``(index, mass)`` tuples.
+    """
+    aa_seq = oms.AASequence.fromString(sequence)
+    n = aa_seq.size()
+
+    b_ions = []
+    for i in range(1, n):
+        prefix = aa_seq.getPrefix(i)
+        b_ions.append((i, prefix.getMonoWeight(oms.Residue.ResidueType.BIon, 1)))
+
+    y_ions = []
+    for i in range(1, n):
+        suffix = aa_seq.getSuffix(i)
+        y_ions.append((i, suffix.getMonoWeight(oms.Residue.ResidueType.YIon, 1)))
+
+    return {"b_ions": b_ions, "y_ions": y_ions}
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Calculate peptide/fragment masses using pyopenms."
+    )
+    parser.add_argument(
+        "--sequence",
+        required=True,
+        help="Amino acid sequence (e.g. PEPTIDEK or PEPTM[147]IDEK)",
+    )
+    parser.add_argument(
+        "--charge",
+        type=int,
+        default=1,
+        help="Charge state for m/z calculation (default: 1)",
+    )
+    parser.add_argument(
+        "--fragments",
+        action="store_true",
+        help="Also compute b-ion and y-ion series",
+    )
+    args = parser.parse_args()
+
+    info = peptide_masses(args.sequence, args.charge)
+    print(f"Sequence          : {info['sequence']}")
+    print(f"Charge            : {info['charge']}+")
+    print(f"Monoisotopic mass : {info['monoisotopic_mass']:.6f} Da")
+    print(f"Average mass      : {info['average_mass']:.6f} Da")
+    print(f"m/z (mono)        : {info['mz_monoisotopic']:.6f}")
+    print(f"m/z (avg)         : {info['mz_average']:.6f}")
+
+    if args.fragments:
+        ions = fragment_ions(args.sequence)
+        print("\n--- b-ions ---")
+        for idx, mass in ions["b_ions"]:
+            print(f"  b{idx:>2}  {mass:.6f} Da")
+        print("\n--- y-ions ---")
+        for idx, mass in ions["y_ions"]:
+            print(f"  y{idx:>2}  {mass:.6f} Da")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/proteomics/protein_digest.py b/scripts/proteomics/protein_digest.py
new file mode 100644
index 0000000..ce87be1
--- /dev/null
+++ b/scripts/proteomics/protein_digest.py
@@ -0,0 +1,162 @@
+"""
+Protein In-Silico Digest
+========================
+Digest a protein sequence with a chosen enzyme and report the resulting
+peptides together with their masses using pyopenms.
+
+Supported enzymes (examples): Trypsin, Lys-C, Arg-C, Asp-N, Glu-C, Chymotrypsin
+For the full list call:  python protein_digest.py --list-enzymes
+
+Usage
+-----
+    python protein_digest.py --sequence MKVLWAALLVTFLAGCQAKVEQAVETEPEPELRQQTEWQSGQRWELALGRFWDYLRWVQTLSEQVQEELLSSQVTQELRALMDETMKELKAYKSELEEQLTPVAEETRARLSKELQAAQARLGADVLASHGRLVQYRGEVQAMLGQSTEELRVRLASHLRKLRKRLLRDADDLQKRLAVYQAGAREGAERGLSAIRERLGPLVEQGRVRAATVGSLAGQPLQERAQAWGERLRARMEEMGSRTRDRLDEVKEQVAEVRAKLEEQAQQIRLQAEAFQARLKSWFEPLVEDMQRQWAGLVEKVQAAVGTSAAPVPSDNH --enzyme Trypsin
+    python protein_digest.py --sequence MKVLWAALLVTFLAGC --enzyme Lys-C --missed-cleavages 2
+"""
+
+import argparse
+import sys
+
+try:
+    import pyopenms as oms
+except ImportError:
+    sys.exit(
+        "pyopenms is required. Install it with:  pip install pyopenms"
+    )
+
+
+def list_enzymes() -> list:
+    """Return all enzyme names registered in the pyopenms ProteaseDB."""
+    db = oms.ProteaseDB()
+    raw_names = []
+    db.getAllNames(raw_names)
+    return sorted(
+        n.decode() if isinstance(n, bytes) else n for n in raw_names
+    )
+
+
+def digest_protein(
+    sequence: str,
+    enzyme: str = "Trypsin",
+    missed_cleavages: int = 0,
+    min_length: int = 6,
+    max_length: int = 40,
+) -> list:
+    """Digest a protein sequence in silico.
+
+    Parameters
+    ----------
+    sequence:
+        Single-letter amino acid sequence of the protein.
+    enzyme:
+        Enzyme name as known to pyopenms ProteaseDB (default: ``"Trypsin"``).
+    missed_cleavages:
+        Maximum number of missed cleavages allowed (default: 0).
+    min_length:
+        Minimum peptide length to include (default: 6).
+    max_length:
+        Maximum peptide length to include (default: 40).
+
+    Returns
+    -------
+    list of dict
+        Each entry contains ``sequence``, ``start``, ``end``,
+        ``monoisotopic_mass``, and ``missed_cleavages``.
+    """
+    protein_seq = oms.AASequence.fromString(sequence)
+    digest = oms.ProteaseDigestion()
+    digest.setEnzyme(enzyme)
+    digest.setMissedCleavages(missed_cleavages)
+
+    peptides = []
+    digest.digest(protein_seq, peptides, min_length, max_length)
+
+    results = []
+    for pep in peptides:
+        pep_str = pep.toString()
+        mass = pep.getMonoWeight()
+        results.append(
+            {
+                "sequence": pep_str,
+                "length": pep.size(),
+                "monoisotopic_mass": mass,
+            }
+        )
+    return results
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="In-silico protein digestion using pyopenms."
+    )
+    parser.add_argument(
+        "--sequence",
+        help="Single-letter amino acid sequence of the protein",
+    )
+    parser.add_argument(
+        "--enzyme",
+        default="Trypsin",
+        help="Digestion enzyme name (default: Trypsin)",
+    )
+    parser.add_argument(
+        "--missed-cleavages",
+        type=int,
+        default=0,
+        dest="missed_cleavages",
+        help="Maximum missed cleavages (default: 0)",
+    )
+    parser.add_argument(
+        "--min-length",
+        type=int,
+        default=6,
+        dest="min_length",
+        help="Minimum peptide length (default: 6)",
+    )
+    parser.add_argument(
+        "--max-length",
+        type=int,
+        default=40,
+        dest="max_length",
+        help="Maximum peptide length (default: 40)",
+    )
+    parser.add_argument(
+        "--list-enzymes",
+        action="store_true",
+        dest="list_enzymes",
+        help="List all available enzyme names and exit",
+    )
+    args = parser.parse_args()
+
+    if args.list_enzymes:
+        enzymes = list_enzymes()
+        print("Available enzymes:")
+        for name in enzymes:
+            print(f"  {name}")
+        return
+
+    if not args.sequence:
+        parser.error("--sequence is required unless --list-enzymes is used.")
+
+    peptides = digest_protein(
+        args.sequence,
+        enzyme=args.enzyme,
+        missed_cleavages=args.missed_cleavages,
+        min_length=args.min_length,
+        max_length=args.max_length,
+    )
+
+    print(
+        f"Enzyme: {args.enzyme}  |  Missed cleavages ≤ {args.missed_cleavages}  "
+        f"|  Length {args.min_length}–{args.max_length}"
+    )
+    print(f"Total peptides: {len(peptides)}\n")
+    print(f"{'#':>4}  {'Sequence':<40}  {'Length':>6}  {'Mono Mass (Da)':>14}")
+    print("-" * 72)
+    for i, pep in enumerate(peptides, 1):
+        print(
+            f"{i:>4}  {pep['sequence']:<40}  {pep['length']:>6}  "
+            f"{pep['monoisotopic_mass']:>14.6f}"
+        )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/proteomics/spectrum_file_info.py b/scripts/proteomics/spectrum_file_info.py
new file mode 100644
index 0000000..514f64a
--- /dev/null
+++ b/scripts/proteomics/spectrum_file_info.py
@@ -0,0 +1,134 @@
+"""
+Mass Spectrum File Info
+=======================
+Read an mzML (or mzXML) file and print a summary of its contents:
+number of spectra, MS levels, retention time range, m/z range,
+and basic TIC statistics.
+
+Usage
+-----
+    python spectrum_file_info.py --input sample.mzML
+    python spectrum_file_info.py --input sample.mzML --tic
+"""
+
+import argparse
+import sys
+
+try:
+    import pyopenms as oms
+except ImportError:
+    sys.exit(
+        "pyopenms is required. Install it with:  pip install pyopenms"
+    )
+
+
+def summarise_experiment(exp: oms.MSExperiment) -> dict:
+    """Summarise a loaded MSExperiment object.
+
+    Parameters
+    ----------
+    exp:
+        Loaded ``pyopenms.MSExperiment`` instance.
+
+    Returns
+    -------
+    dict
+        Summary statistics for the experiment.
+    """
+    spectra = exp.getSpectra()
+    if not spectra:
+        return {"n_spectra": 0}
+
+    ms_levels = {}
+    rt_min = float("inf")
+    rt_max = float("-inf")
+    mz_min = float("inf")
+    mz_max = float("-inf")
+    tic_values = []
+
+    for spec in spectra:
+        level = spec.getMSLevel()
+        ms_levels[level] = ms_levels.get(level, 0) + 1
+        rt = spec.getRT()
+        rt_min = min(rt_min, rt)
+        rt_max = max(rt_max, rt)
+
+        mzs, intensities = spec.get_peaks()
+        if len(mzs) > 0:
+            mz_min = min(mz_min, float(mzs.min()))
+            mz_max = max(mz_max, float(mzs.max()))
+            tic_values.append(float(intensities.sum()))
+
+    return {
+        "n_spectra": len(spectra),
+        "ms_levels": ms_levels,
+        "rt_range_sec": (rt_min, rt_max),
+        "mz_range": (mz_min, mz_max),
+        "tic_total": sum(tic_values),
+        "tic_max": max(tic_values) if tic_values else 0.0,
+        "tic_per_spectrum": tic_values,
+    }
+
+
+def load_file(path: str) -> oms.MSExperiment:
+    """Load an mzML file into an MSExperiment.
+
+    Parameters
+    ----------
+    path:
+        Path to the mzML file.
+
+    Returns
+    -------
+    pyopenms.MSExperiment
+    """
+    exp = oms.MSExperiment()
+    oms.MzMLFile().load(path, exp)
+    return exp
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Summarise an mzML file using pyopenms."
+    )
+    parser.add_argument(
+        "--input",
+        required=True,
+        metavar="FILE",
+        help="Path to an mzML file",
+    )
+    parser.add_argument(
+        "--tic",
+        action="store_true",
+        help="Print per-spectrum TIC values",
+    )
+    args = parser.parse_args()
+
+    print(f"Loading {args.input} …")
+    exp = load_file(args.input)
+    summary = summarise_experiment(exp)
+
+    if summary["n_spectra"] == 0:
+        print("No spectra found in file.")
+        return
+
+    print(f"\n{'File':<22}: {args.input}")
+    print(f"{'Total spectra':<22}: {summary['n_spectra']}")
+    for level, count in sorted(summary["ms_levels"].items()):
+        print(f"  {'MS' + str(level) + ' spectra':<20}: {count}")
+    rt_min, rt_max = summary["rt_range_sec"]
+    print(f"{'RT range':<22}: {rt_min:.2f} – {rt_max:.2f} s  "
+          f"({rt_min/60:.2f} – {rt_max/60:.2f} min)")
+    mz_lo, mz_hi = summary["mz_range"]
+    print(f"{'m/z range':<22}: {mz_lo:.4f} – {mz_hi:.4f}")
+    print(f"{'Total TIC':<22}: {summary['tic_total']:.3e}")
+    print(f"{'Max spectrum TIC':<22}: {summary['tic_max']:.3e}")
+
+    if args.tic:
+        print("\n--- Per-spectrum TIC ---")
+        for i, tic in enumerate(summary["tic_per_spectrum"], 1):
+            print(f"  Spectrum {i:>5}: {tic:.3e}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/test_metabolomics.py b/tests/test_metabolomics.py
new file mode 100644
index 0000000..b41cbab
--- /dev/null
+++ b/tests/test_metabolomics.py
@@ -0,0 +1,122 @@
+"""
+Tests for metabolomics scripts.
+Run with:  python -m pytest tests/ -v
+"""
+
+import sys
+import os
+import math
+
+import pytest
+
+# Allow importing from the scripts directory
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "scripts", "metabolomics"))
+
+try:
+    import pyopenms as oms  # noqa: F401
+
+    HAS_PYOPENMS = True
+except ImportError:
+    HAS_PYOPENMS = False
+
+requires_pyopenms = pytest.mark.skipif(
+    not HAS_PYOPENMS, reason="pyopenms not installed"
+)
+
+
+@requires_pyopenms
+class TestMassAccuracyCalculator:
+    def test_sequence_theoretical(self):
+        from mass_accuracy_calculator import theoretical_mz_from_sequence
+
+        mz = theoretical_mz_from_sequence("PEPTIDEK", 1)
+        # The monoisotopic mass of PEPTIDEK is ~927.49 Da; +1 proton / 1
+        assert 928.0 < mz < 929.0
+
+    def test_formula_theoretical(self):
+        from mass_accuracy_calculator import theoretical_mz_from_formula
+
+        # Glucose C6H12O6 monoisotopic mass ~180.063 Da; +1 proton / 1 => ~181.070
+        mz = theoretical_mz_from_formula("C6H12O6", 1)
+        assert 181.0 < mz < 182.0
+
+    def test_ppm_zero_error(self):
+        from mass_accuracy_calculator import ppm_error
+
+        theoretical = 500.0
+        observed = 500.0
+        assert ppm_error(theoretical, observed) == 0.0
+
+    def test_ppm_positive_error(self):
+        from mass_accuracy_calculator import ppm_error
+
+        theoretical = 500.0
+        observed = 500.001
+        ppm = ppm_error(theoretical, observed)
+        assert ppm > 0
+
+    def test_ppm_negative_error(self):
+        from mass_accuracy_calculator import ppm_error
+
+        theoretical = 500.0
+        observed = 499.999
+        ppm = ppm_error(theoretical, observed)
+        assert ppm < 0
+
+    def test_ppm_known_value(self):
+        from mass_accuracy_calculator import ppm_error
+
+        # 1 ppm at 1000 Da => 0.001 Da shift
+        theoretical = 1000.0
+        observed = 1000.001
+        ppm = ppm_error(theoretical, observed)
+        assert abs(ppm - 1.0) < 0.001
+
+
+@requires_pyopenms
+class TestIsotopePatternMatcher:
+    def test_glucose_pattern(self):
+        from isotope_pattern_matcher import get_isotope_distribution
+
+        dist = get_isotope_distribution("C6H12O6", max_isotopes=3)
+        assert len(dist) == 3
+        # M+0 should be the base peak
+        assert dist[0][1] == pytest.approx(100.0)
+        # Each successive peak must be less intense than the previous for small molecules
+        assert dist[1][1] < dist[0][1]
+
+    def test_pattern_mz_ordering(self):
+        from isotope_pattern_matcher import get_isotope_distribution
+
+        dist = get_isotope_distribution("C12H22O11", max_isotopes=4)  # sucrose
+        mzs = [mz for mz, _ in dist]
+        assert mzs == sorted(mzs)
+
+    def test_cosine_similarity_perfect(self):
+        from isotope_pattern_matcher import cosine_similarity
+
+        peaks = [(100.0, 50.0), (101.0, 20.0), (102.0, 5.0)]
+        sim = cosine_similarity(peaks, peaks, mz_tolerance=0.01)
+        assert abs(sim - 1.0) < 1e-9
+
+    def test_cosine_similarity_no_overlap(self):
+        from isotope_pattern_matcher import cosine_similarity
+
+        theoretical = [(100.0, 50.0), (101.0, 20.0)]
+        observed = [(200.0, 50.0), (201.0, 20.0)]
+        sim = cosine_similarity(theoretical, observed, mz_tolerance=0.01)
+        assert sim == 0.0
+
+    def test_parse_peaks(self):
+        from isotope_pattern_matcher import parse_peaks
+
+        result = parse_peaks(["181.0709,100.0", "182.0742,6.7"])
+        assert len(result) == 2
+        assert result[0] == (181.0709, 100.0)
+        assert result[1] == (182.0742, 6.7)
+
+    def test_parse_peaks_invalid(self):
+        from isotope_pattern_matcher import parse_peaks
+
+        with pytest.raises(ValueError):
+            parse_peaks(["181.0709"])
diff --git a/tests/test_proteomics.py b/tests/test_proteomics.py
new file mode 100644
index 0000000..257b774
--- /dev/null
+++ b/tests/test_proteomics.py
@@ -0,0 +1,111 @@
+"""
+Tests for proteomics scripts.
+Run with:  python -m pytest tests/ -v
+"""
+
+import sys
+import os
+
+import pytest
+
+# Allow importing from the scripts directory
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "scripts", "proteomics"))
+
+try:
+    import pyopenms as oms  # noqa: F401
+
+    HAS_PYOPENMS = True
+except ImportError:
+    HAS_PYOPENMS = False
+
+requires_pyopenms = pytest.mark.skipif(
+    not HAS_PYOPENMS, reason="pyopenms not installed"
+)
+
+
+@requires_pyopenms
+class TestPeptideMassCalculator:
+    def test_basic_mass(self):
+        from peptide_mass_calculator import peptide_masses
+
+        result = peptide_masses("PEPTIDEK")
+        assert result["sequence"] == "PEPTIDEK"
+        assert result["charge"] == 1
+        assert 927.0 < result["monoisotopic_mass"] < 928.0
+        assert result["mz_monoisotopic"] > result["monoisotopic_mass"]
+
+    def test_charge_state(self):
+        from peptide_mass_calculator import peptide_masses
+
+        r1 = peptide_masses("PEPTIDEK", charge=1)
+        r2 = peptide_masses("PEPTIDEK", charge=2)
+        # Higher charge → lower m/z
+        assert r2["mz_monoisotopic"] < r1["mz_monoisotopic"]
+
+    def test_fragment_ions(self):
+        from peptide_mass_calculator import fragment_ions
+
+        ions = fragment_ions("PEPTIDEK")
+        seq_len = len("PEPTIDEK")
+        # Expect seq_len-1 b-ions and seq_len-1 y-ions
+        assert len(ions["b_ions"]) == seq_len - 1
+        assert len(ions["y_ions"]) == seq_len - 1
+
+    def test_modified_sequence(self):
+        from peptide_mass_calculator import peptide_masses
+
+        # Oxidised methionine
+        result = peptide_masses("PEPTM[147]IDEK")
+        assert result["monoisotopic_mass"] > 0
+
+    def test_mz_formula(self):
+        from peptide_mass_calculator import peptide_masses, PROTON
+
+        r = peptide_masses("PEPTIDEK", charge=2)
+        expected = (r["monoisotopic_mass"] + 2 * PROTON) / 2
+        assert abs(r["mz_monoisotopic"] - expected) < 1e-6
+
+
+@requires_pyopenms
+class TestProteinDigest:
+    # Short test protein containing known tryptic sites (K, R)
+    PROTEIN = "MKVLWAALLVTFLAGCQAKVEQAVETEPEPELRQQTEWQSGQRWELAL"
+
+    def test_tryptic_digest_returns_peptides(self):
+        from protein_digest import digest_protein
+
+        peptides = digest_protein(self.PROTEIN, enzyme="Trypsin", min_length=1)
+        assert len(peptides) > 0
+
+    def test_peptide_structure(self):
+        from protein_digest import digest_protein
+
+        peptides = digest_protein(self.PROTEIN, enzyme="Trypsin", min_length=1)
+        for pep in peptides:
+            assert "sequence" in pep
+            assert "monoisotopic_mass" in pep
+            assert pep["monoisotopic_mass"] > 0
+
+    def test_missed_cleavages(self):
+        from protein_digest import digest_protein
+
+        peps_0 = digest_protein(self.PROTEIN, enzyme="Trypsin", missed_cleavages=0, min_length=1)
+        peps_2 = digest_protein(self.PROTEIN, enzyme="Trypsin", missed_cleavages=2, min_length=1)
+        # More missed cleavages → at least as many peptides
+        assert len(peps_2) >= len(peps_0)
+
+    def test_length_filter(self):
+        from protein_digest import digest_protein
+
+        peptides = digest_protein(
+            self.PROTEIN, enzyme="Trypsin", min_length=5, max_length=20, missed_cleavages=2
+        )
+        for pep in peptides:
+            assert 5 <= pep["length"] <= 20
+
+    def test_list_enzymes(self):
+        from protein_digest import list_enzymes
+
+        enzymes = list_enzymes()
+        assert "Trypsin" in enzymes
+        assert len(enzymes) > 5