diff --git a/crates/catalyst-network/src/service.rs b/crates/catalyst-network/src/service.rs
index b8cf261..03ee25e 100644
--- a/crates/catalyst-network/src/service.rs
+++ b/crates/catalyst-network/src/service.rs
@@ -493,6 +493,39 @@ fn jitter_ms(peer_id: &PeerId, attempts: u32, max_ms: u64) -> u64 {
     (v % (max_ms + 1)) as u64
 }
 
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use libp2p::PeerId;
+
+    #[test]
+    fn peer_budget_enforces_msgs_and_bytes() {
+        let now = Instant::now();
+        let mut b = PeerBudget {
+            window_start: now,
+            msgs: 0,
+            bytes: 0,
+        };
+        // Two 10-byte messages fit exactly under caps of 2 msgs / 20 bytes.
+        assert!(b.allow(now, 10, 2, 20));
+        assert!(b.allow(now, 10, 2, 20));
+        // Byte cap in isolation: msg cap raised to 10, but one more byte would exceed 20.
+        assert!(!b.allow(now, 1, 10, 20));
+        // Msg cap in isolation: byte cap raised to 100, but 2 msgs were already allowed.
+        assert!(!b.allow(now, 1, 2, 100));
+    }
+
+    #[test]
+    fn backoff_and_jitter_are_bounded() {
+        let base = Duration::from_millis(100);
+        let b = compute_backoff(base, 100, 1_000).unwrap();
+        assert!(b <= Duration::from_millis(1_000));
+        let pid = PeerId::random();
+        let j = jitter_ms(&pid, 5, 250);
+        assert!(j <= 250);
+    }
+}
+
 fn load_or_generate_keypair(path: &Path) -> NetworkResult<identity::Keypair> {
     if let Ok(bytes) = std::fs::read(path) {
         if let Ok(kp) = identity::Keypair::from_protobuf_encoding(&bytes) {
diff --git a/crates/catalyst-network/src/simple.rs b/crates/catalyst-network/src/simple.rs
index a62a211..9007f67 100644
--- a/crates/catalyst-network/src/simple.rs
+++ b/crates/catalyst-network/src/simple.rs
@@ -454,6 +454,38 @@ fn jitter_ms(addr: SocketAddr, attempts: u32, max_ms: u64) -> u64 {
     (v % (max_ms + 1)) as u64
 }
 
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn conn_budget_enforces_msgs_and_bytes() {
+        let now = std::time::Instant::now();
+        let mut b = ConnBudget {
+            window_start: now,
+            msgs: 0,
+            bytes: 0,
+            max_msgs: 2,
+            max_bytes: 20,
+        };
+        assert!(b.allow(now, 10));
+        assert!(b.allow(now, 10));
+        b.max_bytes = 100; // leave only the msg cap binding: 2 of 2 msgs are already used
+        assert!(!b.allow(now, 1));
+        b.max_msgs = 10; // leave only the byte cap binding: 100 more bytes cannot fit under 100
+        assert!(!b.allow(now, 100));
+    }
+
+    #[test]
+    fn backoff_and_jitter_are_bounded() {
+        let b = compute_backoff(std::time::Duration::from_millis(100), 100, 1_000).unwrap();
+        assert!(b <= std::time::Duration::from_millis(1_000));
+        let addr: SocketAddr = "127.0.0.1:30333".parse().unwrap();
+        let j = jitter_ms(addr, 5, 250);
+        assert!(j <= 250);
+    }
+}
+
 fn multiaddr_to_socketaddr(addr: &Multiaddr) -> Option<SocketAddr> {
     // Support: /ip4/x.x.x.x/tcp/port (and /ip6/.../tcp/port)
     let mut ip: Option<IpAddr> = None;
diff --git a/docs/adversarial-test-plan.md b/docs/adversarial-test-plan.md
new file mode 100644
index 0000000..e9617dd
--- /dev/null
+++ b/docs/adversarial-test-plan.md
@@ -0,0 +1,38 @@
+# Adversarial test plan (working)
+
+This document enumerates adversarial scenarios and how to test them. Some scenarios are suitable for CI; others require WAN/chaos harnesses.
+
+## CI-suitable (fast, deterministic)
+
+- **Envelope wire rejection**:
+  - unknown `PROTOCOL_VERSION` should be rejected cleanly
+- **Rate budget behavior**:
+  - per-peer/per-conn budgets enforce msg/sec and bytes/sec caps
+- **Backoff/jitter bounding**:
+  - dial backoff clamps to configured maximum
+  - jitter stays within configured maximum
+- **Hop/loop bounds**:
+  - rebroadcast stops after `max_hops`
+  - messages don’t loop forever once local id is visited
+
+## Integration/WAN harness (slow, non-deterministic)
+
+- **Eclipse attempt**:
+  - isolate a victim by providing only attacker bootstrap peers
+  - verify victim can regain honest peers when at least one honest seed exists
+- **Sybil pressure**:
+  - connect N peers from limited IP space and verify per-IP caps / peer scoring keeps diversity
+- **Partition + heal**:
+  - split validators into two groups for T seconds, then heal
+  - verify nodes converge and “reliable join” repair does not corrupt state
+- **DoS flood**:
+  - send mixed size payloads at high QPS
+  - verify bounded CPU/memory and steady cycle production
+
+## Metrics to record (for #241/#206)
+
+- peer count over time, `min_peers` satisfaction
+- message drop counts (oversize / budget exceeded / decode fail / version mismatch)
+- CPU, memory, open fds, disk growth rate
+- cycle liveness (`applied_cycle` advances without gaps)
+
diff --git a/docs/security-threat-model.md b/docs/security-threat-model.md
new file mode 100644
index 0000000..c13667f
--- /dev/null
+++ b/docs/security-threat-model.md
@@ -0,0 +1,93 @@
+# Security threat model (working)
+
+This document is a living threat model for Catalyst mainnet readiness. It focuses on realistic adversaries and measurable failure modes.
+
+## Security goals
+
+- **Safety**: nodes should not accept or produce invalid state transitions.
+- **Liveness**: the network should continue producing cycles under WAN conditions and moderate adversarial pressure.
+- **Bounded resource usage**: hostile peers should not cause unbounded CPU/memory/disk growth.
+- **Operator recoverability**: clear runbooks for rollback/restore and for key compromise response.
+
+## Non-goals (for now)
+
+- Perfect privacy / traffic analysis resistance (see `#198`).
+- Full economic security analysis (see `#184` / `#185`).
+
+## Trust boundaries + assets
+
+- **Consensus / state**: LSU application, `prev_state_root` continuity, account state root.
+- **P2P network**: peer connections, message relay/rebroadcast, discovery.
+- **RPC surface**: public read APIs, tx submission, snapshot info.
+- **Storage**: RocksDB data directory, snapshots, pruning.
+- **Keys**: node identity key, validator key material, faucet custody keys.
+
+## Adversary model
+
+- **Remote internet attacker**: can connect to public P2P/RPC endpoints and send arbitrary messages at scale.
+- **Sybil**: can create many peers/identities and attempt to dominate connectivity.
+- **Eclipse attacker**: attempts to isolate a victim node by controlling its peer set.
+- **Partition**: network splits due to BGP/routing/NAT/firewall or intentional interference.
+- **Malicious operator**: runs a public RPC/indexer with modified code, logs, or data access.
+
+## Major threats and current mitigations
+
+### DoS via oversized or high-rate messages
+
+- **Threat**: send huge payloads or many small payloads to exhaust CPU/memory/bandwidth.
+- **Mitigations**:
+  - per-peer/per-connection **rate budgets** and **payload caps** in networking layer
+  - configurable safety limits in `[network.safety_limits]` (see `#246`)
+
+### DoS via unbounded rebroadcast/dedup state
+
+- **Threat**: force multi-hop rebroadcast caches to grow without bound.
+- **Mitigations**:
+  - relay/dedup caches are bounded and configurable (`[network.relay_cache]`, `[network.safety_limits.dedup_cache_max_entries]`)
+
+### Replay / downgrade / incompatible wire payloads
+
+- **Threat**: replay old messages or send incompatible wire payloads to cause confusion/crashes.
+- **Mitigations**:
+  - versioned envelope wire wrapper (`CENV` + `PROTOCOL_VERSION`)
+  - libp2p identify protocol gating (`catalyst/1`)
+
+### Eclipse / Sybil
+
+- **Threat**: attacker controls victim’s peer set, preventing honest connectivity.
+- **Mitigations (partial, current)**:
+  - bootstrap peer + DNS seed support
+  - min peer maintenance with dial backoff + jitter (`#200`)
+- **Gaps**:
+  - stronger peer selection diversity (IP/subnet caps, scoring, verified bootstrap sets)
+  - explicit capability/feature negotiation beyond identify string
+
+### Network partition / delayed delivery
+
+- **Threat**: consensus stalls or forks during partitions; rejoin causes state divergence.
+- **Mitigations (partial, current)**:
+  - “reliable join” work: backfill + continuity checks
+  - bounded message TTL/hops and dedup
+- **Gaps**:
+  - explicit partition testing harness and recovery procedures (`#241`)
+
+### Storage durability / corruption / rollback hazards
+
+- **Threat**: disk fills or DB corruption causes node failure or silent divergence.
+- **Mitigations**:
+  - history pruning (opt-in) + maintenance tools (`db-stats`, `db-maintenance`)
+  - snapshot backup/restore runbooks (`docs/node-operator-guide.md`)
+  - storage version marker (`storage:version`)
+
+### RPC abuse
+
+- **Threat**: high-rate RPC calls or expensive queries degrade node liveness.
+- **Mitigations (partial)**:
+  - P2P-side bounding exists; RPC-side needs explicit rate limiting and request shaping (future work).
+
+## What “done” looks like (mainnet bar)
+
+- Threats tracked to mitigations and tests.
+- At least one **adversarial CI suite** exists and is run per PR.
+- WAN soak/chaos testing is run before releases (`#241`).
+
diff --git a/scripts/wan_chaos/README.md b/scripts/wan_chaos/README.md
new file mode 100644
index 0000000..007a958
--- /dev/null
+++ b/scripts/wan_chaos/README.md
@@ -0,0 +1,49 @@
+## WAN soak / chaos harness (Linux netns + tc/netem)
+
+This harness runs a 3-node Catalyst network inside Linux network namespaces and applies
+WAN-like conditions (latency/loss/jitter) and chaos events (partitions, restarts).
+
+It is designed to be run on a dedicated Linux host (VM ok) with root privileges.
+
+### Prerequisites
+
+- Linux with `ip` (iproute2), `tc`, and `iptables` available
+- Root privileges (namespace setup + tc + iptables)
+- Rust toolchain (or prebuilt `catalyst-cli`)
+
+### What it does
+
+- Creates a bridge `catalystbr0` and 3 namespaces `catalyst-n1..n3`
+- Assigns IPs: `10.70.0.1..3`
+- Starts 3 nodes with P2P on `30333` and RPC on `8545` inside each namespace
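+- Polls `catalyst-cli status` roughly every 2 seconds and tracks how long each node's `applied_cycle` stays unchanged (stalls)
+- Applies a tc netem profile (latency/jitter/loss) plus an optional partition and an optional node restart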
+- Writes logs and a simple summary report under `scripts/wan_chaos/out/<run_id>/` (relative to the repo root)
+
+### Quick start
+
+From repo root:
+
+```bash
+sudo bash scripts/wan_chaos/run.sh
+```
+
+### Useful env vars
+
+- `DURATION_SECS` (default `300`)
+- `LOSS_PCT` (default `0`)
+- `LATENCY_MS` (default `50`)
+- `JITTER_MS` (default `10`)
+- `PARTITION_AT_SECS` (default unset)
+- `PARTITION_DURATION_SECS` (default `30`)
+- `RESTART_AT_SECS` (default unset)
+- `RESTART_NODE` (`n1|n2|n3`, default `n1`)
+
+### Cleanup
+
+The runner attempts to clean up automatically on exit. If that fails:
+
+```bash
+sudo bash scripts/wan_chaos/cleanup.sh
+```
+
diff --git a/scripts/wan_chaos/cleanup.sh b/scripts/wan_chaos/cleanup.sh
new file mode 100755
index 0000000..05e6bd0
--- /dev/null
+++ b/scripts/wan_chaos/cleanup.sh
@@ -0,0 +1,15 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+BR="catalystbr0"
+NS_PREFIX="catalyst-n"
+
+for i in 1 2 3; do
+  ns="${NS_PREFIX}${i}"
+  # A namespace with live processes survives `ip netns del` (and keeps its veth), so stop them first.
+  ip netns pids "$ns" 2>/dev/null | xargs -r kill 2>/dev/null || true
+  ip netns del "$ns" 2>/dev/null || true
+  ip link del "veth${i}" 2>/dev/null || true  # host-side veth created by run.sh; usually gone already
+done
+ip link del "$BR" 2>/dev/null || true
+echo "cleanup_ok: true"
diff --git a/scripts/wan_chaos/run.sh b/scripts/wan_chaos/run.sh
new file mode 100755
index 0000000..10ac74c
--- /dev/null
+++ b/scripts/wan_chaos/run.sh
@@ -0,0 +1,317 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"  # repo root, two levels above this script
+
+DURATION_SECS="${DURATION_SECS:-300}"  # length of the poll loop, in seconds
+LOSS_PCT="${LOSS_PCT:-0}"              # netem packet loss, percent
+LATENCY_MS="${LATENCY_MS:-50}"         # netem delay, milliseconds
+JITTER_MS="${JITTER_MS:-10}"           # netem delay variation, milliseconds
+
+PARTITION_AT_SECS="${PARTITION_AT_SECS:-}"  # empty = no partition event
+PARTITION_DURATION_SECS="${PARTITION_DURATION_SECS:-30}"
+
+RESTART_AT_SECS="${RESTART_AT_SECS:-}"  # empty = no restart event
+RESTART_NODE="${RESTART_NODE:-n1}"      # n1, n2 or n3
+
+RUN_ID="$(date +%s)"  # epoch seconds; also names the output directory
+OUT_DIR="${ROOT_DIR}/scripts/wan_chaos/out/${RUN_ID}"
+mkdir -p "${OUT_DIR}"
+
+BR="catalystbr0"
+NS_PREFIX="catalyst-n"
+
+CLI="${ROOT_DIR}/target/release/catalyst-cli"
+if [[ ! -x "${CLI}" ]]; then  # build only when no executable release binary exists yet
+  echo "building catalyst-cli..."
+  (cd "${ROOT_DIR}" && cargo build -p catalyst-cli --release --locked)
+fi
+
+cleanup() {
+  set +e  # teardown is best-effort: a failing step must not abort the rest of the handler
+  sudo bash "${ROOT_DIR}/scripts/wan_chaos/cleanup.sh" >/dev/null 2>&1 || true
+}
+trap cleanup EXIT  # tear the bridge and namespaces down however the script exits
+
+echo "run_id: ${RUN_ID}" | tee "${OUT_DIR}/run.meta"
+echo "duration_secs: ${DURATION_SECS}" | tee -a "${OUT_DIR}/run.meta"
+echo "netem: latency_ms=${LATENCY_MS} jitter_ms=${JITTER_MS} loss_pct=${LOSS_PCT}" | tee -a "${OUT_DIR}/run.meta"
+
+sudo bash "${ROOT_DIR}/scripts/wan_chaos/cleanup.sh" >/dev/null 2>&1 || true  # clear leftovers of an earlier run
+
+sudo ip link add name "${BR}" type bridge
+sudo ip link set "${BR}" up
+
+for i in 1 2 3; do
+  ns="${NS_PREFIX}${i}"
+  veth="veth${i}"    # host end, attached to the bridge
+  peer="veth${i}p"   # namespace end, carries the node IP
+  ip="10.70.0.${i}"
+
+  sudo ip netns add "${ns}"
+  sudo ip link add "${veth}" type veth peer name "${peer}"
+  sudo ip link set "${veth}" master "${BR}"
+  sudo ip link set "${veth}" up
+  sudo ip link set "${peer}" netns "${ns}"
+
+  sudo ip netns exec "${ns}" ip link set lo up
+  sudo ip netns exec "${ns}" ip addr add "${ip}/24" dev "${peer}"
+  sudo ip netns exec "${ns}" ip link set "${peer}" up
+  # No default route is needed (same subnet). Netem goes on the namespace-facing veth; tc rejects
+  # `distribution` with a zero delay or jitter, so it is passed unquoted and may expand to nothing.
+  netem_dist="distribution normal"
+  if [[ "${LATENCY_MS}" == 0 || "${JITTER_MS}" == 0 ]]; then netem_dist=""; fi
+  sudo ip netns exec "${ns}" tc qdisc add dev "${peer}" root netem \
+    delay "${LATENCY_MS}ms" "${JITTER_MS}ms" ${netem_dist} \
+    loss "${LOSS_PCT}%"
+done
+
+make_cfg() {
+  local ip="$1"    # node address: used for the listen multiaddr and the RPC bind address
+  local dir="$2"   # per-node directory: receives config.toml and contains the storage data_dir
+  mkdir -p "$dir"  # multiaddrs below use a plain "/": "\/" is not a valid TOML string escape
+  cat > "${dir}/config.toml" <<EOF
+[network]
+listen_addresses = ["/ip4/${ip}/tcp/30333"]
+bootstrap_peers = [
+  "/ip4/10.70.0.1/tcp/30333",
+  "/ip4/10.70.0.2/tcp/30333",
+  "/ip4/10.70.0.3/tcp/30333",
+]
+dns_seeds = []
+max_peers = 50
+min_peers = 2
+protocol_version = "catalyst/1"
+mdns_discovery = false
+dht_enabled = false
+
+[network.timeouts]
+connection_timeout = 10
+request_timeout = 10
+keep_alive_interval = 30
+
+[rpc]
+enabled = true
+address = "${ip}"
+port = 8545
+
+[storage]
+enabled = true
+data_dir = "${dir}/data"
+capacity_gb = 10
+cache_size_mb = 256
+write_buffer_size_mb = 64
+max_open_files = 1000
+compression_enabled = true
+history_prune_enabled = false
+history_keep_cycles = 604800
+history_prune_interval_seconds = 300
+history_prune_batch_cycles = 1000
+EOF
+}
+
+N1_DIR="${OUT_DIR}/n1"  # per-node directories: config.toml, node.log, node.pid and data live here
+N2_DIR="${OUT_DIR}/n2"
+N3_DIR="${OUT_DIR}/n3"
+make_cfg "10.70.0.1" "${N1_DIR}"  # node k uses address 10.70.0.k, matching the namespace setup
+make_cfg "10.70.0.2" "${N2_DIR}"
+make_cfg "10.70.0.3" "${N3_DIR}"
+
+start_node() {
+  local ns="$1"   # namespace the node runs in
+  local dir="$2"  # node directory: holds config.toml, receives node.log and node.pid
+  sudo ip netns exec "${ns}" bash -lc "cd \"${ROOT_DIR}\" && RUST_LOG=info exec \"${CLI}\" --config \"${dir}/config.toml\" start --validator" \
+    >"${dir}/node.log" 2>&1 &
+  echo $! > "${dir}/node.pid"  # PID of the backgrounded sudo; thanks to `exec` its command is the node, not a wrapper shell
+}
+
+start_node "${NS_PREFIX}1" "${N1_DIR}"
+start_node "${NS_PREFIX}2" "${N2_DIR}"
+start_node "${NS_PREFIX}3" "${N3_DIR}"
+
+sleep 2  # fixed pause before polling starts (presumably to let the nodes come up; not a readiness check)
+
+status_json() {
+  local ns="$1"  # namespace to run the CLI in
+  local ip="$2"  # node address whose RPC endpoint on port 8545 is queried
+  sudo ip netns exec "${ns}" bash -lc "\"${CLI}\" status --rpc-url \"http://${ip}:8545\" 2>/dev/null || true"  # stderr dropped, failures ignored: a down node must not abort the run
+}
+
+extract_applied_cycle() {
+  # Filter for `catalyst-cli status` text on stdin: prints the digits of the first
+  # `applied_cycle: <n>` line, and nothing at all when no line matches.
+  sed -nE '/^[[:space:]]*applied_cycle:[[:space:]]*([0-9]+).*$/{s//\1/p;q;}'
+}
+
+echo "t=0 starting poll loop" | tee -a "${OUT_DIR}/run.meta"
+
+START_TS="$(date +%s)"  # epoch seconds at loop start; ELAPSED is measured from here
+END_TS="$((START_TS + DURATION_SECS))"  # the poll loop runs until wall-clock time reaches this
+
+partition_on() {
+  # Partition: isolate n3 from n1/n2 with DROP rules inside n3's namespace only, in both directions (OUTPUT to and INPUT from 10.70.0.{1,2}).
+  sudo ip netns exec "${NS_PREFIX}3" iptables -I OUTPUT -d 10.70.0.1 -j DROP
+  sudo ip netns exec "${NS_PREFIX}3" iptables -I OUTPUT -d 10.70.0.2 -j DROP
+  sudo ip netns exec "${NS_PREFIX}3" iptables -I INPUT -s 10.70.0.1 -j DROP
+  sudo ip netns exec "${NS_PREFIX}3" iptables -I INPUT -s 10.70.0.2 -j DROP
+  echo "partition: on" | tee -a "${OUT_DIR}/events.log"
+}
+
+partition_off() {  # Heal: delete the four DROP rules from n3's namespace; a rule that is already absent is ignored.
+  sudo ip netns exec "${NS_PREFIX}3" iptables -D OUTPUT -d 10.70.0.1 -j DROP 2>/dev/null || true
+  sudo ip netns exec "${NS_PREFIX}3" iptables -D OUTPUT -d 10.70.0.2 -j DROP 2>/dev/null || true
+  sudo ip netns exec "${NS_PREFIX}3" iptables -D INPUT -s 10.70.0.1 -j DROP 2>/dev/null || true
+  sudo ip netns exec "${NS_PREFIX}3" iptables -D INPUT -s 10.70.0.2 -j DROP 2>/dev/null || true
+  echo "partition: off" | tee -a "${OUT_DIR}/events.log"
+}
+
+restart_node() {
+  local which="$1"  # node label: n1, n2 or n3
+  local dir="${OUT_DIR}/${which}"
+  if [[ -f "${dir}/node.pid" ]]; then
+    kill "$(cat "${dir}/node.pid")" 2>/dev/null || true  # default signal; a stale or missing PID is ignored
+    sleep 1  # fixed grace period; the old process is not waited on
+  fi
+  local ns="${NS_PREFIX}${which#n}"  # strips the leading "n": n2 -> catalyst-n2
+  start_node "${ns}" "${dir}"
+  echo "restart: ${which}" | tee -a "${OUT_DIR}/events.log"
+}
+
+PARTITION_DONE=0    # 1 once the partition has been started (one-shot)
+RESTART_DONE=0      # 1 once the restart has been issued (one-shot)
+PARTITION_OFF_AT=0  # elapsed second at which to heal; 0 = no heal pending
+
+LAST_C1=""  # last applied_cycle seen per node; empty until the first successful poll
+LAST_C2=""
+LAST_C3=""
+STALL_SECS_1=0  # total stalled time per node, accumulated in POLL_SECS steps
+STALL_SECS_2=0
+STALL_SECS_3=0
+LONGEST_STALL_1=0  # longest single stall per node
+LONGEST_STALL_2=0
+LONGEST_STALL_3=0
+CUR_STALL_1=0  # length of the stall in progress per node; reset to 0 when the cycle changes
+CUR_STALL_2=0
+CUR_STALL_3=0
+
+PARTITION_OFF_ELAPSED=""  # elapsed second at which the partition was healed; empty = not healed
+PARTITION_RESUME_SECS=""  # seconds from heal until all nodes were seen un-stalled; empty = not yet
+
+RESTART_ELAPSED=""      # elapsed second at which the restart happened; empty = no restart
+RESTART_RESUME_SECS=""  # seconds from restart until the node was seen un-stalled; empty = not yet
+
+POLL_SECS=2  # sleep between polls; also the unit used for stall accounting
+
+while [[ "$(date +%s)" -lt "${END_TS}" ]]; do
+  NOW="$(date +%s)"
+  ELAPSED="$((NOW - START_TS))"
+
+  if [[ -n "${PARTITION_AT_SECS}" && "${PARTITION_DONE}" -eq 0 && "${ELAPSED}" -ge "${PARTITION_AT_SECS}" ]]; then  # one-shot partition start
+    partition_on
+    PARTITION_DONE=1
+    PARTITION_OFF_AT="$((ELAPSED + PARTITION_DURATION_SECS))"
+  fi
+
+  if [[ "${PARTITION_DONE}" -eq 1 && "${PARTITION_OFF_AT}" -gt 0 && "${ELAPSED}" -ge "${PARTITION_OFF_AT}" ]]; then  # scheduled heal
+    partition_off
+    PARTITION_OFF_AT=0
+    PARTITION_OFF_ELAPSED="${ELAPSED}"
+  fi
+
+  if [[ -n "${RESTART_AT_SECS}" && "${RESTART_DONE}" -eq 0 && "${ELAPSED}" -ge "${RESTART_AT_SECS}" ]]; then  # one-shot restart
+    restart_node "${RESTART_NODE}"
+    RESTART_DONE=1
+    RESTART_ELAPSED="${ELAPSED}"
+  fi
+
+  raw1="$(status_json "${NS_PREFIX}1" "10.70.0.1")"
+  raw2="$(status_json "${NS_PREFIX}2" "10.70.0.2")"
+  raw3="$(status_json "${NS_PREFIX}3" "10.70.0.3")"
+
+  s1="$(echo "${raw1}" | tr '\n' ' ')"  # flattened to one line for status.log
+  s2="$(echo "${raw2}" | tr '\n' ' ')"
+  s3="$(echo "${raw3}" | tr '\n' ' ')"
+
+  c1="$(echo "${raw1}" | extract_applied_cycle || true)"  # empty when the node reported no cycle
+  c2="$(echo "${raw2}" | extract_applied_cycle || true)"
+  c3="$(echo "${raw3}" | extract_applied_cycle || true)"
+
+  echo "t=${ELAPSED} n1=${s1} n2=${s2} n3=${s3}" >> "${OUT_DIR}/status.log"
+  echo "t=${ELAPSED} applied_cycle n1=${c1:-na} n2=${c2:-na} n3=${c3:-na}" >> "${OUT_DIR}/cycles.log"
+
+  # Stall accounting per node (only when `applied_cycle` is present): an unchanged value adds POLL_SECS, which ignores the time the polls themselves take.
+  if [[ -n "${c1}" ]]; then
+    if [[ -n "${LAST_C1}" && "${c1}" == "${LAST_C1}" ]]; then
+      STALL_SECS_1=$((STALL_SECS_1 + POLL_SECS))
+      CUR_STALL_1=$((CUR_STALL_1 + POLL_SECS))
+      if [[ "${CUR_STALL_1}" -gt "${LONGEST_STALL_1}" ]]; then LONGEST_STALL_1="${CUR_STALL_1}"; fi
+    else
+      CUR_STALL_1=0
+    fi
+    LAST_C1="${c1}"
+  fi
+  if [[ -n "${c2}" ]]; then
+    if [[ -n "${LAST_C2}" && "${c2}" == "${LAST_C2}" ]]; then
+      STALL_SECS_2=$((STALL_SECS_2 + POLL_SECS))
+      CUR_STALL_2=$((CUR_STALL_2 + POLL_SECS))
+      if [[ "${CUR_STALL_2}" -gt "${LONGEST_STALL_2}" ]]; then LONGEST_STALL_2="${CUR_STALL_2}"; fi
+    else
+      CUR_STALL_2=0
+    fi
+    LAST_C2="${c2}"
+  fi
+  if [[ -n "${c3}" ]]; then
+    if [[ -n "${LAST_C3}" && "${c3}" == "${LAST_C3}" ]]; then
+      STALL_SECS_3=$((STALL_SECS_3 + POLL_SECS))
+      CUR_STALL_3=$((CUR_STALL_3 + POLL_SECS))
+      if [[ "${CUR_STALL_3}" -gt "${LONGEST_STALL_3}" ]]; then LONGEST_STALL_3="${CUR_STALL_3}"; fi
+    else
+      CUR_STALL_3=0
+    fi
+    LAST_C3="${c3}"
+  fi
+
+  # Time-to-resume after partition heal: first poll at or after the heal where the condition below holds (recorded once).
+  if [[ -n "${PARTITION_OFF_ELAPSED}" && -z "${PARTITION_RESUME_SECS}" ]]; then
+    # We define "resumed" as: all nodes have a cycle value and none are currently stalled.
+    if [[ -n "${c1}" && -n "${c2}" && -n "${c3}" && "${CUR_STALL_1}" -eq 0 && "${CUR_STALL_2}" -eq 0 && "${CUR_STALL_3}" -eq 0 ]]; then
+      PARTITION_RESUME_SECS=$((ELAPSED - PARTITION_OFF_ELAPSED))
+    fi
+  fi
+
+  # Time-to-resume after restart: first poll at which the restarted node reports a cycle and is not currently stalled (recorded once).
+  if [[ -n "${RESTART_ELAPSED}" && -z "${RESTART_RESUME_SECS}" ]]; then
+    if [[ "${RESTART_NODE}" == "n1" && "${CUR_STALL_1}" -eq 0 && -n "${c1}" ]]; then
+      RESTART_RESUME_SECS=$((ELAPSED - RESTART_ELAPSED))
+    fi
+    if [[ "${RESTART_NODE}" == "n2" && "${CUR_STALL_2}" -eq 0 && -n "${c2}" ]]; then
+      RESTART_RESUME_SECS=$((ELAPSED - RESTART_ELAPSED))
+    fi
+    if [[ "${RESTART_NODE}" == "n3" && "${CUR_STALL_3}" -eq 0 && -n "${c3}" ]]; then
+      RESTART_RESUME_SECS=$((ELAPSED - RESTART_ELAPSED))
+    fi
+  fi
+
+  sleep "${POLL_SECS}"
+done
+
+{  # summary report: printed to stdout and written to report.txt in one pass
+  echo "stalled_seconds.n1: ${STALL_SECS_1}"
+  echo "stalled_seconds.n2: ${STALL_SECS_2}"
+  echo "stalled_seconds.n3: ${STALL_SECS_3}"
+  echo "longest_stall_seconds.n1: ${LONGEST_STALL_1}"
+  echo "longest_stall_seconds.n2: ${LONGEST_STALL_2}"
+  echo "longest_stall_seconds.n3: ${LONGEST_STALL_3}"
+  if [[ -n "${PARTITION_OFF_ELAPSED}" ]]; then  # only when a partition was actually healed during the run
+    echo "partition_heal_at_seconds: ${PARTITION_OFF_ELAPSED}"
+    echo "partition_time_to_resume_seconds: ${PARTITION_RESUME_SECS:-na}"
+  fi
+  if [[ -n "${RESTART_ELAPSED}" ]]; then  # only when a restart actually happened during the run
+    echo "restart_at_seconds: ${RESTART_ELAPSED}"
+    echo "restart_node: ${RESTART_NODE}"
+    echo "restart_time_to_resume_seconds: ${RESTART_RESUME_SECS:-na}"
+  fi
+} | tee "${OUT_DIR}/report.txt"
+echo "out_dir: ${OUT_DIR}" | tee -a "${OUT_DIR}/report.txt"
+echo "report_ok: true" | tee -a "${OUT_DIR}/report.txt"
+