catalyst-network · TheNewAutonomy · Feb 27, 2026 · Feb 27, 2026 · Feb 27, 2026 · Feb 27, 2026
diff --git a/crates/catalyst-network/src/service.rs b/crates/catalyst-network/src/service.rs
@@ -493,6 +493,39 @@ fn jitter_ms(peer_id: &PeerId, attempts: u32, max_ms: u64) -> u64 {
     (v % (max_ms + 1)) as u64
 }
 
+#[cfg(test)]
+mod tests { // unit tests for the budget, backoff and jitter helpers of the parent module
+    use super::*;
+    use libp2p::PeerId;
+
+    #[test]
+    fn peer_budget_enforces_msgs_and_bytes() {
+        let now = Instant::now(); // one timestamp reused for every `allow` call below
+        let mut b = PeerBudget {
+            window_start: now,
+            msgs: 0,
+            bytes: 0,
+        };
+
+        // Two 10-byte messages must pass; args presumed (now, bytes, max_msgs, max_bytes) — confirm.
+        assert!(b.allow(now, 10, 2, 20));
+        assert!(b.allow(now, 10, 2, 20));
+        // Third must fail. NOTE(review): likely trips both caps (3 > 2 msgs, 21 > 20 bytes).
+        assert!(!b.allow(now, 1, 2, 20));
+    }
+
+    #[test]
+    fn backoff_and_jitter_are_bounded() {
+        let base = Duration::from_millis(100);
+        let b = compute_backoff(base, 100, 1_000).unwrap(); // last arg presumably the cap in ms — confirm
+        assert!(b <= Duration::from_millis(1_000));
+
+        let pid = PeerId::random(); // random peer id, so the jitter input differs per run
+        let j = jitter_ms(&pid, 5, 250); // jitter_ms(peer_id, attempts, max_ms)
+        assert!(j <= 250); // jitter_ms ends in `v % (max_ms + 1)`, so the result is at most max_ms
+    }
+}
+
 fn load_or_generate_keypair(path: &Path) -> NetworkResult<identity::Keypair> {
     if let Ok(bytes) = std::fs::read(path) {
         if let Ok(kp) = identity::Keypair::from_protobuf_encoding(&bytes) {

diff --git a/crates/catalyst-network/src/simple.rs b/crates/catalyst-network/src/simple.rs
@@ -454,6 +454,38 @@ fn jitter_ms(addr: SocketAddr, attempts: u32, max_ms: u64) -> u64 {
     (v % (max_ms + 1)) as u64
 }
 
+#[cfg(test)]
+mod tests { // unit tests for the budget, backoff and jitter helpers of the parent module
+    use super::*;
+
+    #[test]
+    fn conn_budget_enforces_msgs_and_bytes() {
+        let now = std::time::Instant::now(); // one timestamp reused for every `allow` call below
+        let mut b = ConnBudget {
+            window_start: now,
+            msgs: 0,
+            bytes: 0,
+            max_msgs: 2,
+            max_bytes: 20,
+        };
+
+        assert!(b.allow(now, 10)); // second arg presumably the message size in bytes — confirm
+        assert!(b.allow(now, 10));
+        assert!(!b.allow(now, 1)); // NOTE(review): likely trips both caps (3 > 2 msgs, 21 > 20 bytes)
+    }
+
+    #[test]
+    fn backoff_and_jitter_are_bounded() {
+        let base = std::time::Duration::from_millis(100);
+        let b = compute_backoff(base, 100, 1_000).unwrap(); // last arg presumably the cap in ms — confirm
+        assert!(b <= std::time::Duration::from_millis(1_000));
+
+        let addr: SocketAddr = "127.0.0.1:30333".parse().unwrap(); // fixed literal, parse cannot fail
+        let j = jitter_ms(addr, 5, 250); // jitter_ms(addr, attempts, max_ms)
+        assert!(j <= 250); // jitter_ms ends in `v % (max_ms + 1)`, so the result is at most max_ms
+    }
+}
+
 fn multiaddr_to_socketaddr(addr: &Multiaddr) -> Option<SocketAddr> {
     // Support: /ip4/x.x.x.x/tcp/port (and /ip6/.../tcp/port)
     let mut ip: Option<IpAddr> = None;

diff --git a/docs/adversarial-test-plan.md b/docs/adversarial-test-plan.md
@@ -0,0 +1,38 @@
+# Adversarial test plan (working)
+
+This document enumerates adversarial scenarios and how to test them. Some scenarios are suitable for CI; others require WAN/chaos harnesses.
+
+## CI-suitable (fast, deterministic)
+
+- **Envelope wire rejection**:
+  - unknown `PROTOCOL_VERSION` should be rejected cleanly
+- **Rate budget behavior**:
+  - per-peer/per-conn budgets enforce msg/sec and bytes/sec caps
+- **Backoff/jitter bounding**:
+  - dial backoff clamps to configured maximum
+  - jitter stays within configured maximum
+- **Hop/loop bounds**:
+  - rebroadcast stops after `max_hops`
+  - messages don’t loop forever once local id is visited
+
+## Integration/WAN harness (slow, non-deterministic)
+
+- **Eclipse attempt**:
+  - isolate a victim by providing only attacker bootstrap peers
+  - verify victim can regain honest peers when at least one honest seed exists
+- **Sybil pressure**:
+  - connect N peers from a limited IP space and verify that per-IP caps / peer scoring preserve peer diversity
+- **Partition + heal**:
+  - split validators into two groups for T seconds, then heal
+  - verify nodes converge and “reliable join” repair does not corrupt state
+- **DoS flood**:
+  - send mixed size payloads at high QPS
+  - verify bounded CPU/memory and steady cycle production
+
+## Metrics to record (for #241/#206)
+
+- peer count over time, `min_peers` satisfaction
+- message drop counts (oversize / budget exceeded / decode fail / version mismatch)
+- CPU, memory, open fds, disk growth rate
+- cycle liveness (`applied_cycle` keeps advancing with no gaps)
+
diff --git a/docs/security-threat-model.md b/docs/security-threat-model.md
@@ -0,0 +1,93 @@
+# Security threat model (working)
+
+This document is a living threat model for Catalyst mainnet readiness. It focuses on realistic adversaries and measurable failure modes.
+
+## Security goals
+
+- **Safety**: nodes should not accept or produce invalid state transitions.
+- **Liveness**: the network should continue producing cycles under WAN conditions and moderate adversarial pressure.
+- **Bounded resource usage**: hostile peers should not cause unbounded CPU/memory/disk growth.
+- **Operator recoverability**: clear runbooks for rollback/restore and for key compromise response.
+
+## Non-goals (for now)
+
+- Perfect privacy / traffic analysis resistance (see `#198`).
+- Full economic security analysis (see `#184` / `#185`).
+
+## Trust boundaries + assets
+
+- **Consensus / state**: LSU application, `prev_state_root` continuity, account state root.
+- **P2P network**: peer connections, message relay/rebroadcast, discovery.
+- **RPC surface**: public read APIs, tx submission, snapshot info.
+- **Storage**: RocksDB data directory, snapshots, pruning.
+- **Keys**: node identity key, validator key material, faucet custody keys.
+
+## Adversary model
+
+- **Remote internet attacker**: can connect to public P2P/RPC endpoints and send arbitrary messages at scale.
+- **Sybil**: can create many peers/identities and attempt to dominate connectivity.
+- **Eclipse attacker**: attempts to isolate a victim node by controlling its peer set.
+- **Partition**: network splits due to BGP/routing/NAT/firewall or intentional interference.
+- **Malicious operator**: runs a public RPC/indexer with modified code, logs, or data access.
+
+## Major threats and current mitigations
+
+### DoS via oversized or high-rate messages
+
+- **Threat**: send huge payloads or many small payloads to exhaust CPU/memory/bandwidth.
+- **Mitigations**:
+  - per-peer/per-connection **rate budgets** and **payload caps** in networking layer
+  - configurable safety limits in `[network.safety_limits]` (see `#246`)
+
+### DoS via unbounded rebroadcast/dedup state
+
+- **Threat**: force multi-hop rebroadcast caches to grow without bound.
+- **Mitigations**:
+  - relay/dedup caches are bounded and configurable (`[network.relay_cache]`, and `dedup_cache_max_entries` under `[network.safety_limits]`)
+
+### Replay / downgrade / incompatible wire payloads
+
+- **Threat**: replay old messages or send incompatible wire payloads to cause confusion/crashes.
+- **Mitigations**:
+  - versioned envelope wire wrapper (`CENV` + `PROTOCOL_VERSION`)
+  - libp2p identify protocol gating (`catalyst/1`)
+
+### Eclipse / Sybil
+
+- **Threat**: attacker controls victim’s peer set, preventing honest connectivity.
+- **Mitigations (partial, current)**:
+  - bootstrap peer + DNS seed support
+  - min peer maintenance with dial backoff + jitter (`#200`)
+- **Gaps**:
+  - stronger peer selection diversity (IP/subnet caps, scoring, verified bootstrap sets)
+  - explicit capability/feature negotiation beyond identify string
+
+### Network partition / delayed delivery
+
+- **Threat**: consensus stalls or forks during partitions; rejoin causes state divergence.
+- **Mitigations (partial, current)**:
+  - “reliable join” work: backfill + continuity checks
+  - bounded message TTL/hops and dedup
+- **Gaps**:
+  - explicit partition testing harness and recovery procedures (`#241`)
+
+### Storage durability / corruption / rollback hazards
+
+- **Threat**: a full disk or DB corruption causes node failure or silent divergence.
+- **Mitigations**:
+  - history pruning (opt-in) + maintenance tools (`db-stats`, `db-maintenance`)
+  - snapshot backup/restore runbooks (`docs/node-operator-guide.md`)
+  - storage version marker (`storage:version`)
+
+### RPC abuse
+
+- **Threat**: high-rate RPC calls or expensive queries degrade node liveness.
+- **Mitigations (partial)**:
+  - P2P-side bounding exists; RPC-side needs explicit rate limiting and request shaping (future work).
+
+## What “done” looks like (mainnet bar)
+
+- Every threat listed above is traced to at least one mitigation and at least one test.
+- At least one **adversarial CI suite** exists and is run per PR.
+- WAN soak/chaos testing is run before releases (`#241`).
+
diff --git a/scripts/wan_chaos/README.md b/scripts/wan_chaos/README.md
@@ -0,0 +1,49 @@
+## WAN soak / chaos harness (Linux netns + tc/netem)
+
+This harness runs a 3-node Catalyst network inside Linux network namespaces and applies
+WAN-like conditions (latency/loss/jitter) and chaos events (partitions, restarts).
+
+It is designed to be run on a dedicated Linux host (VM ok) with root privileges.
+
+### Prerequisites
+
+- Linux with `ip` (iproute2), `tc`, and `iptables` available
+- Root privileges (namespace setup + tc + iptables)
+- Rust toolchain (or prebuilt `catalyst-cli`)
+
+### What it does
+
+- Creates a bridge `catalystbr0` and 3 namespaces `catalyst-n1..n3`
+- Assigns IPs: `10.70.0.1..3`
+- Starts 3 nodes with P2P on `30333` and RPC on `8545` inside each namespace
+- Periodically polls `catalyst-cli status` to check `applied_cycle` monotonicity
+- Applies tc netem profiles and optional partitions
+- Writes logs + a simple summary report under `./wan_chaos/out/<run_id>/`
+
+### Quick start
+
+From repo root:
+
+```bash
+sudo bash scripts/wan_chaos/run.sh
+```
+
+### Useful env vars
+
+- `DURATION_SECS` (default `300`)
+- `LOSS_PCT` (default `0`)
+- `LATENCY_MS` (default `50`)
+- `JITTER_MS` (default `10`)
+- `PARTITION_AT_SECS` (default unset)
+- `PARTITION_DURATION_SECS` (default `30`)
+- `RESTART_AT_SECS` (default unset)
+- `RESTART_NODE` (`n1|n2|n3`, default `n1`)
+
+### Cleanup
+
+The runner attempts to clean up automatically. If that fails, run the cleanup script manually:
+
+```bash
+sudo bash scripts/wan_chaos/cleanup.sh
+```
+
diff --git a/scripts/wan_chaos/cleanup.sh b/scripts/wan_chaos/cleanup.sh
@@ -0,0 +1,15 @@
+#!/usr/bin/env bash
+set -euo pipefail
+# Best-effort removal of the catalyst-n1..n3 network namespaces and the catalystbr0 bridge.
+BR="catalystbr0"
+NS_PREFIX="catalyst-n"
+
+for i in 1 2 3; do
+  ns="${NS_PREFIX}${i}"
+  ip netns del "$ns" 2>/dev/null || true  # hide errors and keep going so `set -e` does not abort
+done
+
+ip link del "$BR" 2>/dev/null || true  # same best-effort handling for the bridge
+
+echo "cleanup_ok: true"  # printed even if the deletions above failed, since their errors are ignored
+